1
2
3
4 """ code for calculating empirical risk
5
6 """
7 from Numeric import *
8 import math
9
12
def BurgesRiskBound(VCDim, nData, nWrong, conf):
    """ Calculates Burges's formulation of the risk bound

    The formulation is from Eqn. 3 of Burges's review
    article "A Tutorial on Support Vector Machines for Pattern Recognition"
    In _Data Mining and Knowledge Discovery_ Kluwer Academic Publishers
    (1998) Vol. 2

    The value returned is

        nWrong/nData + sqrt((h*(log(2*l/h) + 1) - log(eta/4)) / l)

    with h = VCDim, l = nData and eta = conf (natural logarithms).

    **Arguments**

      - VCDim: the VC dimension of the system (must be non-zero, it is
        used as a divisor)

      - nData: the number of data points used (must be non-zero, it is
        used as a divisor)

      - nWrong: the number of data points misclassified

      - conf: the confidence to be used for this risk bound.  It enters
        the formula as eta; in Burges's paper the bound holds with
        probability 1 - eta, so smaller values give a looser bound.

    **Returns**

      - a float

    **Notes**

      - This has been validated against the Burges paper

      - I believe that this is only technically valid for binary classification

      - Arguments are treated as scalars (the math module is used for
        log and sqrt).

    """
    vcDim = VCDim
    nPoints = nData
    eta = conf

    # Empirical risk: the fraction of training points misclassified.
    empiricalRisk = float(nWrong) / nPoints

    # VC confidence term of Eqn. 3.
    numerator = vcDim * (math.log(2. * nPoints / vcDim) + 1.) - math.log(eta / 4.)
    structRisk = math.sqrt(numerator / nPoints)

    return empiricalRisk + structRisk
54
56 """
57 The formulation here is from pg 58, Theorem 4.6 of the book
58 "An Introduction to Support Vector Machines" by Cristianini and Shawe-Taylor,
59 Cambridge University Press, 2000
60
61
62 **Arguments**
63
64 - VCDim: the VC dimension of the system
65
66 - nData: the number of data points used
67
68 - nWrong: the number of data points misclassified
69
70 - conf: the confidence to be used for this risk bound
71
72
73 **Returns**
74
75 - a float
76
77 **Notes**
78
79 - this generates odd (mismatching) values
80
81 """
82
83
84 d = VCDim
85 delta = conf
86 l = nData
87 k = nWrong
88
89 structRisk = sqrt((4./nData) * ( d*log2((2.*math.e*l)/d) + log2(4./delta) ))
90 rEmp = 2.*k/l
91 return rEmp + structRisk
92
def CherkasskyRiskBound(VCDim, nData, nWrong, conf, a1=1.0, a2=2.0):
    """ Calculates the Cherkassky and Mulier formulation of the risk bound

    The formulation here is from Eqns 4.22 and 4.23 on pg 108 of
    Cherkassky and Mulier's book "Learning From Data" Wiley, 1998.

    The value returned is

        rEmp + eps/2 * (1 + sqrt(1 + 4*rEmp/eps))

    with rEmp = nWrong/nData and

        eps = a1 * (h*(log(a2*n/h) + 1) - log(eta/4)) / n

    where h = VCDim, n = nData and eta = conf (natural logarithms).

    **Arguments**

      - VCDim: the VC dimension of the system (must be non-zero, it is
        used as a divisor)

      - nData: the number of data points used (must be non-zero, it is
        used as a divisor)

      - nWrong: the number of data points misclassified

      - conf: the confidence to be used for this risk bound (enters the
        formula as eta)

      - a1, a2: constants in the risk equation. Restrictions on these values:

          - 0 <= a1 <= 4

          - 0 <= a2 <= 2

    **Returns**

      - a float

    **Notes**

      - This appears to behave reasonably

      - the equality a1=1.0 is by analogy to Burges's paper.

      - NOTE(review): the a2=2.0 default is assumed by analogy to the
        2*l/h term of the Burges bound -- confirm against callers.

      - a1 = 0 makes eps zero; the structural term is then taken as 0
        (the limit of the expression as eps -> 0) instead of dividing
        by zero.

      - a2 = 0 is not usable in practice: log(a2*n/h) is evaluated.

    """
    vcDim = VCDim
    nPoints = nData
    eta = conf

    # Empirical risk: the fraction of training points misclassified.
    empiricalRisk = float(nWrong) / nData

    numerator = vcDim * (math.log(float(a2 * nPoints) / vcDim) + 1) - math.log(eta / 4.)
    eps = a1 * numerator / nPoints

    if eps == 0:
        # eps/2 * (1 + sqrt(1 + 4*rEmp/eps)) -> 0 as eps -> 0; evaluating
        # it directly would divide by zero.
        structRisk = 0.0
    else:
        structRisk = eps / 2. * (1. + math.sqrt(1. + (4. * empiricalRisk / eps)))

    return empiricalRisk + structRisk
139
140
141
if __name__ == '__main__':
    # Demo driver: tabulates the Cherkassky and Burges bounds for a range
    # of VC dimensions at a fixed data-set size and zero training errors.
    # An optional first command-line argument overrides the confidence
    # value passed to BurgesRiskBound (default 0.05).
    import sys

    nWrong = 0
    nPts = 10000
    # Confidence value handed to CherkasskyRiskBound, capped at 1.
    eps = min(4. / math.sqrt(nPts), 1.)
    if len(sys.argv) > 1:
        eps2 = float(sys.argv[1])
    else:
        eps2 = .05

    for nClusters in range(1, 5000):
        # A single parenthesised argument prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print('% 2f\t % 6.4f\t % 6.4f' % (nClusters,
                                          CherkasskyRiskBound(nClusters, nPts, nWrong, eps),
                                          BurgesRiskBound(nClusters, nPts, nWrong, eps2)))
157