1
2
3
4
5
6
7 """ contains the Cluster class for representing hierarchical cluster trees
8
9 """
10 from Numeric import *
11
12 CMPTOL=1e-6
13
15 """a class for storing clusters/data
16
17 **General Remarks**
18
19 - It is assumed that the bottom of any cluster hierarchy tree is composed of
20 the individual data points which were clustered.
21
22 - Clusters objects store the following pieces of data, most are
23 accessible via standard Setters/Getters:
24
25 - Children: *Not Settable*, the list of children. You can add children
26 with the _AddChild()_ and _AddChildren()_ methods.
27
28 **Note** this can be of arbitrary length,
29 but the current algorithms I have only produce trees with two children
30 per cluster
31
32 - Metric: the metric for this cluster (i.e. how far apart its children are)
33
34 - Index: the order in which this cluster was generated
35
36 - Points: *Not Settable*, the list of original points in this cluster
37 (calculated recursively from the children)
38
39 - PointsPositions: *Not Settable*, the list of positions of the original
40 points in this cluster (calculated recursively from the children)
41
42 - Position: the location of the cluster **Note** for a cluster this
43 probably means the location of the average of all the Points which are
44 its children.
45
46 - Data: a data field. This is used with the original points to store their
47 data value (i.e. the value we're using to classify)
48
49 - Name: the name of this cluster
50
51 """
52 - def __init__(self,metric=0.0,children=None,position=None,index=-1,name=None,data=None):
53 """Constructor
54
55 **Arguments**
56
57 see the class documentation for the meanings of these arguments
58
59 *my wrists are tired*
60
61 """
62 if children is None:
63 children = []
64 if position is None:
65 position = []
66 self.metric = metric
67 self.children = children
68 self._UpdateLength()
69 self.pos = position
70 self.index = index
71 self.name = name
72 self._points = None
73 self._pointsPositions = None
74 self.data = data
75
80
85
90
92 if self._pointsPositions is not None:
93 return self._pointsPositions
94 else:
95 self._GenPoints()
96 return self._pointsPositions
97
99 if self._points is not None:
100 return self._points
101 else:
102 self._GenPoints()
103 return self._points
104
106 """ finds and returns the subtree with a particular index
107 """
108 res = None
109 if index == self.index:
110 res = self
111 else:
112 for child in self.children:
113 res = child.FindSubtree(index)
114 if res:
115 break
116 return res
117
119 """ Generates the _Points_ and _PointsPositions_ lists
120
121 *intended for internal use*
122
123 """
124 if len(self) == 1:
125 self._points = [self]
126 self._pointsPositions = [self.GetPosition()]
127 return self._points
128 else:
129 res = []
130 children = self.GetChildren()
131 children.sort(lambda x,y:cmp(len(y),len(x)))
132 for child in children:
133 res += child.GetPoints()
134 self._points=res
135 self._pointsPositions = map(lambda x:x.GetPosition(),res)
136
160 """Removes a child from our list
161
162 **Arguments**
163
164 - child: a Cluster
165
166 """
167 self.children.remove(child)
168 self._UpdateLength()
169
173
178
182 if self.name is None:
183 return 'Cluster(%d)'%(self.GetIndex())
184 else:
185 return self.name
186
187 - def Print(self,level=0,showData=0,offset='\t'):
188 if not showData or self.GetData() is None:
189 print '%s%s%s Metric: %f'%(' '*level,self.GetName(),offset,self.GetMetric())
190 else:
191 print '%s%s%s Data: %f\t Metric: %f'%(' '*level,self.GetName(),offset,self.GetData(),self.GetMetric())
192
193 for child in self.GetChildren():
194 child.Print(level=level+1,showData=showData,offset=offset)
195
196 - def Compare(self,other,ignoreExtras=1):
197 """ not as choosy as self==other
198
199 """
200 if cmp(type(self),type(other)):
201 return cmp(type(self),type(other))
202
203 if cmp(len(self),len(other)):
204 return cmp(len(self),len(other))
205
206 if not ignoreExtras:
207 m1,m2=self.GetMetric(),other.GetMetric()
208 if abs(m1-m2)>CMPTOL:
209 return cmp(m1,m2)
210
211 if cmp(self.GetName(),other.GetName()):
212 return cmp(self.GetName(),other.GetName())
213
214 sP = self.GetPosition()
215 oP = other.GetPosition()
216 try:
217 r = cmp(len(sP),len(oP))
218 except:
219 pass
220 else:
221 if r:
222 return r
223
224 try:
225 r = cmp(sP,oP)
226 except:
227 r = sum(sP-oP)
228 if r:
229 return r
230
231 c1,c2=self.GetChildren(),other.GetChildren()
232 if cmp(len(c1),len(c2)):
233 return cmp(len(c1),len(c2))
234 for i in range(len(c1)):
235 t = c1[i].Compare(c2[i],ignoreExtras=ignoreExtras)
236 if t:
237 return t
238
239 return 0
240
242 """ updates our length
243
244 *intended for internal use*
245
246 """
247 self._len = reduce(lambda x,y: len(y)+x,self.children,1)
248
251
253 """ allows _len(cluster)_ to work
254
255 """
256 return self._len
257
259 """ allows _cluster1 == cluster2_ to work
260
261 """
262 if cmp(type(self),type(other)):
263 return cmp(type(self),type(other))
264
265 m1,m2=self.GetMetric(),other.GetMetric()
266 if abs(m1-m2)>CMPTOL:
267 return cmp(m1,m2)
268
269 if cmp(self.GetName(),other.GetName()):
270 return cmp(self.GetName(),other.GetName())
271
272 c1,c2=self.GetChildren(),other.GetChildren()
273 return cmp(c1,c2)
274
275
276 if __name__ == '__main__':
277 from ML.Cluster import ClusterUtils
278 root = Cluster(index=1,metric=1000)
279 c1 = Cluster(index=10,metric=100)
280 c1.AddChild(Cluster(index=30,metric=10))
281 c1.AddChild(Cluster(index=31,metric=10))
282 c1.AddChild(Cluster(index=32,metric=10))
283
284 c2 = Cluster(index=11,metric=100)
285 c2.AddChild(Cluster(index=40,metric=10))
286 c2.AddChild(Cluster(index=41,metric=10))
287
288 root.AddChild(c1)
289 root.AddChild(c2)
290
291 nodes = ClusterUtils.GetNodeList(root)
292
293 indices = [x.GetIndex() for x in nodes]
294 print 'XXX:',indices
295