1
2
3
4
5
6
7 """utility functions for clustering
8
9 """
10 from Numeric import *
11
13 """returns an ordered list of all nodes below cluster
14
15 the ordering is done using the lengths of the child nodes
16
17 **Arguments**
18
19 - cluster: the cluster in question
20
21 **Returns**
22
23 - a list of the leaves below this cluster
24
25 """
26 if len(cluster) == 1:
27 return [cluster]
28 else:
29 res = []
30 children = cluster.GetChildren()
31 children.sort(lambda x,y:cmp(len(y),len(x)))
32 for child in children:
33 res += GetNodeList(child)
34 res += [cluster]
35 return res
36
38 """returns an ordered list of all nodes below cluster
39
40
41 """
42 if hasattr(cluster,'_isCentroid'):
43 cluster._aboveCentroid = 0
44 above = -1
45 else:
46 cluster._aboveCentroid = above
47 if len(cluster) == 1:
48 return [cluster]
49 else:
50 res = []
51 children = cluster.GetChildren()
52 children.sort(lambda x,y:cmp(len(y),len(x)))
53 for child in children:
54 res = res + GetNodesDownToCentroids(child,above)
55 res = res + [cluster]
56 return res
57
59 """ find the point in a cluster which has the smallest summed
60 Euclidean distance to all others
61
62 **Arguments**
63
64 - cluster: the cluster to work with
65
66 - dists: the distance matrix to use for the points
67
68 **Returns**
69
70 - the index of the centroid point
71
72 """
73 children = cluster.GetPoints()
74 pts = map(lambda x:x.GetData(),children)
75
76 best = 1e24
77 bestIdx = -1
78 for pt in pts:
79 dAccum = 0.0
80
81 for other in pts:
82 if other != pt:
83 if other > pt: row,col = pt,other
84 else: row,col = other,pt
85 dAccum += dists[col*(col-1)/2+row]
86 if dAccum >= best:
87
88 break
89 if dAccum < best:
90 best = dAccum
91 bestIdx = pt
92 for i in range(len(pts)):
93 pt = pts[i]
94 if pt != bestIdx:
95 if pt > bestIdx: row,col = bestIdx,pt
96 else: row,col = pt,bestIdx
97 children[i]._distToCenter = dists[col*(col-1)/2+row]
98 else:
99 children[i]._distToCenter = 0.0
100 children[i]._clustCenter = bestIdx
101 cluster._clustCenter=bestIdx
102 cluster._distToCenter=0.0
103
104 return bestIdx
105
107 """ *Internal Use Only*
108
109 """
110 if len(cluster)<n:
111 raise ValueError,'Cannot split cluster of length %d into %d pieces'%(len(cluster),n)
112 if len(cluster)==n:
113 return cluster.GetPoints()
114 clusters = [cluster]
115 nxtIdx = 0
116 for i in range(n-1):
117 while nxtIdx<len(clusters) and len(clusters[nxtIdx])==1:
118 nxtIdx+=1
119 assert nxtIdx<len(clusters)
120
121 children = clusters[nxtIdx].GetChildren()
122 children.sort(_sortHelper)
123 for child in children:
124 clusters.append(child)
125 del clusters[nxtIdx]
126 return clusters
127
129 """ *Internal Use Only*
130
131 Used to sort cluster nodes
132
133 """
134 return cmp(cb.GetMetric(),ca.GetMetric())
135
137 """ *Internal Use Only*
138
139 """
140 if len(cluster)<n:
141 raise ValueError,'Cannot split cluster of length %d into %d pieces'%(len(cluster),n)
142 if len(cluster)==n:
143 return cluster.GetPoints()
144 clusters = [cluster]
145 for i in range(n-1):
146 nxtIdx = 0
147 while nxtIdx<len(clusters) and len(clusters[nxtIdx])==1:
148 nxtIdx+=1
149 assert nxtIdx<len(clusters)
150
151 children = clusters[nxtIdx].GetChildren()
152 for child in children:
153 clusters.append(child)
154 del clusters[nxtIdx]
155 clusters.sort(_sortHelper)
156 return clusters
157
158
160 """ splits a cluster tree into a set of branches
161
162 **Arguments**
163
164 - cluster: the root of the cluster tree
165
166 - n: the number of clusters to include in the split
167
168 - breadthFirst: toggles breadth first (vs depth first) cleavage
169 of the cluster tree.
170
171 **Returns**
172
173 - a list of sub clusters
174
175 """
176 if breadthFirst:
177 return _BreadthFirstSplit(cluster,n)
178 else:
179 return _HeightFirstSplit(cluster,n)
180