1
2
3
4
5
6
7 """utility functions for clustering
8
9 """
10
12 """returns an ordered list of all nodes below cluster
13
14 the ordering is done using the lengths of the child nodes
15
16 **Arguments**
17
18 - cluster: the cluster in question
19
20 **Returns**
21
22 - a list of the leaves below this cluster
23
24 """
25 if len(cluster) == 1:
26 return [cluster]
27 else:
28 children = cluster.GetChildren()
29 children.sort(lambda x,y:cmp(len(y),len(x)))
30 res = []
31 for child in children:
32 res += GetNodeList(child)
33 res += [cluster]
34 return res
35
37 """returns an ordered list of all nodes below cluster
38
39
40 """
41 if hasattr(cluster,'_isCentroid'):
42 cluster._aboveCentroid = 0
43 above = -1
44 else:
45 cluster._aboveCentroid = above
46 if len(cluster) == 1:
47 return [cluster]
48 else:
49 res = []
50 children = cluster.GetChildren()
51 children.sort(lambda x,y:cmp(len(y),len(x)))
52 for child in children:
53 res = res + GetNodesDownToCentroids(child,above)
54 res = res + [cluster]
55 return res
56
58 """ find the point in a cluster which has the smallest summed
59 Euclidean distance to all others
60
61 **Arguments**
62
63 - cluster: the cluster to work with
64
65 - dists: the distance matrix to use for the points
66
67 **Returns**
68
69 - the index of the centroid point
70
71 """
72 children = cluster.GetPoints()
73 pts = [x.GetData() for x in children]
74
75 best = 1e24
76 bestIdx = -1
77 for pt in pts:
78 dAccum = 0.0
79
80 for other in pts:
81 if other != pt:
82 if other > pt: row,col = pt,other
83 else: row,col = other,pt
84 dAccum += dists[col*(col-1)/2+row]
85 if dAccum >= best:
86
87 break
88 if dAccum < best:
89 best = dAccum
90 bestIdx = pt
91 for i in range(len(pts)):
92 pt = pts[i]
93 if pt != bestIdx:
94 if pt > bestIdx: row,col = bestIdx,pt
95 else: row,col = pt,bestIdx
96 children[i]._distToCenter = dists[col*(col-1)/2+row]
97 else:
98 children[i]._distToCenter = 0.0
99 children[i]._clustCenter = bestIdx
100 cluster._clustCenter=bestIdx
101 cluster._distToCenter=0.0
102
103 return bestIdx
104
106 """ *Internal Use Only*
107
108 """
109 if len(cluster)<n:
110 raise ValueError,'Cannot split cluster of length %d into %d pieces'%(len(cluster),n)
111 if len(cluster)==n:
112 return cluster.GetPoints()
113 clusters = [cluster]
114 nxtIdx = 0
115 for i in range(n-1):
116 while nxtIdx<len(clusters) and len(clusters[nxtIdx])==1:
117 nxtIdx+=1
118 assert nxtIdx<len(clusters)
119
120 children = clusters[nxtIdx].GetChildren()
121 children.sort(_sortHelper)
122 for child in children:
123 clusters.append(child)
124 del clusters[nxtIdx]
125 return clusters
126
128 """ *Internal Use Only*
129
130 Used to sort cluster nodes
131
132 """
133 return cmp(cb.GetMetric(),ca.GetMetric())
134
136 """ *Internal Use Only*
137
138 """
139 if len(cluster)<n:
140 raise ValueError,'Cannot split cluster of length %d into %d pieces'%(len(cluster),n)
141 if len(cluster)==n:
142 return cluster.GetPoints()
143 clusters = [cluster]
144 for i in range(n-1):
145 nxtIdx = 0
146 while nxtIdx<len(clusters) and len(clusters[nxtIdx])==1:
147 nxtIdx+=1
148 assert nxtIdx<len(clusters)
149
150 children = clusters[nxtIdx].GetChildren()
151 for child in children:
152 clusters.append(child)
153 del clusters[nxtIdx]
154 clusters.sort(_sortHelper)
155 return clusters
156
157
159 """ splits a cluster tree into a set of branches
160
161 **Arguments**
162
163 - cluster: the root of the cluster tree
164
165 - n: the number of clusters to include in the split
166
167 - breadthFirst: toggles breadth first (vs depth first) cleavage
168 of the cluster tree.
169
170 **Returns**
171
172 - a list of sub clusters
173
174 """
175 if breadthFirst:
176 return _BreadthFirstSplit(cluster,n)
177 else:
178 return _HeightFirstSplit(cluster,n)
179