1
2
3
4
5
6
7 """ Interface to the C++ Murtagh hierarchic clustering code
8
9 """
10 from Numeric import *
11 from ML.Cluster import Clusters
12 from ML.Cluster.Clustering import MurtaghCluster,MurtaghDistCluster
13
14
15
16
17 WARDS=1
18 SLINK=2
19 CLINK=3
20 UPGMA=4
21 MCQUITTY=5
22 GOWER=6
23 CENTROID=7
24
25
26 methods = [
27 ("Ward's Minimum Variance",WARDS,"Ward's Minimum Variance"),
28 ('Average Linkage',UPGMA,'Group Average Linkage (UPGMA)'),
29 ('Single Linkage',SLINK,'Single Linkage (SLINK)'),
30 ('Complete Linkage',CLINK,'Complete Linkage (CLINK)'),
31
32
33 ("Centroid",CENTROID,"Centroid method"),
34 ]
35
37 """ *Internal Use Only*
38
39 returns the distance between points i and j in the symmetric
40 distance matrix _dists_
41
42 """
43 if i==j: return 0.0
44 if i > j: i,j=j,i
45 return dists[j*(j-1)/2+i]
46
47
48
50 """ *Internal Use Only*
51
52 Converts the results of the Murtagh clustering code into
53 a cluster tree, which is returned in a single-entry list
54
55 """
56 cs = [None]*nPts
57 for i in range(nPts):
58 cs[i] = Clusters.Cluster(metric=0.0,data=i,index=(i+1))
59
60 nClus = len(ia)-1
61 for i in range(nClus):
62 idx1 = ia[i]-1
63 idx2 = ib[i]-1
64 c1 = cs[idx1]
65 c2 = cs[idx2]
66 newClust = Clusters.Cluster(metric=crit[i],children=[c1,c2],
67 index=nPts+i+1)
68 cs[idx1] = newClust
69
70 return [newClust]
71
72
74 """ clusters the data points passed in and returns the cluster tree
75
76 **Arguments**
77
78 - data: a list of lists (or array, or whatever) with the input
79 data (see discussion of _isDistData_ argument for the exception)
80
81 - nPts: the number of points to be used
82
83 - method: determines which clustering algorithm should be used.
84 The defined constants for these are:
85 'WARDS, SLINK, CLINK, UPGMA'
86
87 - isDistData: set this toggle when the data passed in is a
88 distance matrix. The distance matrix should be stored
89 symmetrically so that _LookupDist (above) can retrieve
90 the results:
91 for i<j: d_ij = dists[j*(j-1)/2 + i]
92
93
94 **Returns**
95
96 - a single entry list with the cluster tree
97 """
98 data = array(data)
99 if not isDistData:
100 sz = data.shape[1]
101 ia,ib,crit = MurtaghCluster(data,nPts,sz,method)
102 else:
103 ia,ib,crit = MurtaghDistCluster(data,nPts,method)
104 c = _ToClusters(data,nPts,ia,ib,crit,isDistData=isDistData)
105
106 return c
107