Package ML :: Package Cluster :: Module Murtagh
[hide private]
[frames] | no frames]

Source Code for Module ML.Cluster.Murtagh

  1  # $Id: Murtagh.py 2 2006-05-06 22:54:39Z glandrum $ 
  2  # 
  3  # Copyright (C) 2001-2006  greg Landrum and Rational Discovery LLC 
  4  # 
  5  #   @@ All Rights Reserved  @@ 
  6  # 
  7  """ Interface to the C++ Murtagh hierarchic clustering code 
  8   
  9  """ 
 10  from Numeric import * 
 11  from ML.Cluster import Clusters 
 12  from ML.Cluster.Clustering import MurtaghCluster,MurtaghDistCluster 
 13   
 14   
 15   
 16  # constants to select the clustering algorithm 
 17  WARDS=1 
 18  SLINK=2 
 19  CLINK=3 
 20  UPGMA=4 
 21  MCQUITTY=5 
 22  GOWER=6 
 23  CENTROID=7 
 24   
 25  # descriptions of the methods: 
 26  methods = [ 
 27    ("Ward's Minimum Variance",WARDS,"Ward's Minimum Variance"), 
 28    ('Average Linkage',UPGMA,'Group Average Linkage (UPGMA)'), 
 29    ('Single Linkage',SLINK,'Single Linkage (SLINK)'), 
 30    ('Complete Linkage',CLINK,'Complete Linkage (CLINK)'), 
 31  #  ("McQuitty",MCQUITTY,"McQuitty's method"), 
 32  #  ("Gower",GOWER,"Gower's median method"), 
 33    ("Centroid",CENTROID,"Centroid method"), 
 34    ] 
 35   
36 -def _LookupDist(dists,i,j,n):
37 """ *Internal Use Only* 38 39 returns the distance between points i and j in the symmetric 40 distance matrix _dists_ 41 42 """ 43 if i==j: return 0.0 44 if i > j: i,j=j,i 45 return dists[j*(j-1)/2+i]
46 47 48
49 -def _ToClusters(data,nPts,ia,ib,crit,isDistData=0):
50 """ *Internal Use Only* 51 52 Converts the results of the Murtagh clustering code into 53 a cluster tree, which is returned in a single-entry list 54 55 """ 56 cs = [None]*nPts 57 for i in range(nPts): 58 cs[i] = Clusters.Cluster(metric=0.0,data=i,index=(i+1)) 59 60 nClus = len(ia)-1 61 for i in range(nClus): 62 idx1 = ia[i]-1 63 idx2 = ib[i]-1 64 c1 = cs[idx1] 65 c2 = cs[idx2] 66 newClust = Clusters.Cluster(metric=crit[i],children=[c1,c2], 67 index=nPts+i+1) 68 cs[idx1] = newClust 69 70 return [newClust]
71 72
73 -def ClusterData(data,nPts,method,isDistData=0):
74 """ clusters the data points passed in and returns the cluster tree 75 76 **Arguments** 77 78 - data: a list of lists (or array, or whatever) with the input 79 data (see discussion of _isDistData_ argument for the exception) 80 81 - nPts: the number of points to be used 82 83 - method: determines which clustering algorithm should be used. 84 The defined constants for these are: 85 'WARDS, SLINK, CLINK, UPGMA' 86 87 - isDistData: set this toggle when the data passed in is a 88 distance matrix. The distance matrix should be stored 89 symmetrically so that _LookupDist (above) can retrieve 90 the results: 91 for i<j: d_ij = dists[j*(j-1)/2 + i] 92 93 94 **Returns** 95 96 - a single entry list with the cluster tree 97 """ 98 data = array(data) 99 if not isDistData: 100 sz = data.shape[1] 101 ia,ib,crit = MurtaghCluster(data,nPts,sz,method) 102 else: 103 ia,ib,crit = MurtaghDistCluster(data,nPts,method) 104 c = _ToClusters(data,nPts,ia,ib,crit,isDistData=isDistData) 105 106 return c
107