Package ML :: Package InfoTheory :: Module BitClusterer
[hide private]
[frames | no frames]

Source Code for Module ML.InfoTheory.BitClusterer

 1  #
 
 2  #  Copyright (C) 2000,2003  Rational Discovery LLC
 
 3  #
 
 4  
 
 5  from SimDivFilters import rdSimDivPickers as rdsimdiv 
 6  import DataStructs 
 7  from Numeric import * 
class BitClusterer(object):
    """Group fingerprint bits into clusters based on their correlation.

    The correlation matrix is not computed by this class; the caller passes
    it to ClusterBits().  Once clusters are available (via ClusterBits() or
    SetClusters()), a fingerprint can be reduced either to a vector of
    per-cluster on-bit counts (MapToClusterScores) or to a fingerprint with
    one bit per cluster (MapToClusterFP).
    """

    def __init__(self, idList, nCluster, type=rdsimdiv.ClusterMethod.WARD):
        """Record the clustering configuration; nothing is clustered yet.

        ARGUMENTS:
          - idList : ids of the bits to be clustered; cluster members returned
                     by the picker are used as positions into this sequence
          - nCluster : number of clusters requested from the picker
          - type : clustering method handed to HierarchicalClusterPicker
                   (the name shadows the builtin but is part of the public
                   signature)
        """
        self._bidList = idList
        self._nClusters = nCluster
        self._type = type
        # Filled in by ClusterBits() or SetClusters().
        self._clusters = []
        # NOTE: a BitCorrelationMatGenerator used to be constructed here; the
        # correlation matrix is now supplied directly to ClusterBits().

    def ClusterBits(self, corrMat):
        """Cluster the bits and store the result as lists of bit ids.

        ARGUMENTS:
          - corrMat : correlation values for the bits in the id list; it is
                      inverted element-wise to obtain the distances the
                      picker works on
        """
        # The picker clusters on distances, so use the reciprocal of each
        # correlation value.
        # NOTE(review): zero (or integer-typed) entries in corrMat are not
        # guarded against here -- confirm callers never pass them.
        distances = 1/corrMat

        picker = rdsimdiv.HierarchicalClusterPicker(self._type)
        picked = picker.Cluster(distances, len(self._bidList), self._nClusters)

        # The picker reports positions; translate them to the actual bit ids.
        self._clusters = []
        for members in picked:
            self._clusters.append([self._bidList[idx] for idx in members])

    def SetClusters(self, clusters):
        """Install an externally computed clustering.

        ARGUMENTS:
          - clusters : sequence of clusters (each a sequence of bit ids); its
                       length must equal the configured number of clusters
        """
        assert len(clusters) == self._nClusters
        self._clusters = clusters

    def GetClusters(self):
        """Return the current clusters (the stored object itself, not a copy)."""
        return self._clusters

    def MapToClusterScores(self, fp):
        """Map the fingerprint to a vector of per-cluster scores.

        The vector has one entry per cluster; each entry is the number of
        bits of that cluster that are turned on in the fingerprint.

        ARGUMENTS:
          - fp : the fingerprint, indexable by bit id
        """
        scores = [0]*self._nClusters

        for pos, members in enumerate(self._clusters):
            for bid in members:
                if fp[bid]:
                    scores[pos] += 1

        return scores

    def MapToClusterFP(self, fp):
        """Map the fingerprint to a smaller one with one bit per cluster.

        A cluster's bit is turned on in the new fingerprint if any bit of
        that cluster is turned on in the original fingerprint.

        ARGUMENTS:
          - fp : the fingerprint, indexable by bit id
        """
        ebv = DataStructs.ExplicitBitVect(self._nClusters)

        for pos, members in enumerate(self._clusters):
            for bid in members:
                if fp[bid]:
                    ebv.SetBit(pos)
                    # One on-bit is enough; skip the rest of this cluster.
                    break

        return ebv
85