1
2
3
4
5 from SimDivFilters import rdSimDivPickers as rdsimdiv
6 import DataStructs
7 from Numeric import *
9 """ Class to cluster a set of bits based on their correlation
10
11 The correlation matrix is first built by reading the fingerprints
12 from a database or a list of fingerprints
13 """
14
# Body of the initializer (its `def` line is not present in this extract).
# Starts with no clusters and records the bit-id list, the requested number
# of clusters and the clustering type for later use.
16 self._clusters = []
17 self._bidList = idList
18
19 self._nClusters = nCluster
# NOTE(review): `type` is presumably a parameter here, which would shadow the
# builtin of the same name -- confirm against the missing signature.
20 self._type = type
21
23
# Body of the clustering routine (its `def` line is not present in this extract).
# The picker is handed the element-wise reciprocal of corrMat.
# NOTE(review): presumably corrMat is a numeric array of correlations and the
# reciprocal serves as a distance; a zero entry would divide by zero -- confirm.
24 distMat = 1/corrMat
25
# Build a hierarchical cluster picker configured with the stored clustering type.
26 pkr = rdsimdiv.HierarchicalClusterPicker(self._type)
27
# Request self._nClusters clusters over len(self._bidList) items.
28 cls = pkr.Cluster(distMat, len(self._bidList), self._nClusters)
29
# The picker's clusters hold positions into self._bidList; translate each
# position to the corresponding bit id and store the result per cluster.
30 self._clusters = []
31 for cl in cls :
32 bcls = []
33 for i in cl :
34 bid = self._bidList[i]
35 bcls.append(bid)
36 self._clusters.append(bcls)
37
# Body of the cluster setter (its `def` line is not present in this extract).
# Replaces the stored clusters with the supplied ones; the number of supplied
# clusters must equal the configured cluster count.
# NOTE(review): the check is an `assert`, so it is skipped when Python runs
# with -O and a mismatched list would then be stored silently.
39 assert len(clusters) == self._nClusters
40 self._clusters = clusters
41
44
46 """ Map the fingerprint to a real-valued vector of scores based on the bit clusters
47
48 The dimension of the vector is the same as the number of clusters. Each value in the
49 vector corresponds to the number of bits in the corresponding cluster
50 that are turned on in the fingerprint
51
52 ARGUMENTS:
53 - fp : the fingerprint
54 """
55
# One counter per cluster, all starting at zero.
56 scores = [0]*self._nClusters
57
# i tracks the position of the current cluster in self._clusters and is used
# as the index into scores.
# NOTE(review): scores is sized from self._nClusters, not len(self._clusters);
# more stored clusters than that would raise IndexError -- confirm they agree.
58 i = 0
59 for cls in self._clusters:
# Count every bit id of this cluster that is set in fp.
60 for bid in cls :
61 if fp[bid] :
62 scores[i] += 1
63
# Advance to the next cluster's slot.
64 i += 1
65
66 return scores
67
69 """ Map the fingerprint to a smaller sized (= number of clusters) fingerprint
70
71 Each cluster gets a bit in the new fingerprint, which is turned on if any of the bits in
72 the cluster are turned on in the original fingerprint"""
73
# The result bit vector has one bit per configured cluster.
74 ebv = DataStructs.ExplicitBitVect(self._nClusters)
# i is the position of the current cluster, i.e. the bit to set in ebv.
75 i = 0
76
77 for cls in self._clusters:
78 for bid in cls :
79 if fp[bid] :
# One set member bit is enough to turn the cluster's bit on, so the
# remaining members of this cluster are not examined.
80 ebv.SetBit(i)
81 break
# Advance to the next cluster's bit.
82 i += 1
83
84 return ebv
85