1
2
3
4
5
6
# Import-time guard: this module is deliberately disabled until it is finished.
# The call form of ``raise`` is valid on both Python 2 and Python 3; the
# original comma form is a syntax error on Python 3.
raise NotImplementedError('not finished yet')
""" Lazy generator of 2D pharmacophore signature data.

"""
11 import rdkit.Chem
12 from rdkit.Chem.Pharm2D import SigFactory,Matcher,Utils
13
class Generator(object):
    """ Evaluates bits of a 2D pharmacophore signature on demand.

    Bits are computed one at a time when they are queried (via GetBit() or
    indexing) instead of generating the whole signature up front.

    Important attributes:

     - mol: the molecule whose signature is being worked with

     - sigFactory: the SigFactory object with signature parameters
          NOTE: no preprocessing is carried out for _sigFactory_.
                It *must* be pre-initialized.

     - sig: the signature object obtained from sigFactory.GetSignature();
       its GetSize() bounds the valid bit indices

     - dMat: the distance matrix passed to the matcher

     - bits: dict mapping already-queried bit index -> 0/1, or None when
       caching is disabled

     - pattMatches: per-feature-family lists of matching atom ids, passed to
       the matcher as ``matchingAtoms``

    """

    def __init__(self, sigFactory, mol, dMat=None, bitCache=True):
        """ constructor

        **Arguments**

         - sigFactory: a signature factory, see class docs

         - mol: a molecule, see class docs

         - dMat: (optional) a distance matrix for the molecule.  If this
           is not provided, one will be calculated

         - bitCache: (optional) if nonzero, a local cache of which bits
           have been queried will be maintained.  Otherwise things must
           be recalculated each time a bit is queried.

        **Raises**

         - ValueError: if sigFactory is not a SigFactory.SigFactory instance

        """
        if not isinstance(sigFactory, SigFactory.SigFactory):
            raise ValueError('bad factory')

        self.sigFactory = sigFactory
        self.mol = mol
        # GetBit() and __len__() read self.sig, but it was never assigned.
        # NOTE(review): GetSignature() is how the __main__ script in this file
        # obtains a signature from a factory -- confirm this is the object
        # Matcher.GetAtomsMatchingBit expects as its first argument.
        self.sig = sigFactory.GetSignature()

        if dMat is None:
            useBO = sigFactory.includeBondOrder
            # The file imports ``rdkit.Chem`` (binding only ``rdkit``), so the
            # bare name ``Chem`` is not defined at module import time.
            dMat = rdkit.Chem.GetDistanceMatrix(mol, useBO)
        self.dMat = dMat

        self.bits = {} if bitCache else None

        featFactory = sigFactory.featFactory
        skipFeats = sigFactory.skipFeats
        featFamilies = [fam for fam in featFactory.GetFeatureFamilies()
                        if fam not in skipFeats]

        # Collect the atom ids of every feature, grouped by feature family.
        featMatches = {fam: [] for fam in featFamilies}
        for feat in featFactory.GetFeaturesForMol(mol):
            family = feat.GetFamily()
            if family not in skipFeats:
                featMatches[family].append(feat.GetAtomIds())

        # NOTE(review): the original discarded the matches collected above
        # (overwriting them with argument-less GetMolFeature() calls) and then
        # stored the undefined name ``pattMatches``.  A list ordered like
        # featFamilies is the presumed intent -- confirm against what
        # Matcher.GetAtomsMatchingBit expects for ``matchingAtoms``.
        self.pattMatches = [featMatches[fam] for fam in featFamilies]

    def GetBit(self, idx):
        """ returns 1 if the bit is set and 0 otherwise

        **Arguments**

         - idx: the index of the bit to query

        **Raises**

         - IndexError: if idx is negative or not below the signature size

        """
        if idx < 0 or idx >= self.sig.GetSize():
            raise IndexError('Index %d invalid' % (idx))
        # ``in`` replaces dict.has_key(), which does not exist on Python 3
        if self.bits is not None and idx in self.bits:
            return self.bits[idx]

        # justOne=1: a single match is enough to decide that the bit is set
        tmp = Matcher.GetAtomsMatchingBit(self.sig, idx, self.mol,
                                          dMat=self.dMat, justOne=1,
                                          matchingAtoms=self.pattMatches)
        res = 1 if tmp else 0

        if self.bits is not None:
            self.bits[idx] = res
        return res

    def __len__(self):
        """ allows class to support len()

        Returns the size reported by the signature object.

        """
        return self.sig.GetSize()

    def __getitem__(self, itm):
        """ allows class to support random access.
        Calls self.GetBit()

        """
        return self.GetBit(itm)
107
108
109
110
if __name__ == '__main__':
    # Timing comparison: query a fixed random set of bits per molecule, first
    # through the lazy Generator and then through fully generated fingerprints.
    import random
    import time

    from rdkit import RDConfig, Chem
    from rdkit.Chem.Pharm2D import Gobbi_Pharm2D, Generate

    factory = Gobbi_Pharm2D.factory
    nToDo = 100
    # the context manager closes the data file once the lines have been read
    with open(RDConfig.RDDataDir + "/NCI/first_5K.smi", 'r') as inF:
        inD = inF.readlines()[:nToDo]

    # The SMILES is the first tab-separated column.  str.strip() returns a new
    # string; the original called it and discarded the result.
    mols = [Chem.MolFromSmiles(line.split('\t')[0].strip()) for line in inD]

    # only used to learn the signature size for choosing random bit indices
    sig = factory.GetSignature()

    nBits = 300
    random.seed(23)  # fixed seed so both timing passes query the same bits
    bits = [random.randint(0, sig.GetSize() - 1) for x in range(nBits)]

    # print() with a single pre-formatted string behaves identically on
    # Python 2 and Python 3
    print('Using the Lazy Generator')
    t1 = time.time()
    for i, mol in enumerate(mols):
        if not i % 10:
            print('done mol %d of %d' % (i, len(mols)))
        gen = Generator(factory, mol)
        for bit in bits:
            v = gen[bit]
    t2 = time.time()
    print('\tthat took %4.2f seconds' % (t2 - t1))

    print('Generating and checking signatures')
    t1 = time.time()
    for i, mol in enumerate(mols):
        if not i % 10:
            print('done mol %d of %d' % (i, len(mols)))
        sig = Generate.Gen2DFingerprint(mol, factory)
        for bit in bits:
            v = sig[bit]
    t2 = time.time()
    print('\tthat took %4.2f seconds' % (t2 - t1))
152