1
2
3
4
5
6
7 """ contains factory class for producing signatures
8
9
10 """
11 from rdkit.DataStructs import SparseBitVect,IntSparseIntVect,LongSparseIntVect
12 from rdkit.Chem.Pharm2D import Utils
13 import copy
14 import numpy
15
# Module-level debugging switch: when set to a true value, the bit-index
# routines in this module print their intermediate values.
_verbose = False
17
18
20 """
21
22 SigFactory's are used by creating one, setting the relevant
23 parameters, then calling the GetSignature() method each time a
24 signature is required.
25
26 """
27 - def __init__(self,featFactory,useCounts=False,minPointCount=2,maxPointCount=3,
28 shortestPathsOnly=True,includeBondOrder=False,skipFeats=None,
29 trianglePruneBins=True):
30 self.featFactory = featFactory
31 self.useCounts=useCounts
32 self.minPointCount=minPointCount
33 self.maxPointCount=maxPointCount
34 self.shortestPathsOnly=shortestPathsOnly
35 self.includeBondOrder=includeBondOrder
36 self.trianglePruneBins=trianglePruneBins
37 if skipFeats is None:
38 self.skipFeats=[]
39 else:
40 self.skipFeats=skipFeats
41 self._bins = None
42 self.sigKlass=None
43
44
46 """ bins should be a list of 2-tuples """
47 self._bins = copy.copy(bins)
48 self.Init()
49
53 return len(self._bins)
54
56 return self.sigKlass(self._sigSize)
57
def _GetBitSummaryData(self, bitIdx):
    """ Internal use only
    collects the data used to describe a bit

    **Arguments**

      - bitIdx: an integer bit index

    **Returns**

      a 5-tuple:

        1) the number of points in the pharmacophore

        2) the proto-pharmacophore (as returned by GetBitInfo)

        3) the scaffold (as returned by GetBitInfo)

        4) the feature family names for the entries of the
           proto-pharmacophore

        5) an nPts x nPts symmetric integer matrix holding the
           scaffold's entry for each pair of points

    """
    nPts, combo, scaffold = self.GetBitInfo(bitIdx)
    fams = self.GetFeatFamilies()
    labels = [fams[x] for x in combo]

    # numpy.int was only an alias of the builtin int and was removed in
    # NumPy 1.24; the builtin gives the same dtype on every version.
    dMat = numpy.zeros((nPts, nPts), int)

    # entry idx of the scaffold belongs to the point pair (i,j) listed at
    # position idx of Utils.nPointDistDict[nPts]; fill both triangles
    for idx, (i, j) in enumerate(Utils.nPointDistDict[nPts]):
        dMat[i, j] = scaffold[idx]
        dMat[j, i] = scaffold[idx]

    return nPts, combo, scaffold, labels, dMat
70
def GetBitDescriptionAsText(self, bitIdx, includeBins=0, fullPage=1):
    """ intended to produce an HTML description of the bit

    **Arguments**

      - bitIdx: an integer bit index

      - includeBins: (optional) documented as: if nonzero, information
        about the bins will be included as well.  Not read by the
        current body.

      - fullPage: (optional) documented as: if nonzero, html headers and
        footers will be included (so as to make the output a complete
        page).  Not read by the current body.

    **Returns**

      NOTE(review): the documented contract is a string with the HTML,
      but this body only gathers the summary data for the bit and has
      no return statement, so callers receive None.

    """
    summary = self._GetBitSummaryData(bitIdx)
    # the unpacking is kept so that a malformed summary still fails here
    nPts, combo, scaffold, labels, dMat = summary
90
92 """ returns a text description of the bit
93
94 **Arguments**
95
96 - bitIdx: an integer bit index
97
98 **Returns**
99
100 a string
101
102 """
103 nPts,combo,scaffold,labels,dMat=self._GetBitSummaryData(bitIdx)
104 res = " ".join(labels)+ " "
105 for row in dMat:
106 res += "|"+" ".join([str(x) for x in row])
107 res += "|"
108 return res
109
def _findBinIdx(self, dists, bins, scaffolds):
    """ OBSOLETE: this has been rewritten in C++
    Internal use only
    Returns the index of a bin defined by a set of distances.

    **Arguments**

      - dists: a sequence of distances (not binned)

      - bins: a sorted sequence of distance bins (2-tuples)

      - scaffolds: a list of possible scaffolds (bin combinations)

    **Returns**

      an integer bin index, or None if one of the distances does not
      fall inside any bin

    **Raises**

      ValueError (from list.index) if every distance falls in a bin but
      the resulting combination of bins is not one of the scaffolds

    **Note**

      the value returned here is not an index in the overall
      signature.  It is, rather, an offset of a scaffold in the
      possible combinations of distance bins for a given
      proto-pharmacophore.

    """
    whichBins = [0] * len(dists)

    for i, dist in enumerate(dists):
        where = -1

        # binary search over the sorted bins; a bin (begBin, endBin)
        # contains dist when begBin <= dist < endBin
        startP, endP = 0, len(bins)
        while startP < endP:
            midP = (startP + endP) // 2
            begBin, endBin = bins[midP]
            if dist < begBin:
                endP = midP
            elif dist >= endBin:
                startP = midP + 1
            else:
                where = midP
                break
        if where < 0:
            # this distance is not covered by any bin
            return None
        whichBins[i] = where

    res = scaffolds.index(tuple(whichBins))
    if _verbose:
        # single-argument print calls behave the same on Python 2 and 3
        print('----- _fBI -----------')
        print(' scaffolds: %s' % (scaffolds,))
        print(' bins: %s' % (whichBins,))
        print(' res: %s' % (res,))
    return res
170
175
177 featFamilies=self.GetFeatFamilies()
178 featMatches = {}
179 for fam in featFamilies:
180 featMatches[fam] = []
181 feats = self.featFactory.GetFeaturesForMol(mol,includeOnly=fam)
182 for feat in feats:
183 featMatches[fam].append(feat.GetAtomIds())
184 return [featMatches[x] for x in featFamilies]
185
def GetBitIdx(self, featIndices, dists, sortIndices=True):
    """ returns the index for a pharmacophore described using a set of
    feature indices and distances

    **Arguments**

      - featIndices: a sequence of feature indices

      - dists: a sequence of distance between the features, only the
        unique distances should be included, and they should be in the
        order defined in Utils.

      - sortIndices : sort the indices

    **Returns**

      the integer bit index

    **Raises**

      - NotImplementedError: more than 3 points were provided

      - IndexError: the number of points is outside
        [minPointCount, maxPointCount], a feature index is out of
        range, or the distances do not map onto a known combination
        of distance bins

    """
    nPoints = len(featIndices)
    if nPoints > 3:
        raise NotImplementedError('>3 points not supported')
    if nPoints < self.minPointCount:
        raise IndexError('bad number of points')
    if nPoints > self.maxPointCount:
        raise IndexError('bad number of points')

    # first bit used by pharmacophores with this number of points
    startIdx = self._starts[nPoints]

    if sortIndices:
        featIndices = sorted(featIndices)

    # with sorted indices the first entry is the smallest one
    if featIndices[0] < 0:
        raise IndexError('bad feature index')
    if max(featIndices) >= self._nFeats:
        raise IndexError('bad feature index')

    if nPoints == 3:
        featIndices, dists = Utils.OrderTriangle(featIndices, dists)

    # position of this combination of features among all combinations,
    # scaled by the number of scaffolds available per combination
    offset = Utils.CountUpTo(self._nFeats, nPoints, featIndices)
    if _verbose:
        print('offset for feature %s: %d' % (str(featIndices), offset))
    scaffolds = self._scaffolds[len(dists)]
    offset *= len(scaffolds)

    if _verbose:
        print('>>>>>>>>>>>>>>>>>>>>>>>')
        print('\tScaffolds: %s %s' % (repr(scaffolds), type(scaffolds)))
        print('\tDists: %s %s' % (repr(dists), type(dists)))
        print('\tbins: %s %s' % (repr(self._bins), type(self._bins)))

    try:
        binIdx = self._findBinIdx(dists, self._bins, scaffolds)
    except ValueError:
        binIdx = None
    if binIdx is None:
        # The Python bin search returns None when a distance lies outside
        # every bin; adding that to the offsets used to fail with a
        # TypeError.  Report it like any other bin that cannot be found.
        fams = self.GetFeatFamilies()
        fams = [fams[x] for x in featIndices]
        raise IndexError('distance bin not found: feats: %s; dists=%s; bins=%s; scaffolds: %s' % (fams, dists, self._bins, self._scaffolds))

    return startIdx + offset + binIdx
247
def GetBitInfo(self, idx):
    """ returns information about the given bit

    **Arguments**

      - idx: the bit index to be considered

    **Returns**

      a 3-tuple:

        1) the number of points in the pharmacophore

        2) the proto-pharmacophore (tuple of pattern indices)

        3) the scaffold (tuple of distance indices)

    **Raises**

      IndexError if idx is negative or not smaller than the signature
      size

    """
    if idx < 0 or idx >= self._sigSize:
        raise IndexError('bad index (%d) queried. %d is the max' % (idx, self._sigSize))

    # find the point count whose block of bits contains idx
    nPts = self.minPointCount
    while nPts < self.maxPointCount and self._starts[nPts + 1] <= idx:
        nPts += 1

    offsetFromStart = idx - self._starts[nPts]
    if _verbose:
        print('\t %d Points, %d offset' % (nPts, offsetFromStart))

    nDists = len(Utils.nPointDistDict[nPts])
    scaffolds = self._scaffolds[nDists]
    nScaffolds = len(scaffolds)

    # Every feature combination owns nScaffolds consecutive bits, so the
    # quotient picks the combination and the remainder picks the scaffold.
    # Floor division keeps the quotient usable as a sequence index; true
    # division would produce a float on Python 3.
    protoIdx = offsetFromStart // nScaffolds
    indexCombos = Utils.GetIndexCombinations(self._nFeats, nPts)
    combo = tuple(indexCombos[protoIdx])
    if _verbose:
        print('\t combo: %s' % (str(combo)))

    scaffoldIdx = offsetFromStart % nScaffolds
    scaffold = scaffolds[scaffoldIdx]
    if _verbose:
        print('\t scaffold: %s' % (str(scaffold)))
    return nPts, combo, scaffold
297
def Init(self):
    """ Initializes internal parameters.  This **must** be called after
    making any changes to the signature parameters

    Sets:

      - self._scaffolds: list indexed by number of distances, holding
        the possible scaffolds for each point count in use

      - self._starts: dict mapping a point count to the first bit
        assigned to pharmacophores with that many points

      - self._nFeats: number of feature families not listed in
        self.skipFeats

      - self._sigSize: total number of bits

      - self.sigKlass: SparseBitVect when counts are not used,
        otherwise IntSparseIntVect (signature size below 2**31) or
        LongSparseIntVect

    """
    # sized from the distance count of the next-larger point count, so the
    # slot for maxPointCount's distance count is guaranteed to exist
    self._scaffolds = [0] * len(Utils.nPointDistDict[self.maxPointCount + 1])
    self._starts = {}

    families = self.featFactory.GetFeatureFamilies()
    if self.skipFeats:
        self._nFeats = len([fam for fam in families if fam not in self.skipFeats])
    else:
        self._nFeats = len(families)

    total = 0
    for nPts in range(self.minPointCount, self.maxPointCount + 1):
        self._starts[nPts] = total
        slot = len(Utils.nPointDistDict[nPts])
        possible = Utils.GetPossibleScaffolds(nPts, self._bins,
                                              useTriangleInequality=self.trianglePruneBins)
        self._scaffolds[slot] = possible
        # one bit per (feature combination, scaffold) pair
        total += Utils.NumCombinations(self._nFeats, nPts) * len(possible)
    self._sigSize = total

    if self.useCounts:
        if self._sigSize < 2**31:
            self.sigKlass = IntSparseIntVect
        else:
            self.sigKlass = LongSparseIntVect
    else:
        self.sigKlass = SparseBitVect
329
# Optional accelerator: if the cUtils extension module can be imported, its
# FindBinIdx replaces the Python _findBinIdx on the class (presumably the
# C++ rewrite the method's docstring mentions -- confirm).  When the
# import fails the Python implementation is kept silently.
try:
    from rdkit.Chem.Pharmacophores import cUtils
except ImportError:
    pass
else:
    SigFactory._findBinIdx = cUtils.FindBinIdx
338