1
2
3
4
5
6
7 """ data structures for holding 2D pharmacophore signatures
8
9
10 See Docs/Chem/Pharm2D.triangles.jpg for an illustration of the way
11 pharmacophores are broken into triangles and labelled.
12
13 See Docs/Chem/Pharm2D.signatures.jpg for an illustration of bit
14 numbering
15
16 """
17 import Chem
18 from Chem.Pharm2D import Utils
19 from DataStructs import SparseBitVect as BitVect
20 from Numeric import *
21
22 _verbose = 0
23
24 _sigVersion=100
25
27 """
28
29 **Notes on Use**
30
31 - After any changes, the Init() method should be called
32
33 **Important Attributes**
34
    - patterns: a list of SmartsPatterns used to determine the features.
      Patterns can be initialized directly from a list of pattern objects
      or from a list of SMARTS strings.
38
39 - bins: the list of distance bins to be used.
40
41 - minCount/maxCount: the minimum/maximum number of points to be included
42 in a pharmacophore
43
44 - shortestPathsOnly: toggles limiting the path-discovery algorithms to
45 only find the shortest paths between 2 features.
46
47 - includeBondOrder: toggles inclusion of bond order in consideration of
48 shortest paths.
49
50 **Limitations of Current Implementation**
51
52 - All distances have the same numbers of bins
53
54 - Only shortest-path matches are implemented
55
56
57 """
def __init__(self,patts=None,bins=None,labels=None,minCnt=2,maxCnt=4,
             shortestPathsOnly=1,includeBondOrder=0):
    """ Stores the signature parameters on the instance

    **Arguments**

      - patts: (optional) sequence of patterns; a slice copy is stored

      - bins: (optional) sequence of distance bins; a slice copy is stored

      - labels: (optional) sequence of labels; a slice copy is stored

      - minCnt/maxCnt: minimum/maximum number of points in a pharmacophore

      - shortestPathsOnly: stored as-is in self._shortestPathsOnly

      - includeBondOrder: stored as-is in self._includeBondOrder

    **Notes**

      - any of patts, bins and labels left as None is stored as None

      - self._initLocals() is called last to reset the derived state

    """
    def _sliceCopy(seq):
        # None stays None; anything else is copied with a full slice so
        # that the caller's own sequence object is not the one stored
        if seq is None:
            return None
        return seq[:]

    self._sigVersion = _sigVersion
    self._bv = None

    self._patts = _sliceCopy(patts)
    self._labels = _sliceCopy(labels)
    self.bins = _sliceCopy(bins)

    self._minCnt = minCnt
    self._maxCnt = maxCnt
    self._shortestPathsOnly = shortestPathsOnly
    self._includeBondOrder = includeBondOrder

    self._initLocals()
77
79 """ Internal use only
80
81 """
82 self._bv = None
83 self._size = -1
84 self._starts = {}
85 self._scaffolds = []
86
87
89 """ used by the pickling machinery
90
91 """
92 res = {'_minCnt':self._minCnt,
93 '_maxCnt':self._maxCnt,
94 '_shortestPathsOnly':self._shortestPathsOnly,
95 '_includeBondOrder':self._includeBondOrder,
96 'bins': self.bins,
97 '_bv':self._bv,
98 '_labels':self._labels,
99 '_sigVersion':self._sigVersion,
100 }
101 res['_patts'] = [Chem.MolToSmarts(x) for x in self._patts]
102
103 return res
122 if idx < 0 or idx >= self.GetSize():
123 raise IndexError,'Index %d invalid'%(idx)
124 return self._bv[idx]
125
126
128 self._patts = patts[:]
130 import Chem
131 self._patts = [None]*len(smarts)
132 for i in range(len(smarts)):
133 p = Chem.MolFromSmarts(smarts[i])
134 self._patts[i] = p
136 return self._patts[which]
138 return len(self._patts)
139
143 return self._labels[which]
144
145
147 """ bins should be a list of 2-tuples """
148 self.bins = bins[:]
150 return self.bins[which]
152 return len(self.bins)
154 return self.bins[0][0]
156 return self.bins[-1][1]
157
158
163
168
170 if not val:
171 raise ValueError,'only shortest paths signatures are currently supported'
172 self._shortestPathsOnly = val
174 return self._shortestPathsOnly
175
177 self._includeBondOrder = val
179 return self._includeBondOrder
180
183
def Init(self,createBitVect=1):
    """ Initializes internal parameters.  This **must** be called after
    making any changes to the signature

    **Arguments**

      - createBitVect: (optional) if this is nonzero, a new bit vector
        of the computed size is allocated.  Otherwise the existing bit
        vect is left in place (it better be big enough)

    **Notes**

      - self._starts[n] receives the index of the first bit used by
        n-point pharmacophores; entries are overwritten in place, the
        dictionary itself is not cleared here

      - self._scaffolds is rebuilt and is indexed by the number of
        distances, not by the number of points

      - self._size receives the total number of bits

    """
    maxCount = self.GetMaxCount()
    # one slot per possible distance count, sized from the entry for
    # (maxCount+1)-point pharmacophores in Utils.nPointDistDict
    self._scaffolds = [0]*len(Utils.nPointDistDict[maxCount+1])

    nBitsSoFar = 0
    for nPts in range(self.GetMinCount(),maxCount+1):
        self._starts[nPts] = nBitsSoFar

        nDists = len(Utils.nPointDistDict[nPts])
        scaffolds = Utils.GetPossibleScaffolds(nPts,self.bins)
        self._scaffolds[nDists] = scaffolds

        # every combination of nPts patterns owns one bit per scaffold
        nCombos = Utils.NumCombinations(self.GetNumPatterns(),nPts)
        nBitsSoFar += nCombos * len(scaffolds)

    self._size = nBitsSoFar
    if createBitVect:
        self._bv = BitVect(self._size)
209
210
212 """ OBSOLETE: this has been rewritten in C++
213 Internal use only
214 Returns the index of a bin defined by a set of distances.
215
216 **Arguments**
217
218 - dists: a sequence of distances (not binned)
219
220 - bins: a sorted sequence of distance bins (2-tuples)
221
222 - scaffolds: a list of possible scaffolds (bin combinations)
223
224 **Returns**
225
226 an integer bin index
227
228 **Note**
229
230 the value returned here is not an index in the overall
231 signature. It is, rather, an offset of a scaffold in the
232 possible combinations of distance bins for a given
233 proto-pharmacophore.
234
235 """
236 nBins = len(bins)
237 nDists = len(dists)
238 whichBins = [0]*nDists
239
240
241
242
243
244
245 for i in range(nDists):
246 dist = dists[i]
247 where = -1
248
249
250 startP,endP = 0,len(bins)
251 while startP<endP:
252 midP = (startP+endP) // 2
253 begBin,endBin = bins[midP]
254 if dist < begBin:
255 endP = midP
256 elif dist >= endBin:
257 startP = midP+1
258 else:
259 where = midP
260 break
261 if where < 0:
262 return None
263 whichBins[i] = where
264 res = scaffolds.index(tuple(whichBins))
265 return res
266
def GetBitIdx(self,patts,dists,checkPatts=1):
    """ returns the index for a pharmacophore described using a set of
    patterns and distances

    **Arguments**

      - patts: a sequence of pattern indices

      - dists: a sequence of distance between the patterns, only the
        unique distances should be included, and they should be in the
        order defined in Utils.

      - checkPatts: (optional) if nonzero, the pattern vector is
        checked to ensure it is sorted

    **Returns**

      the integer bit index

    **Raises**

      - IndexError: if the number of patterns is outside the range
        [minCnt,maxCnt], if any pattern index is negative or not
        smaller than the number of patterns, or if the scaffold lookup
        fails with a ValueError

      - ValueError: if checkPatts is nonzero and patts is not sorted

    **Notes**

      - self._starts and self._scaffolds are read here, so they must
        already be populated (Init() fills them in)

      - NOTE(review): the pure-Python _findBinIdx in this file returns
        None when a distance falls outside every bin.  That None is not
        converted here; it reaches the final addition and surfaces as a
        TypeError rather than an IndexError.  Left unchanged because
        callers may depend on it -- confirm before tightening.

    """
    nPoints = len(patts)
    if nPoints < self._minCnt or nPoints > self._maxCnt:
        raise IndexError('bad number of patterns')

    # first bit of the block used by nPoints-point pharmacophores
    startIdx = self._starts[nPoints]

    nPatts = len(self._patts)
    if checkPatts:
        ordered = list(patts)
        ordered.sort()
        if ordered != list(patts):
            raise ValueError('pattern vector not sorted')

    # Validate every index.  Looking only at patts[0] misses negative
    # entries further along when the vector is unsorted (checkPatts=0).
    if nPoints == 0 or min(patts) < 0 or max(patts) >= nPatts:
        raise IndexError('bad pattern index')

    # offset of this pattern combination, counted in combinations ...
    offset = Utils.CountUpTo(nPatts,nPoints,patts)
    if _verbose:
        print('offset for patts %s: %d'%(str(patts),offset))

    # ... converted to bits: each combination owns one bit per scaffold
    scaffolds = self._scaffolds[len(dists)]
    offset *= len(scaffolds)

    if _verbose:
        print('>>>>>>>>>>>>>>>>>>>>>>>')
        print('\tScaffolds: %s %s'%(repr(scaffolds),type(scaffolds)))
        print('\tDists: %s %s'%(repr(dists),type(dists)))
        print('\tbins: %s %s'%(repr(self.bins),type(self.bins)))

    try:
        binIdx = self._findBinIdx(dists,self.bins,scaffolds)
    except ValueError:
        raise IndexError('distance bin not found')

    return startIdx + offset + binIdx
323
def SetBit(self,patts,dists,checkPatts=1):
    """ sets the bit defined by a collection of patterns and distances

    **Arguments**

      - patts: a sequence of pattern indices

      - dists: a sequence of distance between the patterns, only the
        unique distances should be included, and they should be in the
        order defined in Utils.

      - checkPatts: (optional) if nonzero, the pattern vector is
        checked to ensure it is sorted

    **Returns**

      whatever the underlying bit vector's SetBit() returns; per the
      original documentation this is the original status of the bit
      (whether or not it was set)

    **Raises**

      - IndexError: if the calculated index is not smaller than
        GetSize(); anything raised by GetBitIdx() also propagates

    """
    idx = self.GetBitIdx(patts,dists,checkPatts=checkPatts)
    if _verbose:
        # single-argument print calls behave the same on Python 2 and 3
        print('*--> setting bit: %d'%(idx))
        print('\tfrom patts: %s and dists: %s\n'%(repr(patts),repr(dists)))
    size = self.GetSize()
    if idx >= size:
        raise IndexError('bad index (%d) calculated. %d is the max'%(idx,size))
    return self._bv.SetBit(idx)
350
def GetBit(self,patts,dists,checkPatts=1):
    """ returns the value of the bit defined by a collection of patterns
    and distances

    **Arguments**

      - patts: a sequence of pattern indices

      - dists: a sequence of distance between the patterns, only the
        unique distances should be included, and they should be in the
        order defined in Utils.

      - checkPatts: (optional) if nonzero, the pattern vector is
        checked to ensure it is sorted

    **Returns**

      whether or not the bit is set (the value returned by the
      underlying bit vector's GetBit())

    **Raises**

      - IndexError: if the calculated index is not smaller than
        GetSize(); anything raised by GetBitIdx() also propagates

    """
    idx = self.GetBitIdx(patts,dists,checkPatts=checkPatts)
    size = self.GetSize()
    if idx >= size:
        raise IndexError('bad index (%d) calculated. %d is the max'%(idx,size))
    return self._bv.GetBit(idx)
374
376 """ returns our on bits
377
378 """
379 return self._bv.GetOnBits()
380
382 """ returns information about the given bit
383
384 **Arguments**
385
386 - idx: the bit index to be considered
387
388 **Returns**
389
390 a 3-tuple:
391
392 1) the number of points in the pharmacophore
393
394 2) the proto-pharmacophore (tuple of pattern indices)
395
396 3) the scaffold (tuple of distance indices)
397
398 """
399 if idx >= self.GetSize():
400 raise IndexError,'bad index (%d) queried. %d is the max'%(idx,self.GetSize())
401
402 nPts = self.GetMinCount()
403 while nPts < self.GetMaxCount() and self._starts[nPts+1]<=idx:
404 nPts+=1
405
406
407 offsetFromStart = idx - self._starts[nPts]
408 if _verbose:
409 print '\t %d Points, %d offset'%(nPts,offsetFromStart)
410
411
412 nDists = len(Utils.nPointDistDict[nPts])
413 scaffolds = self._scaffolds[nDists]
414
415 nScaffolds = len(scaffolds)
416
417
418 protoIdx = offsetFromStart / nScaffolds
419 indexCombos = Utils.GetIndexCombinations(self.GetNumPatterns(),nPts)
420 combo = indexCombos[protoIdx]
421 if _verbose:
422 print '\t combo: %s'%(str(combo))
423
424
425 scaffoldIdx = offsetFromStart % nScaffolds
426 scaffold = scaffolds[scaffoldIdx]
427 if _verbose:
428 print '\t scaffold: %s'%(str(scaffold))
429
430 return nPts,combo,scaffold
431
433 """ returns HTML with a description of the bit
434
435 **Arguments**
436
437 - bitIdx: an integer bit index
438
439 - includeBins: (optional) if nonzero, information about the bins will be
440 included as well
441
442 - fullPage: (optional) if nonzero, html headers and footers will
443 be included (so as to make the output a complete page)
444
445 **Returns**
446
447 a string with the HTML
448
449 """
450 nPts,combo,scaffold = self.GetBitInfo(bitIdx)
451 labels = [self._labels[x] for x in combo]
452 dMat = zeros((nPts,nPts),Int)
453 dVect = Utils.nPointDistDict[nPts]
454 for idx in range(len(dVect)):
455 i,j = dVect[idx]
456 dMat[i,j] = scaffold[idx]
457 dMat[j,i] = scaffold[idx]
458 if fullPage:
459 lines = ['<html><body>']
460 else:
461 lines = []
462 lines.append("""<h2>Bit %d</h2>
463 <p><b>Num Points:</b> %d
464 """%(bitIdx,nPts))
465 lines.append('<p><b>Distances</b><table border=1>')
466 hdr = ' '.join(['<th>%s</th>'%x for x in labels])
467 lines.append('<tr><td></td>%s</tr>'%(hdr))
468 for i in range(nPts):
469 row = ' '.join(['<td>%s</td>'%(str(dMat[i,x])) for x in range(nPts)])
470 lines.append('<tr><th>%s</th>%s</tr>'%(labels[i],row))
471 lines.append('</table>')
472
473 if includeBins:
474 lines.append('<p> <b>Distance Bin Information</b>')
475 lines.append('<table border=1>')
476 lines.append('<tr><td>bin</td><td>begin</td><td>end</td></tr>')
477 for idx in range(self.GetNumBins()):
478 beg,end = self.GetBin(idx)
479 lines.append('<tr><td>%d</td><td>%d</td><td>%d</td></tr>'%(idx,beg,end))
480 lines.append('</table>')
481 if fullPage:
482 lines.append("</body></html>")
483 return '\n'.join(lines)
484
485
486
# Optional replacement of the bin lookup: when Chem.Pharmacophores.cUtils can
# be imported, its FindBinIdx is installed in place of the Python
# Pharm2DSig._findBinIdx (presumably the C++ rewrite that method's docstring
# mentions -- confirm).  When the import fails, the Python version is kept
# and the ImportError is deliberately ignored.
try:
    from Chem.Pharmacophores import cUtils
except ImportError:
    pass
else:
    Pharm2DSig._findBinIdx = cUtils.FindBinIdx
493
494
495 if __name__=='__main__':
504
506 sig = Pharm2DSig()
507 sig.SetPatternsFromSmarts(['O','N'])
508 sig.SetBins([(0,2),(2,4),(4,8)])
509 sig.SetMinCount(2)
510 sig.SetMaxCount(3)
511 sig.Init()
512 vs = [((0,0),[1]),((1,1),[1]),((0,0),[2]),((0,0),[6]),((0,1),[1])]
513 for patts,dist in vs:
514 idx = sig.GetBitIdx(patts,dist)
515 print patts,dist,idx
516
518 import Chem
519 import Generate
520 sig = Pharm2DSig()
521 sig.SetPatternsFromSmarts(['O'])
522 sig.SetBins([(1,3),(3,4),(4,8)])
523 sig.SetMinCount(2)
524 sig.SetMaxCount(3)
525 sig.Init()
526
527
528
529
530 mol = Chem.MolFromSmiles('OCCC1COCCO1')
531 try:
532 Generate.Gen2DFingerprint(mol,sig)
533 except TypeError:
534 import traceback
535 traceback.print_exc()
536 print '---------c'
537 patts,dist = [0,0],[4]
538
539
540 sig.SetBit(patts,dist)
541
542
543
544
546 sig = Pharm2DSig()
547 sig.SetPatternsFromSmarts(['[OD1]','[OD2]','[ND2]','[N]'])
548 sig.SetBins([(0,2),(2,4),(4,6),(6,8),(8,100)])
549 sig.SetMinCount(2)
550 sig.SetMaxCount(4)
551 sig.Init()
552 vs = [((0,0),[1]),((1,1),[1]),((0,0),[2]),((0,0),[6]),((0,1),[1]),((0,0,0),[1,1,1]),((0,0,0),[1,1,3]),
553 ((0,0,0),[3,1,2]),((0,0,1),[1,1,1]),]
554 for patts,dist in vs:
555 print patts,dist,sig.GetBitIdx(patts,dist)
556
557 test2()
558