Package Chem :: Package Pharm2D :: Module Signature
[hide private]
[frames | no frames]

Source Code for Module Chem.Pharm2D.Signature

  1  # $Id: Signature.py 455 2007-12-18 06:37:48Z glandrum $ 
  2  # 
  3  # Copyright (C) 2002-2006 greg Landrum and Rational Discovery LLC 
  4  # 
  5  #   @@ All Rights Reserved  @@ 
  6  # 
  7  """ data structures for holding 2D pharmacophore signatures 
  8   
  9   
 10    See Docs/Chem/Pharm2D.triangles.jpg for an illustration of the way 
 11    pharmacophores are broken into triangles and labelled. 
 12   
 13    See Docs/Chem/Pharm2D.signatures.jpg for an illustration of bit 
 14    numbering 
 15   
 16  """ 
 17  import Chem 
 18  from Chem.Pharm2D import Utils 
 19  from DataStructs import SparseBitVect as BitVect 
 20  from Numeric import * 
 21   
 22  _verbose = 0 
 23   
 24  _sigVersion=100 
 25   
class Pharm2DSig(object):
    """ Data structure holding a 2D pharmacophore signature.

    **Notes on Use**

      - After any changes, the Init() method should be called

    **Important Attributes**

      - patterns: a list of SmartsPatterns used to determine the features
        patterns can be initialized directly from the list or from a list
        of SMARTS strings

      - bins: the list of distance bins to be used.

      - minCount/maxCount: the minimum/maximum number of points to be
        included in a pharmacophore

      - shortestPathsOnly: toggles limiting the path-discovery algorithms
        to only find the shortest paths between 2 features.

      - includeBondOrder: toggles inclusion of bond order in consideration
        of shortest paths.

    **Limitations of Current Implementation**

      - All distances have the same numbers of bins

      - Only shortest-path matches are implemented

    """

    def __init__(self, patts=None, bins=None, labels=None, minCnt=2, maxCnt=4,
                 shortestPathsOnly=1, includeBondOrder=0):
        """ Stores (shallow copies of) the supplied parameters.

        Init() still has to be called before the signature can be used;
        the constructor only resets the derived state.

        **Arguments**

          - patts: (optional) sequence of patterns, copied with patts[:]

          - bins: (optional) sequence of distance bins, copied with bins[:]

          - labels: (optional) sequence of labels, copied with labels[:]

          - minCnt/maxCnt: minimum/maximum number of points per
            pharmacophore

          - shortestPathsOnly, includeBondOrder: stored as given (no
            validation is done here)

        """
        self._sigVersion = _sigVersion
        self._bv = None

        self._patts = None
        if patts is not None:
            self._patts = patts[:]

        self._labels = None
        if labels is not None:
            self._labels = labels[:]

        self.bins = None
        if bins is not None:
            self.bins = bins[:]

        self._minCnt = minCnt
        self._maxCnt = maxCnt
        self._shortestPathsOnly = shortestPathsOnly
        self._includeBondOrder = includeBondOrder
        self._initLocals()

    def _initLocals(self):
        """ Internal use only

        Resets everything that Init() derives: the bit vector, the size,
        the per-point-count start offsets and the scaffold lists.

        """
        self._bv = None
        self._size = -1
        self._starts = {}
        self._scaffolds = []

    def __getstate__(self):
        """ used by the pickling machinery

        The patterns are stored as SMARTS strings (via Chem.MolToSmarts);
        derived state (_size, _starts, _scaffolds) is not stored and is
        rebuilt by __setstate__.

        """
        res = {
            '_minCnt': self._minCnt,
            '_maxCnt': self._maxCnt,
            '_shortestPathsOnly': self._shortestPathsOnly,
            '_includeBondOrder': self._includeBondOrder,
            'bins': self.bins,
            '_bv': self._bv,
            '_labels': self._labels,
            '_sigVersion': self._sigVersion,
        }
        res['_patts'] = [Chem.MolToSmarts(x) for x in self._patts]
        return res

    def __setstate__(self, state):
        """ used by the pickling machinery

        Rebuilds the patterns from their SMARTS strings and recomputes the
        derived state with Init(), keeping the unpickled bit vector.

        """
        self.__dict__ = state
        self.SetPatternsFromSmarts(state['_patts'])
        # _initLocals() clears _bv, so hold on to the unpickled vector
        bv = self._bv
        self._initLocals()
        self._bv = bv
        # states without a version entry get the current module version
        if not hasattr(self, '_sigVersion'):
            self._sigVersion = _sigVersion
        self.Init(createBitVect=0)

    def __len__(self):
        """ returns the number of bits in the signature (see GetSize()) """
        return self.GetSize()

    def __getitem__(self, idx):
        """ returns self._bv[idx]; raises IndexError unless
        0 <= idx < GetSize() """
        if idx < 0 or idx >= self.GetSize():
            raise IndexError('Index %d invalid' % (idx))
        return self._bv[idx]

    def SetPatterns(self, patts):
        """ stores a shallow copy of the pattern list """
        self._patts = patts[:]

    def SetPatternsFromSmarts(self, smarts):
        """ builds the pattern list by passing each SMARTS string to
        Chem.MolFromSmarts """
        # function-level import kept from the original implementation
        import Chem
        self._patts = [Chem.MolFromSmarts(sma) for sma in smarts]

    def GetPattern(self, which):
        """ returns the pattern at index which """
        return self._patts[which]

    def GetNumPatterns(self):
        """ returns the number of patterns """
        return len(self._patts)

    def SetLabels(self, labels):
        """ stores a shallow copy of the label list """
        self._labels = labels[:]

    def GetLabel(self, which):
        """ returns the label at index which """
        return self._labels[which]

    def SetBins(self, bins):
        """ bins should be a list of 2-tuples """
        self.bins = bins[:]

    def GetBin(self, which):
        """ returns the bin at index which """
        return self.bins[which]

    def GetNumBins(self):
        """ returns the number of distance bins """
        return len(self.bins)

    def GetMinDist(self):
        """ returns the lower bound of the first bin """
        return self.bins[0][0]

    def GetMaxDist(self):
        """ returns the upper bound of the last bin """
        return self.bins[-1][1]

    def SetMinCount(self, min):
        """ sets the minimum number of points in a pharmacophore """
        self._minCnt = min

    def GetMinCount(self):
        """ returns the minimum number of points in a pharmacophore """
        return self._minCnt

    def SetMaxCount(self, max):
        """ sets the maximum number of points in a pharmacophore """
        self._maxCnt = max

    def GetMaxCount(self):
        """ returns the maximum number of points in a pharmacophore """
        return self._maxCnt

    def SetShortestPathsOnly(self, val):
        """ sets the shortest-paths-only flag; a false value raises
        ValueError because nothing else is supported """
        if not val:
            raise ValueError(
                'only shortest paths signatures are currently supported')
        self._shortestPathsOnly = val

    def GetShortestPathsOnly(self):
        """ returns the shortest-paths-only flag """
        return self._shortestPathsOnly

    def SetIncludeBondOrder(self, val):
        """ sets the include-bond-order flag """
        self._includeBondOrder = val

    def GetIncludeBondOrder(self):
        """ returns the include-bond-order flag """
        return self._includeBondOrder

    def GetSize(self):
        """ returns the number of bits (-1 until Init() has been called) """
        return self._size

    def Init(self, createBitVect=1):
        """ Initializes internal parameters.  This **must** be called after
        making any changes to the signature

        **Arguments**

          - createBitVect: (optional) if this is nonzero, the bit vector
            used to store the on bits will be allocated.  Otherwise the
            existing bit vect will be reused (it better be big enough)

        """
        accum = 0
        # start from a clean table so offsets computed for an earlier
        # (different) min/max count range cannot linger
        self._starts = {}
        # _scaffolds is indexed by the number of distances in a
        # pharmacophore; it is sized from the (maxCount+1)-point entry
        # NOTE(review): presumably that entry is always longer than the
        # maxCount one, and Utils.nPointDistDict must contain it - confirm
        self._scaffolds = [0] * (
            len(Utils.nPointDistDict[self.GetMaxCount() + 1]))
        for nPts in range(self.GetMinCount(), self.GetMaxCount() + 1):
            self._starts[nPts] = accum
            nDistsHere = len(Utils.nPointDistDict[nPts])
            scaffoldsHere = Utils.GetPossibleScaffolds(nPts, self.bins)
            self._scaffolds[nDistsHere] = scaffoldsHere
            # one block of len(scaffoldsHere) bits per pattern combination
            accum += (Utils.NumCombinations(self.GetNumPatterns(), nPts) *
                      len(scaffoldsHere))
        self._size = accum
        if createBitVect:
            self._bv = BitVect(self._size)

    def _findBinIdx(self, dists, bins, scaffolds):
        """ OBSOLETE: this has been rewritten in C++
        Internal use only
        Returns the index of a bin defined by a set of distances.

        **Arguments**

          - dists: a sequence of distances (not binned)

          - bins: a sorted sequence of distance bins (2-tuples)

          - scaffolds: a list of possible scaffolds (bin combinations)

        **Returns**

          an integer bin index, or None if one of the distances does not
          fall in any bin

        **Raises**

          ValueError if the combination of bins is not in scaffolds

        **Note**

          the value returned here is not an index in the overall
          signature.  It is, rather, an offset of a scaffold in the
          possible combinations of distance bins for a given
          proto-pharmacophore.

        """
        nBins = len(bins)
        whichBins = [0] * len(dists)

        # This would be a ton easier if we had contiguous bins
        # (i.e. if we could maintain the bins as a list of bounds)
        # because then we could use Python's bisect module.
        # Since we can't do that, we've got to do our own binary
        # search here.
        for i, dist in enumerate(dists):
            where = -1
            startP, endP = 0, nBins
            while startP < endP:
                midP = (startP + endP) // 2
                begBin, endBin = bins[midP]
                if dist < begBin:
                    endP = midP
                elif dist >= endBin:
                    startP = midP + 1
                else:
                    # begBin <= dist < endBin
                    where = midP
                    break
            if where < 0:
                return None
            whichBins[i] = where
        return scaffolds.index(tuple(whichBins))

    def GetBitIdx(self, patts, dists, checkPatts=1):
        """ returns the index for a pharmacophore described using a set of
        patterns and distances

        **Arguments**

          - patts: a sequence of pattern indices

          - dists: a sequence of distance between the patterns, only the
            unique distances should be included, and they should be in the
            order defined in Utils.

          - checkPatts: (optional) if nonzero, the pattern vector is
            checked to ensure it is sorted

        **Returns**

          the integer bit index

        **Raises**

          - IndexError: wrong number of patterns, pattern index out of
            range, or the bin combination is not a known scaffold

          - ValueError: checkPatts is set and patts is not sorted

        **Note**

          with the pure-Python _findBinIdx, a distance that falls outside
          every bin makes _findBinIdx return None, and the final addition
          then fails with a TypeError.

        """
        nPoints = len(patts)
        if nPoints < self._minCnt:
            raise IndexError('bad number of patterns')
        if nPoints > self._maxCnt:
            raise IndexError('bad number of patterns')

        # this is the start of the nPoint-point pharmacophores
        startIdx = self._starts[nPoints]

        #
        # now we need to map the pattern indices to an offset from startIdx
        #
        nPatts = len(self._patts)
        if checkPatts:
            if sorted(patts) != list(patts):
                raise ValueError('pattern vector not sorted')
        # patts is sorted (or the caller vouched for it), so the first
        # entry is the smallest one
        if patts[0] < 0:
            raise IndexError('bad pattern index')
        if max(patts) >= nPatts:
            raise IndexError('bad pattern index')

        scaffolds = self._scaffolds[len(dists)]
        offset = Utils.CountUpTo(nPatts, nPoints, patts)
        if _verbose:
            print('offset for patts %s: %d' % (str(patts), offset))
        offset *= len(scaffolds)

        if _verbose:
            print('>>>>>>>>>>>>>>>>>>>>>>>')
            print('\tScaffolds: %s %s' % (repr(scaffolds), type(scaffolds)))
            print('\tDists: %s %s' % (repr(dists), type(dists)))
            print('\tbins: %s %s' % (repr(self.bins), type(self.bins)))
        try:
            binIdx = self._findBinIdx(dists, self.bins, scaffolds)
        except ValueError:
            raise IndexError('distance bin not found')

        return startIdx + offset + binIdx

    def SetBit(self, patts, dists, checkPatts=1):
        """ sets the bit defined by a collection of patterns and distances

        **Arguments**

          - patts: a sequence of pattern indices

          - dists: a sequence of distance between the patterns, only the
            unique distances should be included, and they should be in the
            order defined in Utils.

          - checkPatts: (optional) if nonzero, the pattern vector is
            checked to ensure it is sorted

        **Returns**

          the original status of the bit (whether or not it was set)

        """
        idx = self.GetBitIdx(patts, dists, checkPatts=checkPatts)
        if _verbose:
            print('*--> setting bit: %d' % (idx))
            print('\tfrom patts: %s and dists: %s\n' %
                  (repr(patts), repr(dists)))
        if idx >= self.GetSize():
            raise IndexError('bad index (%d) calculated. %d is the max' %
                             (idx, self.GetSize()))
        return self._bv.SetBit(idx)

    def GetBit(self, patts, dists, checkPatts=1):
        """ returns the value of the bit defined by a collection of
        patterns and distances

        **Arguments**

          - patts: a sequence of pattern indices

          - dists: a sequence of distance between the patterns, only the
            unique distances should be included, and they should be in the
            order defined in Utils.

          - checkPatts: (optional) if nonzero, the pattern vector is
            checked to ensure it is sorted

        **Returns**

          whether or not the bit is set

        """
        idx = self.GetBitIdx(patts, dists, checkPatts=checkPatts)
        if idx >= self.GetSize():
            raise IndexError('bad index (%d) calculated. %d is the max' %
                             (idx, self.GetSize()))
        return self._bv.GetBit(idx)

    def GetOnBits(self):
        """ returns our on bits

        """
        return self._bv.GetOnBits()

    def GetBitInfo(self, idx):
        """ returns information about the given bit

        **Arguments**

          - idx: the bit index to be considered

        **Returns**

          a 3-tuple:

            1) the number of points in the pharmacophore

            2) the proto-pharmacophore (tuple of pattern indices)

            3) the scaffold (tuple of distance indices)

        **Raises**

          IndexError unless 0 <= idx < GetSize()

        """
        # negative indices are rejected too, as in __getitem__
        if idx < 0 or idx >= self.GetSize():
            raise IndexError('bad index (%d) queried. %d is the max' %
                             (idx, self.GetSize()))
        # first figure out how many points are in the p'cophore
        nPts = self.GetMinCount()
        while nPts < self.GetMaxCount() and self._starts[nPts + 1] <= idx:
            nPts += 1

        # how far are we in from the start point?
        offsetFromStart = idx - self._starts[nPts]
        if _verbose:
            print('\t %d Points, %d offset' % (nPts, offsetFromStart))

        # lookup the number of scaffolds
        nDists = len(Utils.nPointDistDict[nPts])
        scaffolds = self._scaffolds[nDists]
        nScaffolds = len(scaffolds)

        # figure out to which proto-pharmacophore we belong
        # (floor division: the result is used as a list index)
        protoIdx = offsetFromStart // nScaffolds
        indexCombos = Utils.GetIndexCombinations(self.GetNumPatterns(), nPts)
        combo = indexCombos[protoIdx]
        if _verbose:
            print('\t combo: %s' % (str(combo)))

        # and which scaffold:
        scaffold = scaffolds[offsetFromStart % nScaffolds]
        if _verbose:
            print('\t scaffold: %s' % (str(scaffold)))

        return nPts, combo, scaffold

    def GetBitDescription(self, bitIdx, includeBins=0, fullPage=1):
        """ returns HTML with a description of the bit

        **Arguments**

          - bitIdx: an integer bit index

          - includeBins: (optional) if nonzero, information about the bins
            will be included as well

          - fullPage: (optional) if nonzero, html headers and footers will
            be included (so as to make the output a complete page)

        **Returns**

          a string with the HTML

        """
        nPts, combo, scaffold = self.GetBitInfo(bitIdx)
        labels = [self._labels[x] for x in combo]

        # symmetric nPts x nPts table holding the scaffold entry of each
        # point pair
        dMat = zeros((nPts, nPts), Int)
        for pos, (i, j) in enumerate(Utils.nPointDistDict[nPts]):
            dMat[i, j] = scaffold[pos]
            dMat[j, i] = scaffold[pos]

        if fullPage:
            lines = ['<html><body>']
        else:
            lines = []
        lines.append("""<h2>Bit %d</h2>
<p><b>Num Points:</b> %d
""" % (bitIdx, nPts))
        lines.append('<p><b>Distances</b><table border=1>')
        hdr = ' '.join(['<th>%s</th>' % x for x in labels])
        lines.append('<tr><td></td>%s</tr>' % (hdr))
        for i in range(nPts):
            row = ' '.join(['<td>%s</td>' % (str(dMat[i, x]))
                            for x in range(nPts)])
            lines.append('<tr><th>%s</th>%s</tr>' % (labels[i], row))
        lines.append('</table>')

        if includeBins:
            lines.append('<p> <b>Distance Bin Information</b>')
            lines.append('<table border=1>')
            lines.append('<tr><td>bin</td><td>begin</td><td>end</td></tr>')
            for binIdx in range(self.GetNumBins()):
                beg, end = self.GetBin(binIdx)
                lines.append('<tr><td>%d</td><td>%d</td><td>%d</td></tr>' %
                             (binIdx, beg, end))
            lines.append('</table>')
        if fullPage:
            lines.append("</body></html>")
        return '\n'.join(lines)
# Replace the Python bin search with cUtils.FindBinIdx when that module can
# be imported; otherwise the Pharm2DSig._findBinIdx method is left in place.
try:
    from Chem.Pharmacophores import cUtils
except ImportError:
    pass
else:
    Pharm2DSig._findBinIdx = cUtils.FindBinIdx


if __name__ == '__main__':
    # Ad-hoc smoke tests; only test2() is run when the module is executed
    # as a script.

    def test1():
        """ prints the size of a 2-pattern, 3-bin, 2-3 point signature """
        sig = Pharm2DSig()
        sig.SetPatternsFromSmarts(['O', 'N'])
        sig.SetBins([(1, 2), (2, 4), (4, 8)])
        sig.SetMinCount(2)
        sig.SetMaxCount(3)
        sig.Init()
        print(sig.GetSize())

    def test2():
        """ prints the bit index of a few 2-point pharmacophores """
        sig = Pharm2DSig()
        sig.SetPatternsFromSmarts(['O', 'N'])
        sig.SetBins([(0, 2), (2, 4), (4, 8)])
        sig.SetMinCount(2)
        sig.SetMaxCount(3)
        sig.Init()
        vs = [((0, 0), [1]), ((1, 1), [1]), ((0, 0), [2]), ((0, 0), [6]),
              ((0, 1), [1])]
        for patts, dist in vs:
            idx = sig.GetBitIdx(patts, dist)
            print('%s %s %s' % (patts, dist, idx))

    def test4():
        """ fingerprints a molecule (printing the traceback of any
        TypeError), then sets one bit by hand """
        import Chem
        import Generate
        sig = Pharm2DSig()
        sig.SetPatternsFromSmarts(['O'])
        sig.SetBins([(1, 3), (3, 4), (4, 8)])
        sig.SetMinCount(2)
        sig.SetMaxCount(3)
        sig.Init()
        mol = Chem.MolFromSmiles('OCCC1COCCO1')
        try:
            Generate.Gen2DFingerprint(mol, sig)
        except TypeError:
            import traceback
            traceback.print_exc()
        print('---------c')
        patts, dist = [0, 0], [4]
        sig.SetBit(patts, dist)

    def test3():
        """ prints the bit index of 2- and 3-point pharmacophores for a
        4-pattern, 5-bin signature """
        sig = Pharm2DSig()
        sig.SetPatternsFromSmarts(['[OD1]', '[OD2]', '[ND2]', '[N]'])
        sig.SetBins([(0, 2), (2, 4), (4, 6), (6, 8), (8, 100)])
        sig.SetMinCount(2)
        sig.SetMaxCount(4)
        sig.Init()
        vs = [((0, 0), [1]), ((1, 1), [1]), ((0, 0), [2]), ((0, 0), [6]),
              ((0, 1), [1]), ((0, 0, 0), [1, 1, 1]), ((0, 0, 0), [1, 1, 3]),
              ((0, 0, 0), [3, 1, 2]), ((0, 0, 1), [1, 1, 1]), ]
        for patts, dist in vs:
            print('%s %s %s' % (patts, dist, sig.GetBitIdx(patts, dist)))

    test2()