Package rdkit :: Package Chem :: Package Pharm2D :: Module Utils
[hide private]
[frames] | no frames]

Source Code for Module rdkit.Chem.Pharm2D.Utils

  1  # $Id: Utils.py 1022 2009-03-19 04:46:11Z glandrum $ 
  2  # 
  3  # Copyright (C) 2002-2006 greg Landrum and Rational Discovery LLC 
  4  # 
  5  #   @@ All Rights Reserved  @@ 
  6  # 
  7  """ utility functionality for the 2D pharmacophores code 
  8   
  9    See Docs/Chem/Pharm2D.triangles.jpg for an illustration of the way 
 10    pharmacophores are broken into triangles and labelled. 
 11   
 12    See Docs/Chem/Pharm2D.signatures.jpg for an illustration of bit 
 13    numbering 
 14   
 15  """ 
 16  import numpy 
 17   
 18  # 
 19  #  number of points in a scaffold -> sequence of distances (p1,p2) in 
 20  #   the scaffold    
 21  # 
 22  nPointDistDict = { 
 23    2: ((0,1),), 
 24    3: ((0,1),(0,2), 
 25        (1,2)), 
 26    4: ((0,1),(0,2),(0,3), 
 27        (1,2),(2,3)), 
 28    5: ((0,1),(0,2),(0,3),(0,4), 
 29        (1,2),(2,3),(3,4)), 
 30    6: ((0,1),(0,2),(0,3),(0,4),(0,5), 
 31        (1,2),(2,3),(3,4),(4,5)), 
 32    7: ((0,1),(0,2),(0,3),(0,4),(0,5),(0,6), 
 33        (1,2),(2,3),(3,4),(4,5),(5,6)), 
 34    8: ((0,1),(0,2),(0,3),(0,4),(0,5),(0,6),(0,7), 
 35        (1,2),(2,3),(3,4),(4,5),(5,6),(6,7)), 
 36    9: ((0,1),(0,2),(0,3),(0,4),(0,5),(0,6),(0,7),(0,8), 
 37        (1,2),(2,3),(3,4),(4,5),(5,6),(6,7),(7,8)), 
 38    10: ((0,1),(0,2),(0,3),(0,4),(0,5),(0,6),(0,7),(0,8),(0,9), 
 39         (1,2),(2,3),(3,4),(4,5),(5,6),(6,7),(7,8),(8,9)), 
 40    } 
 41   
 42  # 
 43  #  number of distances in a scaffold -> number of points in the scaffold 
 44  # 
 45  nDistPointDict = { 
 46    1:2, 
 47    3:3, 
 48    5:4, 
 49    7:5, 
 50    9:6, 
 51    11:7, 
 52    13:8, 
 53    15:9, 
 54    17:10, 
 55    } 
 56   
 57  _trianglesInPharmacophore = {} 
58 -def GetTriangles(nPts):
59 """ returns a tuple with the distance indices for 60 triangles composing an nPts-pharmacophore 61 62 """ 63 global _trianglesInPharmacophore 64 if nPts < 3: return [] 65 res = _trianglesInPharmacophore.get(nPts,[]) 66 if not res: 67 idx1,idx2,idx3=(0,1,nPts-1) 68 while idx1<nPts-2: 69 res.append((idx1,idx2,idx3)) 70 idx1 += 1 71 idx2 += 1 72 idx3 += 1 73 res = tuple(res) 74 _trianglesInPharmacophore[nPts] = res 75 return res
76 77
78 -def _fact(x):
79 if x <= 1: 80 return 1 81 82 accum = 1 83 for i in range(x): 84 accum *= i+1 85 return accum
86
87 -def BinsTriangleInequality(d1,d2,d3):
88 """ checks the triangle inequality for combinations 89 of distance bins. 90 91 the general triangle inequality is: 92 d1 + d2 >= d3 93 the conservative binned form of this is: 94 d1(upper) + d2(upper) >= d3(lower) 95 96 """ 97 if d1[1]+d2[1]<d3[0]: return False 98 if d2[1]+d3[1]<d1[0]: return False 99 if d3[1]+d1[1]<d2[0]: return False 100 101 return True
102
103 -def ScaffoldPasses(combo,bins=None):
104 """ checks the scaffold passed in to see if all 105 contributing triangles can satisfy the triangle inequality 106 107 the scaffold itself (encoded in combo) is a list of binned distances 108 109 """ 110 # this is the number of points in the pharmacophore 111 nPts = nDistPointDict[len(combo)] 112 tris = GetTriangles(nPts) 113 for tri in tris: 114 ds = [bins[combo[x]] for x in tri] 115 if not BinsTriangleInequality(ds[0],ds[1],ds[2]): 116 return False 117 return True
118 119 120 _numCombDict = {}
121 -def NumCombinations(nItems,nSlots):
122 """ returns the number of ways to fit nItems into nSlots 123 124 We assume that (x,y) and (y,x) are equivalent, and 125 (x,x) is allowed. 126 127 General formula is, for N items and S slots: 128 res = (N+S-1)! / ( (N-1)! * S! ) 129 130 """ 131 global _numCombDict 132 res = _numCombDict.get((nItems,nSlots),-1) 133 if res == -1: 134 res = _fact(nItems+nSlots-1) / (_fact(nItems-1)*_fact(nSlots)) 135 _numCombDict[(nItems,nSlots)] = res 136 return res
137 138 _verbose = 0 139 140 _countCache={}
141 -def CountUpTo(nItems,nSlots,vs,idx=0,startAt=0):
142 """ Figures out where a given combination of indices would 143 occur in the combinatorial explosion generated by _GetIndexCombinations_ 144 145 **Arguments** 146 147 - nItems: the number of items to distribute 148 149 - nSlots: the number of slots in which to distribute them 150 151 - vs: a sequence containing the values to find 152 153 - idx: used in the recursion 154 155 - startAt: used in the recursion 156 157 **Returns** 158 159 an integer 160 161 """ 162 global _countCache 163 if _verbose: 164 print ' '*idx,'CountUpTo(%d)'%idx,vs[idx],startAt 165 if idx==0 and _countCache.has_key((nItems,nSlots,tuple(vs))): 166 return _countCache[(nItems,nSlots,tuple(vs))] 167 elif idx >= nSlots: 168 accum = 0 169 elif idx == nSlots-1: 170 accum = vs[idx]-startAt 171 else: 172 accum = 0 173 # get the digit at idx correct 174 for i in range(startAt,vs[idx]): 175 nLevsUnder = nSlots-idx-1 176 nValsOver = nItems-i 177 if _verbose: 178 print ' '*idx,' ',i,nValsOver,nLevsUnder,\ 179 NumCombinations(nValsOver,nLevsUnder) 180 accum += NumCombinations(nValsOver,nLevsUnder) 181 accum += CountUpTo(nItems,nSlots,vs,idx+1,vs[idx]) 182 if _verbose: print ' '*idx,'>',accum 183 if idx == 0: 184 _countCache[(nItems,nSlots,tuple(vs))] = accum 185 return accum
186 187 _indexCombinations={}
188 -def GetIndexCombinations(nItems,nSlots,slot=0,lastItemVal=0):
189 """ Generates all combinations of nItems in nSlots without including 190 duplicates 191 192 **Arguments** 193 194 - nItems: the number of items to distribute 195 196 - nSlots: the number of slots in which to distribute them 197 198 - slot: used in recursion 199 200 - lastItemVal: used in recursion 201 202 **Returns** 203 204 a list of lists 205 206 """ 207 global _indexCombinations 208 if not slot and _indexCombinations.has_key((nItems,nSlots)): 209 res = _indexCombinations[(nItems,nSlots)] 210 elif slot >= nSlots: 211 res = [] 212 elif slot == nSlots-1: 213 res = [[x] for x in range(lastItemVal,nItems)] 214 else: 215 res = [] 216 for x in range(lastItemVal,nItems): 217 tmp = GetIndexCombinations(nItems,nSlots,slot+1,x) 218 for entry in tmp: 219 res.append([x]+entry) 220 if not slot: 221 _indexCombinations[(nItems,nSlots)] = res 222 return res
223
224 -def GetAllCombinations(choices,noDups=1,which=0):
225 """ Does the combinatorial explosion of the possible combinations 226 of the elements of _choices_. 227 228 **Arguments** 229 230 - choices: sequence of sequences with the elements to be enumerated 231 232 - noDups: (optional) if this is nonzero, results with duplicates, 233 e.g. (1,1,0), will not be generated 234 235 - which: used in recursion 236 237 **Returns** 238 239 a list of lists 240 241 >>> GetAllCombinations([(0,),(1,),(2,)]) 242 [[0, 1, 2]] 243 >>> GetAllCombinations([(0,),(1,3),(2,)]) 244 [[0, 1, 2], [0, 3, 2]] 245 246 >>> GetAllCombinations([(0,1),(1,3),(2,)]) 247 [[0, 1, 2], [0, 3, 2], [1, 3, 2]] 248 249 """ 250 if which >= len(choices): 251 res = [] 252 elif which == len(choices)-1: 253 res = [[x] for x in choices[which]] 254 else: 255 res = [] 256 tmp = GetAllCombinations(choices,noDups=noDups, 257 which=which+1) 258 for thing in choices[which]: 259 for other in tmp: 260 if not noDups or thing not in other: 261 res.append([thing]+other) 262 return res
263
264 -def GetUniqueCombinations(choices,classes,which=0):
265 """ Does the combinatorial explosion of the possible combinations 266 of the elements of _choices_. 267 268 """ 269 assert len(choices)==len(classes) 270 if which >= len(choices): 271 res = [] 272 elif which == len(choices)-1: 273 res = [[(classes[which],x)] for x in choices[which]] 274 else: 275 res = [] 276 tmp = GetUniqueCombinations(choices,classes, 277 which=which+1) 278 for thing in choices[which]: 279 for other in tmp: 280 idxThere=len([x for x in other if x[1]==thing]) 281 if not idxThere: 282 newL = [(classes[which],thing)]+other 283 newL.sort() 284 if newL not in res: 285 res.append(newL) 286 return res
287
288 -def UniquifyCombinations(combos):
289 """ uniquifies the combinations in the argument 290 291 **Arguments**: 292 293 - combos: a sequence of sequences 294 295 **Returns** 296 297 - a list of tuples containing the unique combos 298 299 """ 300 print '>>> u:',combos 301 resD = {} 302 for combo in combos: 303 k = combo[:] 304 k.sort() 305 resD[tuple(k)] = tuple(combo) 306 print ' >>> u:',resD.values() 307 return resD.values()
308
309 -def GetPossibleScaffolds(nPts,bins,useTriangleInequality=True):
310 """ gets all realizable scaffolds (passing the triangle inequality) with the 311 given number of points and returns them as a list of tuples 312 313 """ 314 if nPts < 2: 315 res = 0 316 elif nPts == 2: 317 res = [(x,) for x in range(len(bins))] 318 else: 319 nDists = len(nPointDistDict[nPts]) 320 combos = GetAllCombinations([range(len(bins))]*nDists,noDups=0) 321 res = [] 322 for combo in combos: 323 if not useTriangleInequality or ScaffoldPasses(combo,bins): 324 res.append(tuple(combo)) 325 return res
326
327 -def OrderTriangle(featIndices,dists):
328 """ 329 put the distances for a triangle into canonical order 330 331 It's easy if the features are all different: 332 >>> OrderTriangle([0,2,4],[1,2,3]) 333 ([0, 2, 4], [1, 2, 3]) 334 335 It's trickiest if they are all the same: 336 >>> OrderTriangle([0,0,0],[1,2,3]) 337 ([0, 0, 0], [3, 2, 1]) 338 >>> OrderTriangle([0,0,0],[2,1,3]) 339 ([0, 0, 0], [3, 2, 1]) 340 >>> OrderTriangle([0,0,0],[1,3,2]) 341 ([0, 0, 0], [3, 2, 1]) 342 >>> OrderTriangle([0,0,0],[3,1,2]) 343 ([0, 0, 0], [3, 2, 1]) 344 >>> OrderTriangle([0,0,0],[3,2,1]) 345 ([0, 0, 0], [3, 2, 1]) 346 347 >>> OrderTriangle([0,0,1],[3,2,1]) 348 ([0, 0, 1], [3, 2, 1]) 349 >>> OrderTriangle([0,0,1],[1,3,2]) 350 ([0, 0, 1], [1, 3, 2]) 351 >>> OrderTriangle([0,0,1],[1,2,3]) 352 ([0, 0, 1], [1, 3, 2]) 353 >>> OrderTriangle([0,0,1],[1,3,2]) 354 ([0, 0, 1], [1, 3, 2]) 355 356 """ 357 if len(featIndices)!=3: raise ValueError,'bad indices' 358 if len(dists)!=3: raise ValueError,'bad dists' 359 360 fs = set(featIndices) 361 if len(fs)==3: 362 return featIndices,dists 363 364 dSums=[0]*3 365 dSums[0] = dists[0]+dists[1] 366 dSums[1] = dists[0]+dists[2] 367 dSums[2] = dists[1]+dists[2] 368 mD = max(dSums) 369 if len(fs)==1: 370 if dSums[0]==mD: 371 if dists[0]>dists[1]: 372 ireorder=(0,1,2) 373 dreorder=(0,1,2) 374 else: 375 ireorder=(0,2,1) 376 dreorder=(1,0,2) 377 elif dSums[1]==mD: 378 if dists[0]>dists[2]: 379 ireorder=(1,0,2) 380 dreorder=(0,2,1) 381 else: 382 ireorder=(1,2,0) 383 dreorder=(2,0,1) 384 else: 385 if dists[1]>dists[2]: 386 ireorder = (2,0,1) 387 dreorder=(1,2,0) 388 else: 389 ireorder = (2,1,0) 390 dreorder=(2,1,0) 391 else: 392 # two classes 393 if featIndices[0]==featIndices[1]: 394 if dists[1]>dists[2]: 395 ireorder=(0,1,2) 396 dreorder=(0,1,2) 397 else: 398 ireorder=(1,0,2) 399 dreorder=(0,2,1) 400 elif featIndices[0]==featIndices[2]: 401 if dists[0]>dists[2]: 402 ireorder=(0,1,2) 403 dreorder=(0,1,2) 404 else: 405 ireorder=(2,1,0) 406 dreorder=(2,1,0) 407 else: #featIndices[1]==featIndices[2]: 408 if dists[0]>dists[1]: 409 ireorder=(0,1,2) 410 dreorder=(0,1,2) 411 else: 412 ireorder=(0,2,1) 413 dreorder=(1,0,2) 414 dists = [dists[x] for x in dreorder] 415 featIndices = [featIndices[x] for x in ireorder] 416 return featIndices,dists
417 418 #------------------------------------ 419 # 420 # doctest boilerplate 421 #
422 -def _test():
423 import doctest,sys 424 return doctest.testmod(sys.modules["__main__"])
425 426 427 if __name__ == '__main__': 428 import sys 429 failed,tried = _test() 430 sys.exit(failed) 431