1
2
3
4
5
6
7 """ Calculation of topological/topochemical descriptors.
8
9
10
11 """
12 import Chem
13 from Chem import Graphs
14 from Chem import rdchem
15
16 from Chem import pyPeriodicTable as PeriodicTable
17 from Numeric import *
18 from ML.InfoTheory import entropy
19
20 periodicTable = rdchem.GetPeriodicTable()
21
22 _log2val = log(2)
25
27 """ *Internal Use Only*
28
29 this is just a row sum of the matrix... simple, neh?
30
31 """
32 if not onlyOnes:
33 res = sum(mat)
34 else:
35 res = sum(equal(mat,1))
36 return res
37
39 """ *Internal Use Only*
40
41 """
42 res = mol.GetNumBonds()
43 return res
44
46 """ *Internal Use Only*
47
48 """
49 res = {}
50 for v in arr:
51 res[v] = res.get(v,0)+1
52 return res
53
54
56 """ calculate the Hall-Kier alpha value for a molecule
57
58 From equations (58) of Rev. Comp. Chem. vol 2, 367-422, (1991)
59
60 """
61 alphaSum = 0.0
62 rC = PeriodicTable.nameTable['C'][5]
63 for atom in m.GetAtoms():
64 atNum=atom.GetAtomicNum()
65 if not atNum: continue
66 symb = atom.GetSymbol()
67 alphaV = PeriodicTable.hallKierAlphas.get(symb,None)
68 if alphaV is not None:
69 hyb = atom.GetHybridization()-2
70 if(hyb<len(alphaV)):
71 alpha = alphaV[hyb]
72 if alpha is None:
73 alpha = alphaV[-1]
74 else:
75 alpha = alphaV[-1]
76 else:
77 rA = PeriodicTable.nameTable[symb][5]
78 alpha = rA/rC - 1
79 alphaSum += alpha
80 return alphaSum
81 HallKierAlpha.version="1.0.2"
82
83 -def Ipc(mol, avg = 0, dMat = None, forceDMat = 0):
84 """This returns the information content of the coefficients of the characteristic
85 polynomial of the adjacency matrix of a hydrogen-suppressed graph of a molecule.
86
87 'avg = 1' returns the information content divided by the total population.
88
89 From D. Bonchev & N. Trinajstic, J. Chem. Phys. vol 67, 4517-4533 (1977)
90
91 """
92 if forceDMat or dMat is None:
93 if forceDMat:
94 dMat = Chem.GetDistanceMatrix(mol,0)
95 mol._adjMat = dMat
96 else:
97 try:
98 dMat = mol._adjMat
99 except AttributeError:
100 dMat = Chem.GetDistanceMatrix(mol,0)
101 mol._adjMat = dMat
102
103 adjMat = equal(dMat,1)
104 cPoly = abs(Graphs.CharacteristicPolynomial(mol, adjMat))
105 if avg:
106 return entropy.InfoEntropy(cPoly)
107 else:
108 return sum(cPoly)*entropy.InfoEntropy(cPoly)
109 Ipc.version="1.0.0"
110
111
112
114 """ Hall-Kier Kappa1 value
115
116 From equations (58) and (59) of Rev. Comp. Chem. vol 2, 367-422, (1991)
117
118 """
119 P1 = mol.GetNumBonds(1)
120 A = mol.GetNumAtoms(onlyHeavy=1)
121 alpha = HallKierAlpha(mol)
122 denom = P1 + alpha
123 if denom:
124 kappa = (A + alpha)*(A + alpha - 1)**2 / denom**2
125 else:
126 kappa = 0.0
127 return kappa
128 Kappa1.version="1.0.0"
129
130
132 """ Hall-Kier Kappa2 value
133
134 From equations (58) and (60) of Rev. Comp. Chem. vol 2, 367-422, (1991)
135
136 """
137 P2 = len(Chem.FindAllPathsOfLengthN(mol,2))
138 A = mol.GetNumAtoms(onlyHeavy=1)
139 alpha = HallKierAlpha(mol)
140 denom = (P2 + alpha)**2
141 if denom:
142 kappa = (A + alpha - 1)*(A + alpha - 2)**2 / denom
143 else:
144 kappa = 0
145 return kappa
146 Kappa2.version="1.0.0"
147
149 """ Hall-Kier Kappa3 value
150
151 From equations (58), (61) and (62) of Rev. Comp. Chem. vol 2, 367-422, (1991)
152
153 """
154 P3 = len(Chem.FindAllPathsOfLengthN(mol,3))
155 A = mol.GetNumAtoms(1)
156 alpha = HallKierAlpha(mol)
157 denom = (P3 + alpha)**2
158 if denom:
159 if A % 2 == 1:
160 kappa = (A + alpha - 1)*(A + alpha - 3)**2 / denom
161 else:
162 kappa = (A + alpha - 2)*(A + alpha - 3)**2 / denom
163 else:
164 kappa = 0
165 return kappa
166 Kappa3.version="1.0.0"
167
169 """ From equations (1),(9) and (10) of Rev. Comp. Chem. vol 2, 367-422, (1991)
170
171 """
172 deltas = [x.GetDegree() for x in mol.GetAtoms()]
173 while 0 in deltas:
174 deltas.remove(0)
175 deltas = array(deltas,Float)
176 res = sum(sqrt(1./deltas))
177 return res
178 Chi0.version="1.0.0"
179
180
182 """ From equations (1),(11) and (12) of Rev. Comp. Chem. vol 2, 367-422, (1991)
183
184 """
185 c1s = [x.GetBeginAtom().GetDegree()*x.GetEndAtom().GetDegree() for x in mol.GetBonds()]
186 while 0 in c1s:
187 c1s.remove(0)
188 c1s = array(c1s,Float)
189 res = sum(sqrt(1./c1s))
190 return res
191 Chi1.version="1.0.0"
192
195
213
215 """ From equations (5),(9) and (10) of Rev. Comp. Chem. vol 2, 367-422, (1991)
216
217 """
218 deltas = _hkDeltas(mol)
219 while 0 in deltas:
220 deltas.remove(0)
221 res = sum(sqrt(1./array(deltas)))
222 return res
223 Chi0v.version="1.0.0"
224
226 """ From equations (5),(11) and (12) of Rev. Comp. Chem. vol 2, 367-422, (1991)
227
228 """
229 deltas = array(_hkDeltas(mol,skipHs=0))
230 res = 0.0
231 for bond in mol.GetBonds():
232 v = deltas[bond.GetBeginAtomIdx()]*deltas[bond.GetEndAtomIdx()]
233 if v != 0.0:
234 res += sqrt(1./v)
235 return res
236 Chi1v.version="1.0.0"
237
239 """ From equations (5),(15) and (16) of Rev. Comp. Chem. vol 2, 367-422, (1991)
240
241 **NOTE**: because the current path finding code does, by design,
242 detect rings as paths (e.g. in C1CC1 there is *1* atom path of
243 length 3), values of ChiNv with N >= 3 may give results that differ
244 from those provided by the old code in molecules that have rings of
245 size 3.
246
247 """
248 deltas = array(_hkDeltas(mol,skipHs=0))
249 accum = 0.0
250
251 for path in Chem.FindAllPathsOfLengthN(mol,order+1,useBonds=0):
252 ats = []
253 cAccum = 1.0
254
255 for idx in path:
256 cAccum *= deltas[idx]
257 if cAccum:
258 accum += 1./sqrt(cAccum)
259 return accum
260
262 """ From equations (5),(15) and (16) of Rev. Comp. Chem. vol 2, 367-422, (1991)
263
264 """
265 return ChiNv_(mol,2)
266 Chi2v.version="1.0.0"
267
269 """ From equations (5),(15) and (16) of Rev. Comp. Chem. vol 2, 367-422, (1991)
270
271 """
272 return ChiNv_(mol,3)
273 Chi3v.version="1.0.0"
274
276 """ From equations (5),(15) and (16) of Rev. Comp. Chem. vol 2, 367-422, (1991)
277
278 **NOTE**: because the current path finding code does, by design,
279 detect rings as paths (e.g. in C1CC1 there is *1* atom path of
280 length 3), values of Chi4v may give results that differ from those
281 provided by the old code in molecules that have 3 rings.
282
283 """
284 return ChiNv_(mol,4)
285 Chi4v.version="1.0.0"
286
288 """ Similar to Hall Kier Chi0v, but uses nVal instead of valence
289 This makes a big difference after we get out of the first row.
290
291 """
292 deltas = [_nVal(x) for x in mol.GetAtoms()]
293 while deltas.count(0):
294 deltas.remove(0)
295 deltas = array(deltas,Float)
296 res = sum(sqrt(1./deltas))
297 return res
298 Chi0n.version="1.0.0"
299
301 """ Similar to Hall Kier Chi1v, but uses nVal instead of valence
302
303 """
304 delts = array([_nVal(x) for x in mol.GetAtoms()],Float)
305 res = 0.0
306 for bond in mol.GetBonds():
307 v = delts[bond.GetBeginAtomIdx()]*delts[bond.GetEndAtomIdx()]
308 if v != 0.0:
309 res += sqrt(1./v)
310 return res
311 Chi1n.version="1.0.0"
313 """ Similar to Hall Kier ChiNv, but uses nVal instead of valence
314 This makes a big difference after we get out of the first row.
315
316 **NOTE**: because the current path finding code does, by design,
317 detect rings as paths (e.g. in C1CC1 there is *1* atom path of
318 length 3), values of ChiNn with N >= 3 may give results that differ
319 from those provided by the old code in molecules that have rings of
320 size 3.
321
322 """
323 deltas = array([_nVal(x) for x in mol.GetAtoms()],Float)
324 accum = 0.0
325 for path in Chem.FindAllPathsOfLengthN(mol,order+1,useBonds=0):
326 cAccum = 1.0
327 for idx in path:
328 cAccum *= deltas[idx]
329 if cAccum:
330 accum += 1./sqrt(cAccum)
331 return accum
332
334 """ Similar to Hall Kier Chi2v, but uses nVal instead of valence
335 This makes a big difference after we get out of the first row.
336
337 """
338 return ChiNn_(mol,2)
339 Chi2n.version="1.0.0"
340
342 """ Similar to Hall Kier Chi3v, but uses nVal instead of valence
343 This makes a big difference after we get out of the first row.
344
345 """
346 return ChiNn_(mol,3)
347 Chi3n.version="1.0.0"
348
350 """ Similar to Hall Kier Chi4v, but uses nVal instead of valence
351 This makes a big difference after we get out of the first row.
352
353
354 **NOTE**: because the current path finding code does, by design,
355 detect rings as paths (e.g. in C1CC1 there is *1* atom path of
356 length 3), values of Chi4n may give results that differ from those
357 provided by the old code in molecules that have 3 rings.
358
359 """
360 return ChiNn_(mol,4)
361 Chi4n.version="1.0.0"
362
363
364 -def BalabanJ(mol,dMat=None,forceDMat=0):
365 """ Calculate Balaban's J value for a molecule
366
367 **Arguments**
368
369 - mol: a molecule
370
371 - dMat: (optional) a distance/adjacency matrix for the molecule, if this
372 is not provide, one will be calculated
373
374 - forceDMat: (optional) if this is set, the distance/adjacency matrix
375 will be recalculated regardless of whether or not _dMat_ is provided
376 or the molecule already has one
377
378 **Returns**
379
380 - a float containing the J value
381
382 We follow the notation of Balaban's paper:
383 Chem. Phys. Lett. vol 89, 399-404, (1982)
384
385 """
386
387 if forceDMat or dMat is None:
388 if forceDMat:
389
390 dMat = Chem.GetDistanceMatrix(mol,useBO=1,useAtomWts=0,force=1)
391 mol._balabanMat = dMat
392 adjMat = Chem.GetAdjacencyMatrix(mol,useBO=0,emptyVal=0,force=0,prefix="NoBO")
393 mol._adjMat = adjMat
394 else:
395 try:
396
397 dMat = mol._balabanMat
398 except AttributeError:
399
400 dMat = Chem.GetDistanceMatrix(mol,useBO=1,useAtomWts=0,force=0,prefix="Balaban")
401
402 mol._balabanMat = dMat
403 try:
404 adjMat = mol._adjMat
405 except AttributeError:
406 adjMat = Chem.GetAdjacencyMatrix(mol,useBO=0,emptyVal=0,force=0,prefix="NoBO")
407 mol._adjMat = adjMat
408 else:
409 adjMat = Chem.GetAdjacencyMatrix(mol,useBO=0,emptyVal=0,force=0,prefix="NoBO")
410
411 s = _VertexDegrees(dMat)
412 q = _NumAdjacencies(mol,dMat)
413 n = mol.GetNumAtoms()
414 mu = q - n + 1
415
416 sum = 0.
417 nS = len(s)
418 for i in range(nS):
419 si = s[i]
420 for j in range(i,nS):
421 if adjMat[i,j] == 1:
422 sum += 1./sqrt(si*s[j])
423
424 if mu+1 != 0:
425 J = float(q) / float(mu + 1) * sum
426 else:
427 J = 0
428
429 return J
430 BalabanJ.version="1.0.0"
431
432
433
434
435
436
437
438
440 """
441 Used by BertzCT
442
443 vdList: the number of neighbors each atom has
444 bdMat: "balaban" distance matrix
445
446 """
447 if forceBDMat:
448 bdMat = Chem.GetDistanceMatrix(mol,useBO=1,useAtomWts=0,force=1,
449 prefix="Balaban")
450 mol._balabanMat = bdMat
451
452 atomIdx = 0
453 keysSeen = []
454 symList = [0]*numAtoms
455 for i in range(numAtoms):
456 tmpList = bdMat[i].tolist()
457 tmpList.sort()
458 theKey = tuple(['%.4f'%x for x in tmpList[:cutoff]])
459 try:
460 idx = keysSeen.index(theKey)
461 except ValueError:
462 idx = len(keysSeen)
463 keysSeen.append(theKey)
464 symList[i] = idx+1
465 return tuple(symList)
466
468 """
469 Used by BertzCT
470 """
471 if atom1Id < atom2Id:
472 theKey = (atom1Id,atom2Id)
473 else:
474 theKey = (atom2Id,atom1Id)
475 tmp = bondDic[theKey]
476 if tmp == Chem.BondType.AROMATIC:
477 tmp = 1.5
478 else:
479 tmp = float(tmp)
480
481 return tmp
482
483
485 """
486 Used by BertzCT
487 """
488 connectionList = connectionDict.values()
489 totConnections = sum(connectionList)
490 connectionIE = totConnections*(entropy.InfoEntropy(array(connectionList)) +
491 log(totConnections)/_log2val)
492 atomTypeList = atomTypeDict.values()
493 atomTypeIE = numAtoms*entropy.InfoEntropy(array(atomTypeList))
494 return atomTypeIE + connectionIE
495
497 """ _Internal Use Only_
498 Used by BertzCT
499
500 """
501 bondDict = {}
502 nList = [None]*numAtoms
503 vdList = [0]*numAtoms
504 for aBond in mol.GetBonds():
505 atom1=aBond.GetBeginAtomIdx()
506 atom2=aBond.GetEndAtomIdx()
507 if atom1>atom2: atom2,atom1=atom1,atom2
508 if not aBond.GetIsAromatic():
509 bondDict[(atom1,atom2)] = aBond.GetBondType()
510 else:
511
512 bondDict[(atom1,atom2)] = Chem.BondType.AROMATIC
513 if nList[atom1] is None:
514 nList[atom1] = [atom2]
515 elif atom2 not in nList[atom1]:
516 nList[atom1].append(atom2)
517 if nList[atom2] is None:
518 nList[atom2] = [atom1]
519 elif atom1 not in nList[atom2]:
520 nList[atom2].append(atom1)
521
522 for i,element in enumerate(nList):
523 try:
524 element.sort()
525 vdList[i] = len(element)
526 except:
527 vdList[i] = 0
528 return bondDict, nList, vdList
529
530 -def BertzCT(mol, cutoff = 100, dMat = None, forceDMat = 1):
531 """ A topological index meant to quantify "complexity" of molecules.
532
533 Consists of a sum of two terms, one representing the complexity
534 of the bonding, the other representing the complexity of the
535 distribution of heteroatoms.
536
537 From S. H. Bertz, J. Am. Chem. Soc., vol 103, 3599-3601 (1981)
538
539 "cutoff" is an integer value used to limit the computational
540 expense. A cutoff value tells the program to consider vertices
541 topologically identical if their distance vectors (sets of
542 distances to all other vertices) are equal out to the "cutoff"th
543 nearest-neighbor.
544
545 **NOTE** The original implementation had the following comment:
546 > this implementation treats aromatic rings as the
547 > corresponding Kekule structure with alternating bonds,
548 > for purposes of counting "connections".
549 Upon further thought, this is the WRONG thing to do. It
550 results in the possibility of a molecule giving two different
551 CT values depending on the kekulization. For example, in the
552 old implementation, these two SMILES:
553 CC2=CN=C1C3=C(C(C)=C(C=N3)C)C=CC1=C2C
554 CC3=CN=C2C1=NC=C(C)C(C)=C1C=CC2=C3C
555 which correspond to differentk kekule forms, yield different
556 values.
557 The new implementation uses consistent (aromatic) bond orders
558 for aromatic bonds.
559
560 THIS MEANS THAT THIS IMPLEMENTATION IS NOT BACKWARDS COMPATIBLE.
561
562 Any molecule containing aromatic rings will yield different
563 values with this implementation. The new behavior is the correct
564 one, so we're going to live with the breakage.
565
566 **NOTE** this barfs if the molecule contains a second (or
567 nth) fragment that is one atom.
568
569 """
570 atomTypeDict = {}
571 connectionDict = {}
572 numAtoms = mol.GetNumAtoms()
573 if forceDMat or dMat is None:
574 if forceDMat:
575
576 dMat = Chem.GetDistanceMatrix(mol,useBO=0,useAtomWts=0,force=1)
577 mol._adjMat = dMat
578 else:
579 try:
580 dMat = mol._adjMat
581 except AttributeError:
582 dMat = Chem.GetDistanceMatrix(mol,useBO=0,useAtomWts=0,force=1)
583 mol._adjMat = dMat
584
585 if numAtoms < 2:
586 return 0
587
588 bondDict, neighborList, vdList = _CreateBondDictEtc(mol, numAtoms)
589 symmetryClasses = _AssignSymmetryClasses(mol, vdList, dMat, forceDMat, numAtoms, cutoff)
590
591 for atomIdx in range(numAtoms):
592 hingeAtomNumber = mol.GetAtomWithIdx(atomIdx).GetAtomicNum()
593 atomTypeDict[hingeAtomNumber] = atomTypeDict.get(hingeAtomNumber,0)+1
594
595 hingeAtomClass = symmetryClasses[atomIdx]
596 numNeighbors = vdList[atomIdx]
597 for i in range(numNeighbors):
598 neighbor_iIdx = neighborList[atomIdx][i]
599 NiClass = symmetryClasses[neighbor_iIdx]
600 bond_i_order = _LookUpBondOrder(atomIdx, neighbor_iIdx, bondDict)
601
602 if (bond_i_order > 1) and (neighbor_iIdx > atomIdx):
603 numConnections = bond_i_order*(bond_i_order - 1)/2
604 connectionKey = (min(hingeAtomClass, NiClass), max(hingeAtomClass, NiClass))
605 connectionDict[connectionKey] = connectionDict.get(connectionKey,0)+numConnections
606
607 for j in range(i+1, numNeighbors):
608 neighbor_jIdx = neighborList[atomIdx][j]
609 NjClass = symmetryClasses[neighbor_jIdx]
610 bond_j_order = _LookUpBondOrder(atomIdx, neighbor_jIdx, bondDict)
611 numConnections = bond_i_order*bond_j_order
612 connectionKey = (min(NiClass, NjClass), hingeAtomClass, max(NiClass, NjClass))
613 connectionDict[connectionKey] = connectionDict.get(connectionKey,0)+numConnections
614
615 if not connectionDict:
616 connectionDict = {'a':1}
617
618 ks = connectionDict.keys()
619 ks.sort()
620 return _CalculateEntropies(connectionDict, atomTypeDict, numAtoms)
621 BertzCT.version="2.0.0"
622
623
624
625
626
627
628
629
630
631
632