1
2
3
4
5
6
7 """ Calculation of topological/topochemical descriptors.
8
9
10
11 """
12 from rdkit import Chem
13 from rdkit.Chem import Graphs
14 from rdkit.Chem import rdchem
15
16 from rdkit.Chem import pyPeriodicTable as PeriodicTable
17 import numpy
18 import math
19 from rdkit.ML.InfoTheory import entropy
20
21 periodicTable = rdchem.GetPeriodicTable()
22
23 _log2val = math.log(2)
26
28 """ *Internal Use Only*
29
30 this is just a row sum of the matrix... simple, neh?
31
32 """
33 if not onlyOnes:
34 res = sum(mat)
35 else:
36 res = sum(numpy.equal(mat,1))
37 return res
38
40 """ *Internal Use Only*
41
42 """
43 res = mol.GetNumBonds()
44 return res
45
47 """ *Internal Use Only*
48
49 """
50 res = {}
51 for v in arr:
52 res[v] = res.get(v,0)+1
53 return res
54
55
57 """ calculate the Hall-Kier alpha value for a molecule
58
59 From equations (58) of Rev. Comp. Chem. vol 2, 367-422, (1991)
60
61 """
62 alphaSum = 0.0
63 rC = PeriodicTable.nameTable['C'][5]
64 for atom in m.GetAtoms():
65 atNum=atom.GetAtomicNum()
66 if not atNum: continue
67 symb = atom.GetSymbol()
68 alphaV = PeriodicTable.hallKierAlphas.get(symb,None)
69 if alphaV is not None:
70 hyb = atom.GetHybridization()-2
71 if(hyb<len(alphaV)):
72 alpha = alphaV[hyb]
73 if alpha is None:
74 alpha = alphaV[-1]
75 else:
76 alpha = alphaV[-1]
77 else:
78 rA = PeriodicTable.nameTable[symb][5]
79 alpha = rA/rC - 1
80 alphaSum += alpha
81 return alphaSum
82 HallKierAlpha.version="1.0.2"
83
84 -def Ipc(mol, avg = 0, dMat = None, forceDMat = 0):
85 """This returns the information content of the coefficients of the characteristic
86 polynomial of the adjacency matrix of a hydrogen-suppressed graph of a molecule.
87
88 'avg = 1' returns the information content divided by the total population.
89
90 From D. Bonchev & N. Trinajstic, J. Chem. Phys. vol 67, 4517-4533 (1977)
91
92 """
93 if forceDMat or dMat is None:
94 if forceDMat:
95 dMat = Chem.GetDistanceMatrix(mol,0)
96 mol._adjMat = dMat
97 else:
98 try:
99 dMat = mol._adjMat
100 except AttributeError:
101 dMat = Chem.GetDistanceMatrix(mol,0)
102 mol._adjMat = dMat
103
104 adjMat = numpy.equal(dMat,1)
105 cPoly = abs(Graphs.CharacteristicPolynomial(mol, adjMat))
106 if avg:
107 return entropy.InfoEntropy(cPoly)
108 else:
109 return sum(cPoly)*entropy.InfoEntropy(cPoly)
110 Ipc.version="1.0.0"
111
112
113
115 """ Hall-Kier Kappa1 value
116
117 From equations (58) and (59) of Rev. Comp. Chem. vol 2, 367-422, (1991)
118
119 """
120 P1 = mol.GetNumBonds(1)
121 A = mol.GetNumAtoms(onlyHeavy=1)
122 alpha = HallKierAlpha(mol)
123 denom = P1 + alpha
124 if denom:
125 kappa = (A + alpha)*(A + alpha - 1)**2 / denom**2
126 else:
127 kappa = 0.0
128 return kappa
129 Kappa1.version="1.0.0"
130
131
133 """ Hall-Kier Kappa2 value
134
135 From equations (58) and (60) of Rev. Comp. Chem. vol 2, 367-422, (1991)
136
137 """
138 P2 = len(Chem.FindAllPathsOfLengthN(mol,2))
139 A = mol.GetNumAtoms(onlyHeavy=1)
140 alpha = HallKierAlpha(mol)
141 denom = (P2 + alpha)**2
142 if denom:
143 kappa = (A + alpha - 1)*(A + alpha - 2)**2 / denom
144 else:
145 kappa = 0
146 return kappa
147 Kappa2.version="1.0.0"
148
150 """ Hall-Kier Kappa3 value
151
152 From equations (58), (61) and (62) of Rev. Comp. Chem. vol 2, 367-422, (1991)
153
154 """
155 P3 = len(Chem.FindAllPathsOfLengthN(mol,3))
156 A = mol.GetNumAtoms(1)
157 alpha = HallKierAlpha(mol)
158 denom = (P3 + alpha)**2
159 if denom:
160 if A % 2 == 1:
161 kappa = (A + alpha - 1)*(A + alpha - 3)**2 / denom
162 else:
163 kappa = (A + alpha - 2)*(A + alpha - 3)**2 / denom
164 else:
165 kappa = 0
166 return kappa
167 Kappa3.version="1.0.0"
168
170 """ From equations (1),(9) and (10) of Rev. Comp. Chem. vol 2, 367-422, (1991)
171
172 """
173 deltas = [x.GetDegree() for x in mol.GetAtoms()]
174 while 0 in deltas:
175 deltas.remove(0)
176 deltas = numpy.array(deltas,'d')
177 res = sum(numpy.sqrt(1./deltas))
178 return res
179 Chi0.version="1.0.0"
180
181
183 """ From equations (1),(11) and (12) of Rev. Comp. Chem. vol 2, 367-422, (1991)
184
185 """
186 c1s = [x.GetBeginAtom().GetDegree()*x.GetEndAtom().GetDegree() for x in mol.GetBonds()]
187 while 0 in c1s:
188 c1s.remove(0)
189 c1s = numpy.array(c1s,'d')
190 res = sum(numpy.sqrt(1./c1s))
191 return res
192 Chi1.version="1.0.0"
193
196
214
216 """ From equations (5),(9) and (10) of Rev. Comp. Chem. vol 2, 367-422, (1991)
217
218 """
219 deltas = _hkDeltas(mol)
220 while 0 in deltas:
221 deltas.remove(0)
222 res = sum(numpy.sqrt(1./numpy.array(deltas)))
223 return res
224 Chi0v.version="1.0.0"
225
227 """ From equations (5),(11) and (12) of Rev. Comp. Chem. vol 2, 367-422, (1991)
228
229 """
230 deltas = numpy.array(_hkDeltas(mol,skipHs=0))
231 res = 0.0
232 for bond in mol.GetBonds():
233 v = deltas[bond.GetBeginAtomIdx()]*deltas[bond.GetEndAtomIdx()]
234 if v != 0.0:
235 res += numpy.sqrt(1./v)
236 return res
237 Chi1v.version="1.0.0"
238
240 """ From equations (5),(15) and (16) of Rev. Comp. Chem. vol 2, 367-422, (1991)
241
242 **NOTE**: because the current path finding code does, by design,
243 detect rings as paths (e.g. in C1CC1 there is *1* atom path of
244 length 3), values of ChiNv with N >= 3 may give results that differ
245 from those provided by the old code in molecules that have rings of
246 size 3.
247
248 """
249 deltas = numpy.array(_hkDeltas(mol,skipHs=0))
250 accum = 0.0
251
252 for path in Chem.FindAllPathsOfLengthN(mol,order+1,useBonds=0):
253 ats = []
254 cAccum = 1.0
255
256 for idx in path:
257 cAccum *= deltas[idx]
258 if cAccum:
259 accum += 1./numpy.sqrt(cAccum)
260 return accum
261
263 """ From equations (5),(15) and (16) of Rev. Comp. Chem. vol 2, 367-422, (1991)
264
265 """
266 return ChiNv_(mol,2)
267 Chi2v.version="1.0.0"
268
270 """ From equations (5),(15) and (16) of Rev. Comp. Chem. vol 2, 367-422, (1991)
271
272 """
273 return ChiNv_(mol,3)
274 Chi3v.version="1.0.0"
275
277 """ From equations (5),(15) and (16) of Rev. Comp. Chem. vol 2, 367-422, (1991)
278
279 **NOTE**: because the current path finding code does, by design,
280 detect rings as paths (e.g. in C1CC1 there is *1* atom path of
281 length 3), values of Chi4v may give results that differ from those
282 provided by the old code in molecules that have 3 rings.
283
284 """
285 return ChiNv_(mol,4)
286 Chi4v.version="1.0.0"
287
289 """ Similar to Hall Kier Chi0v, but uses nVal instead of valence
290 This makes a big difference after we get out of the first row.
291
292 """
293 deltas = [_nVal(x) for x in mol.GetAtoms()]
294 while deltas.count(0):
295 deltas.remove(0)
296 deltas = numpy.array(deltas,'d')
297 res = sum(numpy.sqrt(1./deltas))
298 return res
299 Chi0n.version="1.0.0"
300
312 Chi1n.version="1.0.0"
314 """ Similar to Hall Kier ChiNv, but uses nVal instead of valence
315 This makes a big difference after we get out of the first row.
316
317 **NOTE**: because the current path finding code does, by design,
318 detect rings as paths (e.g. in C1CC1 there is *1* atom path of
319 length 3), values of ChiNn with N >= 3 may give results that differ
320 from those provided by the old code in molecules that have rings of
321 size 3.
322
323 """
324 deltas = numpy.array([_nVal(x) for x in mol.GetAtoms()],'d')
325 accum = 0.0
326 for path in Chem.FindAllPathsOfLengthN(mol,order+1,useBonds=0):
327 cAccum = 1.0
328 for idx in path:
329 cAccum *= deltas[idx]
330 if cAccum:
331 accum += 1./numpy.sqrt(cAccum)
332 return accum
333
335 """ Similar to Hall Kier Chi2v, but uses nVal instead of valence
336 This makes a big difference after we get out of the first row.
337
338 """
339 return ChiNn_(mol,2)
340 Chi2n.version="1.0.0"
341
343 """ Similar to Hall Kier Chi3v, but uses nVal instead of valence
344 This makes a big difference after we get out of the first row.
345
346 """
347 return ChiNn_(mol,3)
348 Chi3n.version="1.0.0"
349
351 """ Similar to Hall Kier Chi4v, but uses nVal instead of valence
352 This makes a big difference after we get out of the first row.
353
354
355 **NOTE**: because the current path finding code does, by design,
356 detect rings as paths (e.g. in C1CC1 there is *1* atom path of
357 length 3), values of Chi4n may give results that differ from those
358 provided by the old code in molecules that have 3 rings.
359
360 """
361 return ChiNn_(mol,4)
362 Chi4n.version="1.0.0"
363
364
365 -def BalabanJ(mol,dMat=None,forceDMat=0):
366 """ Calculate Balaban's J value for a molecule
367
368 **Arguments**
369
370 - mol: a molecule
371
372 - dMat: (optional) a distance/adjacency matrix for the molecule, if this
373 is not provide, one will be calculated
374
375 - forceDMat: (optional) if this is set, the distance/adjacency matrix
376 will be recalculated regardless of whether or not _dMat_ is provided
377 or the molecule already has one
378
379 **Returns**
380
381 - a float containing the J value
382
383 We follow the notation of Balaban's paper:
384 Chem. Phys. Lett. vol 89, 399-404, (1982)
385
386 """
387
388 if forceDMat or dMat is None:
389 if forceDMat:
390
391 dMat = Chem.GetDistanceMatrix(mol,useBO=1,useAtomWts=0,force=1)
392 mol._balabanMat = dMat
393 adjMat = Chem.GetAdjacencyMatrix(mol,useBO=0,emptyVal=0,force=0,prefix="NoBO")
394 mol._adjMat = adjMat
395 else:
396 try:
397
398 dMat = mol._balabanMat
399 except AttributeError:
400
401 dMat = Chem.GetDistanceMatrix(mol,useBO=1,useAtomWts=0,force=0,prefix="Balaban")
402
403 mol._balabanMat = dMat
404 try:
405 adjMat = mol._adjMat
406 except AttributeError:
407 adjMat = Chem.GetAdjacencyMatrix(mol,useBO=0,emptyVal=0,force=0,prefix="NoBO")
408 mol._adjMat = adjMat
409 else:
410 adjMat = Chem.GetAdjacencyMatrix(mol,useBO=0,emptyVal=0,force=0,prefix="NoBO")
411
412 s = _VertexDegrees(dMat)
413 q = _NumAdjacencies(mol,dMat)
414 n = mol.GetNumAtoms()
415 mu = q - n + 1
416
417 sum = 0.
418 nS = len(s)
419 for i in range(nS):
420 si = s[i]
421 for j in range(i,nS):
422 if adjMat[i,j] == 1:
423 sum += 1./numpy.sqrt(si*s[j])
424
425 if mu+1 != 0:
426 J = float(q) / float(mu + 1) * sum
427 else:
428 J = 0
429
430 return J
431 BalabanJ.version="1.0.0"
432
433
434
435
436
437
438
439
441 """
442 Used by BertzCT
443
444 vdList: the number of neighbors each atom has
445 bdMat: "balaban" distance matrix
446
447 """
448 if forceBDMat:
449 bdMat = Chem.GetDistanceMatrix(mol,useBO=1,useAtomWts=0,force=1,
450 prefix="Balaban")
451 mol._balabanMat = bdMat
452
453 atomIdx = 0
454 keysSeen = []
455 symList = [0]*numAtoms
456 for i in range(numAtoms):
457 tmpList = bdMat[i].tolist()
458 tmpList.sort()
459 theKey = tuple(['%.4f'%x for x in tmpList[:cutoff]])
460 try:
461 idx = keysSeen.index(theKey)
462 except ValueError:
463 idx = len(keysSeen)
464 keysSeen.append(theKey)
465 symList[i] = idx+1
466 return tuple(symList)
467
469 """
470 Used by BertzCT
471 """
472 if atom1Id < atom2Id:
473 theKey = (atom1Id,atom2Id)
474 else:
475 theKey = (atom2Id,atom1Id)
476 tmp = bondDic[theKey]
477 if tmp == Chem.BondType.AROMATIC:
478 tmp = 1.5
479 else:
480 tmp = float(tmp)
481
482 return tmp
483
484
486 """
487 Used by BertzCT
488 """
489 connectionList = connectionDict.values()
490 totConnections = sum(connectionList)
491 connectionIE = totConnections*(entropy.InfoEntropy(numpy.array(connectionList)) +
492 math.log(totConnections)/_log2val)
493 atomTypeList = atomTypeDict.values()
494 atomTypeIE = numAtoms*entropy.InfoEntropy(numpy.array(atomTypeList))
495 return atomTypeIE + connectionIE
496
498 """ _Internal Use Only_
499 Used by BertzCT
500
501 """
502 bondDict = {}
503 nList = [None]*numAtoms
504 vdList = [0]*numAtoms
505 for aBond in mol.GetBonds():
506 atom1=aBond.GetBeginAtomIdx()
507 atom2=aBond.GetEndAtomIdx()
508 if atom1>atom2: atom2,atom1=atom1,atom2
509 if not aBond.GetIsAromatic():
510 bondDict[(atom1,atom2)] = aBond.GetBondType()
511 else:
512
513 bondDict[(atom1,atom2)] = Chem.BondType.AROMATIC
514 if nList[atom1] is None:
515 nList[atom1] = [atom2]
516 elif atom2 not in nList[atom1]:
517 nList[atom1].append(atom2)
518 if nList[atom2] is None:
519 nList[atom2] = [atom1]
520 elif atom1 not in nList[atom2]:
521 nList[atom2].append(atom1)
522
523 for i,element in enumerate(nList):
524 try:
525 element.sort()
526 vdList[i] = len(element)
527 except:
528 vdList[i] = 0
529 return bondDict, nList, vdList
530
531 -def BertzCT(mol, cutoff = 100, dMat = None, forceDMat = 1):
532 """ A topological index meant to quantify "complexity" of molecules.
533
534 Consists of a sum of two terms, one representing the complexity
535 of the bonding, the other representing the complexity of the
536 distribution of heteroatoms.
537
538 From S. H. Bertz, J. Am. Chem. Soc., vol 103, 3599-3601 (1981)
539
540 "cutoff" is an integer value used to limit the computational
541 expense. A cutoff value tells the program to consider vertices
542 topologically identical if their distance vectors (sets of
543 distances to all other vertices) are equal out to the "cutoff"th
544 nearest-neighbor.
545
546 **NOTE** The original implementation had the following comment:
547 > this implementation treats aromatic rings as the
548 > corresponding Kekule structure with alternating bonds,
549 > for purposes of counting "connections".
550 Upon further thought, this is the WRONG thing to do. It
551 results in the possibility of a molecule giving two different
552 CT values depending on the kekulization. For example, in the
553 old implementation, these two SMILES:
554 CC2=CN=C1C3=C(C(C)=C(C=N3)C)C=CC1=C2C
555 CC3=CN=C2C1=NC=C(C)C(C)=C1C=CC2=C3C
556 which correspond to differentk kekule forms, yield different
557 values.
558 The new implementation uses consistent (aromatic) bond orders
559 for aromatic bonds.
560
561 THIS MEANS THAT THIS IMPLEMENTATION IS NOT BACKWARDS COMPATIBLE.
562
563 Any molecule containing aromatic rings will yield different
564 values with this implementation. The new behavior is the correct
565 one, so we're going to live with the breakage.
566
567 **NOTE** this barfs if the molecule contains a second (or
568 nth) fragment that is one atom.
569
570 """
571 atomTypeDict = {}
572 connectionDict = {}
573 numAtoms = mol.GetNumAtoms()
574 if forceDMat or dMat is None:
575 if forceDMat:
576
577 dMat = Chem.GetDistanceMatrix(mol,useBO=0,useAtomWts=0,force=1)
578 mol._adjMat = dMat
579 else:
580 try:
581 dMat = mol._adjMat
582 except AttributeError:
583 dMat = Chem.GetDistanceMatrix(mol,useBO=0,useAtomWts=0,force=1)
584 mol._adjMat = dMat
585
586 if numAtoms < 2:
587 return 0
588
589 bondDict, neighborList, vdList = _CreateBondDictEtc(mol, numAtoms)
590 symmetryClasses = _AssignSymmetryClasses(mol, vdList, dMat, forceDMat, numAtoms, cutoff)
591
592 for atomIdx in range(numAtoms):
593 hingeAtomNumber = mol.GetAtomWithIdx(atomIdx).GetAtomicNum()
594 atomTypeDict[hingeAtomNumber] = atomTypeDict.get(hingeAtomNumber,0)+1
595
596 hingeAtomClass = symmetryClasses[atomIdx]
597 numNeighbors = vdList[atomIdx]
598 for i in range(numNeighbors):
599 neighbor_iIdx = neighborList[atomIdx][i]
600 NiClass = symmetryClasses[neighbor_iIdx]
601 bond_i_order = _LookUpBondOrder(atomIdx, neighbor_iIdx, bondDict)
602
603 if (bond_i_order > 1) and (neighbor_iIdx > atomIdx):
604 numConnections = bond_i_order*(bond_i_order - 1)/2
605 connectionKey = (min(hingeAtomClass, NiClass), max(hingeAtomClass, NiClass))
606 connectionDict[connectionKey] = connectionDict.get(connectionKey,0)+numConnections
607
608 for j in range(i+1, numNeighbors):
609 neighbor_jIdx = neighborList[atomIdx][j]
610 NjClass = symmetryClasses[neighbor_jIdx]
611 bond_j_order = _LookUpBondOrder(atomIdx, neighbor_jIdx, bondDict)
612 numConnections = bond_i_order*bond_j_order
613 connectionKey = (min(NiClass, NjClass), hingeAtomClass, max(NiClass, NjClass))
614 connectionDict[connectionKey] = connectionDict.get(connectionKey,0)+numConnections
615
616 if not connectionDict:
617 connectionDict = {'a':1}
618
619 ks = connectionDict.keys()
620 ks.sort()
621 return _CalculateEntropies(connectionDict, atomTypeDict, numAtoms)
622 BertzCT.version="2.0.0"
623
624
625
626
627
628
629
630
631
632
633