1
2
3
4
5
6
7
8
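""" EState fingerprinting

Computes, for each EState atom type, how many times the type occurs in a
molecule and the sum of the EState indices of the atoms of that type.

"""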
12 import numpy
13 from rdkit.Chem.EState import EStateIndices
14 from rdkit.Chem.EState import AtomTypes
15
def FingerprintMol(mol):
    """ generates the EState fingerprints for the molecule

    Concept from the paper: Hall and Kier JCICS _35_ 1039-1045 (1995)

    Arguments:
      - mol: the molecule to fingerprint; it is passed to EStateIndices()
        and queried with GetSubstructMatches() for every EState pattern.

    two numeric arrays are returned:
      The first (of ints) contains the number of times each possible atom
        type is hit
      The second (of floats) contains the sum of the EState indices for
        atoms of each type.

    Both arrays have one entry per pattern in AtomTypes.esPatterns, in the
    same order as that list.

    """
    # The pattern table is built lazily the first time it is needed.
    if AtomTypes.esPatterns is None:
        AtomTypes.BuildPatts()
    esIndices = EStateIndices(mol)

    nPatts = len(AtomTypes.esPatterns)
    # numpy.int / numpy.float were plain aliases of the builtins and have been
    # removed from NumPy (1.24+); the builtin types give the same dtypes.
    counts = numpy.zeros(nPatts, dtype=int)
    sums = numpy.zeros(nPatts, dtype=float)

    for i, (_, pattern) in enumerate(AtomTypes.esPatterns):
        matches = mol.GetSubstructMatches(pattern, uniquify=True)
        counts[i] = len(matches)
        # Accumulate in place (rather than with sum()) so the floating point
        # summation order stays exactly as it always was.
        for match in matches:
            # The first index of each match selects the atom whose EState
            # index is added to this type's total.
            sums[i] += esIndices[match[0]]
    return counts, sums
41
42
if __name__ == '__main__':
    # Demo: for a few example SMILES, print the atom types, the non-empty
    # entries of the EState fingerprint and the per-atom EState indices.
    #
    # Every print() call takes a single pre-formatted string so that the
    # output is identical under Python 2 and Python 3.
    from rdkit import Chem
    smis = ['CC', 'CCC', 'c1[nH]cnc1CC(N)C(O)=O', 'NCCc1ccc(O)c(O)c1']
    for smi in smis:
        m = Chem.MolFromSmiles(smi)
        print('%s %s' % (smi, Chem.MolToSmiles(m)))
        types = AtomTypes.TypeAtoms(m)
        for i in range(m.GetNumAtoms()):
            print('%d %4s: %s' % (i + 1, m.GetAtomWithIdx(i).GetSymbol(), str(types[i])))
        es = EStateIndices(m)
        counts, sums = FingerprintMol(m)
        # Only report the atom types that actually occur in the molecule.
        for (name, _), count, total in zip(AtomTypes.esPatterns, counts, sums):
            if count:
                print('%6s, % 2d, % 5.4f' % (name, count, total))
        for i, value in enumerate(es):
            print('% 2d, % 5.4f' % (i + 1, value))
        print('--------')
61