1
2
3
4
5
6
7 """ EState fingerprinting
8
9 """
10 import Numeric
11 from Chem.EState import EStateIndices
12 from Chem.EState import AtomTypes
13
def FingerprintMol(mol):
    """ generates the EState fingerprints for the molecule

    Concept from the paper: Hall and Kier JCICS _35_ 1039-1045 (1995)

    Arguments:
      - mol: the molecule to fingerprint.  It must provide
        GetSubstructMatches(pattern, uniquify=...) and be acceptable to
        EStateIndices().

    two numeric arrays are returned:
      The first (of ints) contains the number of times each possible atom type is hit
      The second (of floats) contains the sum of the EState indices for atoms of
        each type.

    Both arrays have one entry per element of AtomTypes.esPatterns, in the
    same order.

    """
    # The pattern table is created on demand the first time it is needed.
    if AtomTypes.esPatterns is None:
        AtomTypes.BuildPatts()
    esIndices = EStateIndices(mol)

    nPatts = len(AtomTypes.esPatterns)
    counts = Numeric.zeros(nPatts, Numeric.Int)
    sums = Numeric.zeros(nPatts, Numeric.Float)

    # Each entry of esPatterns is a (name, pattern) pair; only the pattern
    # is needed here.
    for i, (_, pattern) in enumerate(AtomTypes.esPatterns):
        matches = mol.GetSubstructMatches(pattern, uniquify=1)
        counts[i] = len(matches)
        for match in matches:
            # Only the first index of a match is used to look up the EState
            # value (presumably the atom being typed -- TODO confirm against
            # the pattern definitions in AtomTypes).
            sums[i] += esIndices[match[0]]
    return counts, sums
39
40
if __name__ == '__main__':
    # Demonstration driver: for a handful of SMILES strings, print the atom
    # typing, the per-type fingerprint (counts and EState sums) and the raw
    # per-atom EState indices.
    import Chem

    demoSmiles = ['CC','CCC','c1[nH]cnc1CC(N)C(O)=O','NCCc1ccc(O)c(O)c1']
    for smi in demoSmiles:
        m = Chem.MolFromSmiles(smi)
        print('%s %s' % (smi, Chem.MolToSmiles(m)))

        # Per-atom listing: 1-based atom number, element symbol, assigned types.
        types = AtomTypes.TypeAtoms(m)
        for atomIdx in range(m.GetNumAtoms()):
            symbol = m.GetAtomWithIdx(atomIdx).GetSymbol()
            print('%d %4s: %s' % (atomIdx + 1, symbol, str(types[atomIdx])))

        es = EStateIndices(m)
        counts, sums = FingerprintMol(m)

        # Report only the atom types that were hit at least once.
        for pattIdx, (name, _) in enumerate(AtomTypes.esPatterns):
            if not counts[pattIdx]:
                continue
            print('%6s, % 2d, % 5.4f' % (name, counts[pattIdx], sums[pattIdx]))

        # Raw EState index of every atom, numbered from 1.
        for atomIdx, value in enumerate(es):
            print('% 2d, % 5.4f' % (atomIdx + 1, value))
        print('--------')
59