1
2
3
4
5
6
7 """ contains SMARTS definitions and calculators for EState atom types
8
9 defined in: Hall and Kier JCICS _35_ 1039-1045 (1995) Table 1
10 """
11 from rdkit import Chem
12
13 _rawD = [
14 ('sLi','[LiD1]-*'),
15
16 ('ssBe','[BeD2](-*)-*'),
17 ('ssssBe','[BeD4](-*)(-*)(-*)-*'),
18
19 ('ssBH', '[BD2H](-*)-*'),
20 ('sssB', '[BD3](-*)(-*)-*'),
21 ('ssssB','[BD4](-*)(-*)(-*)-*'),
22
23 ('sCH3', '[CD1H3]-*'),
24 ('dCH2', '[CD1H2]=*'),
25 ('ssCH2','[CD2H2](-*)-*'),
26 ('tCH', '[CD1H]#*'),
27 ('dsCH', '[CD2H](=*)-*'),
28 ('aaCH', '[C,c;D2H](:*):*'),
29 ('sssCH','[CD3H](-*)(-*)-*'),
30 ('ddC', '[CD2H0](=*)=*'),
31 ('tsC', '[CD2H0](#*)-*'),
32 ('dssC', '[CD3H0](=*)(-*)-*'),
33 ('aasC', '[C,c;D3H0](:*)(:*)-*'),
34 ('aaaC', '[C,c;D3H0](:*)(:*):*'),
35 ('ssssC','[CD4H0](-*)(-*)(-*)-*'),
36
37 ('sNH3', '[ND1H3]-*'),
38 ('sNH2', '[ND1H2]-*'),
39 ('ssNH2','[ND2H2](-*)-*'),
40 ('dNH', '[ND1H]=*'),
41 ('ssNH', '[ND2H](-*)-*'),
42 ('aaNH', '[N,nD2H](:*):*'),
43 ('tN', '[ND1H0]#*'),
44 ('sssNH','[ND3H](-*)(-*)-*'),
45 ('dsN', '[ND2H0](=*)-*'),
46 ('aaN', '[N,nD2H0](:*):*'),
47 ('sssN', '[ND3H0](-*)(-*)-*'),
48 ('ddsN', '[ND3H0](~[OD1H0])(~[OD1H0])-,:*'),
49 ('aasN', '[N,nD3H0](:*)(:*)-,:*'),
50 ('ssssN','[ND4H0](-*)(-*)(-*)-*'),
51
52 ('sOH','[OD1H]-*'),
53 ('dO', '[OD1H0]=*'),
54 ('ssO','[OD2H0](-*)-*'),
55 ('aaO','[O,oD2H0](:*):*'),
56
57 ('sF','[FD1]-*'),
58
59 ('sSiH3', '[SiD1H3]-*'),
60 ('ssSiH2','[SiD2H2](-*)-*'),
61 ('sssSiH','[SiD3H1](-*)(-*)-*'),
62 ('ssssSi','[SiD4H0](-*)(-*)(-*)-*'),
63
64 ('sPH2', '[PD1H2]-*'),
65 ('ssPH', '[PD2H1](-*)-*'),
66 ('sssP', '[PD3H0](-*)(-*)-*'),
67 ('dsssP', '[PD4H0](=*)(-*)(-*)-*'),
68 ('sssssP','[PD5H0](-*)(-*)(-*)(-*)-*'),
69
70 ('sSH', '[SD1H1]-*'),
71 ('dS', '[SD1H0]=*'),
72 ('ssS', '[SD2H0](-*)-*'),
73 ('aaS', '[S,sD2H0](:*):*'),
74 ('dssS', '[SD3H0](=*)(-*)-*'),
75 ('ddssS','[SD4H0](~[OD1H0])(~[OD1H0])(-*)-*'),
76
77 ('sCl', '[ClD1]-*'),
78
79 ('sGeH3', '[GeD1H3](-*)'),
80 ('ssGeH2','[GeD2H2](-*)-*'),
81 ('sssGeH','[GeD3H1](-*)(-*)-*'),
82 ('ssssGe','[GeD4H0](-*)(-*)(-*)-*'),
83
84 ('sAsH2', '[AsD1H2]-*'),
85 ('ssAsH', '[AsD2H1](-*)-*'),
86 ('sssAs', '[AsD3H0](-*)(-*)-*'),
87 ('sssdAs', '[AsD4H0](=*)(-*)(-*)-*'),
88 ('sssssAs','[AsD5H0](-*)(-*)(-*)(-*)-*'),
89
90 ('sSeH', '[SeD1H1]-*'),
91 ('dSe', '[SeD1H0]=*'),
92 ('ssSe', '[SeD2H0](-*)-*'),
93 ('aaSe', '[SeD2H0](:*):*'),
94 ('dssSe', '[SeD3H0](=*)(-*)-*'),
95 ('ddssSe','[SeD4H0](=*)(=*)(-*)-*'),
96
97 ('sBr','[BrD1]-*'),
98
99 ('sSnH3', '[SnD1H3]-*'),
100 ('ssSnH2','[SnD2H2](-*)-*'),
101 ('sssSnH','[SnD3H1](-*)(-*)-*'),
102 ('ssssSn','[SnD4H0](-*)(-*)(-*)-*'),
103
104 ('sI','[ID1]-*'),
105
106 ('sPbH3', '[PbD1H3]-*'),
107 ('ssPbH2','[PbD2H2](-*)-*'),
108 ('sssPbH','[PbD3H1](-*)(-*)-*'),
109 ('ssssPb','[PbD4H0](-*)(-*)(-*)-*'),
110 ]
111
112 esPatterns=None
129
130
132 """ assigns each atom in a molecule to an EState type
133
134 **Returns:**
135
136 list of tuples (atoms can possibly match multiple patterns) with atom types
137
138 """
139 if esPatterns is None:
140 BuildPatts()
141 nAtoms = mol.GetNumAtoms()
142 res = [None]*nAtoms
143 for name,patt in esPatterns:
144 matches = mol.GetSubstructMatches(patt,uniquify=0)
145 for match in matches:
146 idx = match[0]
147 if res[idx] is None:
148 res[idx] = [name]
149 elif name not in res[idx]:
150 res[idx].append(name)
151 for i,v in enumerate(res):
152 if v is not None:
153 res[i] = tuple(v)
154 else:
155 res[i] = ()
156 return res
157