Package rdkit :: Package Chem :: Package EState :: Module AtomTypes
[hide private]
[frames | no frames]

Source Code for Module rdkit.Chem.EState.AtomTypes

  1  # $Id: AtomTypes.py 997 2009-02-25 06:12:43Z glandrum $ 
  2  # 
  3  #  Copyright (C) 2002-2006  greg Landrum and Rational Discovery LLC 
  4  # 
  5  #   @@ All Rights Reserved  @@ 
  6  # 
  7  """ contains SMARTS definitions and calculators for EState atom types 
  8   
  9   defined in: Hall and Kier JCICS _35_ 1039-1045 (1995)  Table 1 
 10  """ 
 11  from rdkit import Chem 
 12   
 13  _rawD = [ 
 14    ('sLi','[LiD1]-*'), 
 15   
 16    ('ssBe','[BeD2](-*)-*'), 
 17    ('ssssBe','[BeD4](-*)(-*)(-*)-*'), 
 18   
 19    ('ssBH', '[BD2H](-*)-*'), 
 20    ('sssB', '[BD3](-*)(-*)-*'), 
 21    ('ssssB','[BD4](-*)(-*)(-*)-*'), 
 22   
 23    ('sCH3', '[CD1H3]-*'), 
 24    ('dCH2', '[CD1H2]=*'), 
 25    ('ssCH2','[CD2H2](-*)-*'), 
 26    ('tCH',  '[CD1H]#*'), 
 27    ('dsCH', '[CD2H](=*)-*'), 
 28    ('aaCH', '[C,c;D2H](:*):*'), 
 29    ('sssCH','[CD3H](-*)(-*)-*'), 
 30    ('ddC',  '[CD2H0](=*)=*'), 
 31    ('tsC',  '[CD2H0](#*)-*'), 
 32    ('dssC', '[CD3H0](=*)(-*)-*'),   
 33    ('aasC', '[C,c;D3H0](:*)(:*)-*'), 
 34    ('aaaC', '[C,c;D3H0](:*)(:*):*'), 
 35    ('ssssC','[CD4H0](-*)(-*)(-*)-*'), 
 36   
 37    ('sNH3', '[ND1H3]-*'), 
 38    ('sNH2', '[ND1H2]-*'), 
 39    ('ssNH2','[ND2H2](-*)-*'), 
 40    ('dNH',  '[ND1H]=*'), 
 41    ('ssNH', '[ND2H](-*)-*'), 
 42    ('aaNH', '[N,nD2H](:*):*'), 
 43    ('tN',   '[ND1H0]#*'), 
 44    ('sssNH','[ND3H](-*)(-*)-*'), 
 45    ('dsN',  '[ND2H0](=*)-*'), 
 46    ('aaN',  '[N,nD2H0](:*):*'), 
 47    ('sssN', '[ND3H0](-*)(-*)-*'), 
 48    ('ddsN', '[ND3H0](~[OD1H0])(~[OD1H0])-,:*'),  # mod 
 49    ('aasN', '[N,nD3H0](:*)(:*)-,:*'),              # mod 
 50    ('ssssN','[ND4H0](-*)(-*)(-*)-*'), 
 51   
 52    ('sOH','[OD1H]-*'), 
 53    ('dO', '[OD1H0]=*'), 
 54    ('ssO','[OD2H0](-*)-*'), 
 55    ('aaO','[O,oD2H0](:*):*'), 
 56   
 57    ('sF','[FD1]-*'), 
 58   
 59    ('sSiH3', '[SiD1H3]-*'), 
 60    ('ssSiH2','[SiD2H2](-*)-*'), 
 61    ('sssSiH','[SiD3H1](-*)(-*)-*'), 
 62    ('ssssSi','[SiD4H0](-*)(-*)(-*)-*'), 
 63   
 64    ('sPH2',  '[PD1H2]-*'), 
 65    ('ssPH',  '[PD2H1](-*)-*'), 
 66    ('sssP',  '[PD3H0](-*)(-*)-*'), 
 67    ('dsssP', '[PD4H0](=*)(-*)(-*)-*'), 
 68    ('sssssP','[PD5H0](-*)(-*)(-*)(-*)-*'), 
 69      
 70    ('sSH',  '[SD1H1]-*'), 
 71    ('dS',   '[SD1H0]=*'), 
 72    ('ssS',  '[SD2H0](-*)-*'), 
 73    ('aaS',  '[S,sD2H0](:*):*'), 
 74    ('dssS', '[SD3H0](=*)(-*)-*'), 
 75    ('ddssS','[SD4H0](~[OD1H0])(~[OD1H0])(-*)-*'),  # mod 
 76   
 77    ('sCl', '[ClD1]-*'), 
 78   
 79    ('sGeH3', '[GeD1H3](-*)'), 
 80    ('ssGeH2','[GeD2H2](-*)-*'), 
 81    ('sssGeH','[GeD3H1](-*)(-*)-*'), 
 82    ('ssssGe','[GeD4H0](-*)(-*)(-*)-*'), 
 83   
 84    ('sAsH2',  '[AsD1H2]-*'), 
 85    ('ssAsH',  '[AsD2H1](-*)-*'), 
 86    ('sssAs',  '[AsD3H0](-*)(-*)-*'), 
 87    ('sssdAs', '[AsD4H0](=*)(-*)(-*)-*'), 
 88    ('sssssAs','[AsD5H0](-*)(-*)(-*)(-*)-*'), 
 89   
 90    ('sSeH',  '[SeD1H1]-*'), 
 91    ('dSe',   '[SeD1H0]=*'), 
 92    ('ssSe',  '[SeD2H0](-*)-*'), 
 93    ('aaSe',  '[SeD2H0](:*):*'), 
 94    ('dssSe', '[SeD3H0](=*)(-*)-*'), 
 95    ('ddssSe','[SeD4H0](=*)(=*)(-*)-*'), 
 96   
 97    ('sBr','[BrD1]-*'), 
 98   
 99    ('sSnH3', '[SnD1H3]-*'), 
100    ('ssSnH2','[SnD2H2](-*)-*'), 
101    ('sssSnH','[SnD3H1](-*)(-*)-*'), 
102    ('ssssSn','[SnD4H0](-*)(-*)(-*)-*'), 
103   
104    ('sI','[ID1]-*'), 
105   
106    ('sPbH3', '[PbD1H3]-*'), 
107    ('ssPbH2','[PbD2H2](-*)-*'), 
108    ('sssPbH','[PbD3H1](-*)(-*)-*'), 
109    ('ssssPb','[PbD4H0](-*)(-*)(-*)-*'), 
110  ] 
111   
# Cache of compiled patterns.  None until BuildPatts() has run; afterwards a
# list with one slot per definition, each holding a (name, pattern) tuple or
# None when that definition was skipped.
esPatterns=None
def BuildPatts(rawV=None):
  """ Internal Use Only

  Compiles SMARTS definitions into query molecules and stores the result in
  the module-level esPatterns list.

  **Arguments:**

    - rawV: (optional) a sequence of (name, SMARTS) 2-tuples; when omitted the
      module's _rawD table is used

  **Notes:**

    - esPatterns gets exactly one slot per entry of rawV, in the same order,
      so positions in the list stay aligned with positions in rawV

    - a definition whose SMARTS cannot be compiled is reported on stderr and
      its slot is left as None

  """
  global esPatterns,_rawD
  # imported here because sys is not among the imports visible at module level
  import sys
  if rawV is None:
    rawV = _rawD

  patterns = [None]*len(rawV)
  for i,(name,sma) in enumerate(rawV):
    try:
      patt = Chem.MolFromSmarts(sma)
    except Exception:
      patt = None
    # RDKit normally reports an unparsable SMARTS by returning None rather than
    # raising, so both failure modes are funnelled into the same warning.
    if patt is None:
      sys.stderr.write('WARNING: problems with pattern %s (name: %s), skipped.\n'%(sma,name))
    else:
      patterns[i] = name,patt
  esPatterns = patterns


def TypeAtoms(mol):
  """ assigns each atom in a molecule to an EState type

  **Arguments:**

    - mol: the molecule to be typed; it is queried through GetNumAtoms() and
      GetSubstructMatches()

  **Returns:**

     list of tuples (atoms can possibly match multiple patterns) with atom types;
     the list has one tuple per atom and the tuple is empty for an atom that
     matches none of the patterns

  **Notes:**

    - the pattern table is built on first use if it has not been built yet

    - slots of esPatterns that are None (definitions skipped while building
      the table) are ignored

  """
  if esPatterns is None:
    BuildPatts()
  typesPerAtom = [[] for _ in range(mol.GetNumAtoms())]
  for entry in esPatterns:
    if entry is None:
      continue
    name,patt = entry
    # uniquify=0 keeps every mapping of the pattern; only the first matched
    # atom (the one the pattern is written around) receives the type, and each
    # type name is recorded once per atom.
    for match in mol.GetSubstructMatches(patt,uniquify=0):
      names = typesPerAtom[match[0]]
      if name not in names:
        names.append(name)
  return [tuple(names) for names in typesPerAtom]
