Package rdkit :: Package Chem :: Module MolSurf
[hide private]
[frames] | no frames]

Source Code for Module rdkit.Chem.MolSurf

  1  # $Id: MolSurf.py 997 2009-02-25 06:12:43Z glandrum $ 
  2  # 
  3  # Copyright (C) 2001-2008 greg Landrum and Rational Discovery LLC 
  4  # 
  5  #   @@ All Rights Reserved  @@ 
  6  # 
  7  """ Exposes functionality for MOE-like approximate molecular surface area 
  8  descriptors. 
  9   
 10    The MOE-like VSA descriptors are also calculated here 
 11   
 12  """ 
 13  from rdkit import Chem 
 14  from rdkit.Chem.PeriodicTable import numTable 
 15  from rdkit.Chem import Crippen 
 16  from rdkit.Chem import rdPartialCharges,rdMolDescriptors 
 17  import numpy 
 18  import bisect 
 19  radCol = 5 
 20  bondScaleFacts = [ .1,0,.2,.3] # aromatic,single,double,triple 
 21   
22 -def _LabuteHelper(mol,includeHs=1,force=0):
23 """ *Internal Use Only* 24 helper function for LabuteASA calculation 25 returns an array of atomic contributions to the ASA 26 27 **Note:** Changes here affect the version numbers of all ASA descriptors 28 29 """ 30 if not force: 31 try: 32 res = mol._labuteContribs 33 except AttributeError: 34 pass 35 else: 36 if res: 37 return res 38 tpl = rdMolDescriptors._CalcLabuteASAContribs(mol,includeHs) 39 ats,hs=tpl 40 Vi=[hs]+list(ats) 41 mol._labuteContribs=Vi 42 return Vi
43 -def _pyLabuteHelper(mol,includeHs=1,force=0):
44 """ *Internal Use Only* 45 helper function for LabuteASA calculation 46 returns an array of atomic contributions to the ASA 47 48 **Note:** Changes here affect the version numbers of all ASA descriptors 49 50 """ 51 if not force: 52 try: 53 res = mol._labuteContribs 54 except AttributeError: 55 pass 56 else: 57 if res.all(): 58 return res 59 60 nAts = mol.GetNumAtoms() 61 Vi = numpy.zeros(nAts+1,'d') 62 rads = numpy.zeros(nAts+1,'d') 63 64 # 0 contains the H information 65 rads[0] = numTable[1][radCol] 66 for i in xrange(nAts): 67 rads[i+1] = numTable[mol.GetAtomWithIdx(i).GetAtomicNum()][radCol] 68 69 # start with explicit bonds 70 for bond in mol.GetBonds(): 71 idx1 = bond.GetBeginAtomIdx()+1 72 idx2 = bond.GetEndAtomIdx()+1 73 Ri = rads[idx1] 74 Rj = rads[idx2] 75 76 if not bond.GetIsAromatic(): 77 bij = Ri+Rj - bondScaleFacts[bond.GetBondType()] 78 else: 79 bij = Ri+Rj - bondScaleFacts[0] 80 dij = min( max( abs(Ri-Rj), bij), Ri+Rj) 81 Vi[idx1] += Rj*Rj - (Ri-dij)**2 / dij 82 Vi[idx2] += Ri*Ri - (Rj-dij)**2 / dij 83 84 # add in hydrogens 85 if includeHs: 86 j = 0 87 Rj = rads[j] 88 for i in xrange(1,nAts+1): 89 Ri = rads[i] 90 bij = Ri+Rj 91 dij = min( max( abs(Ri-Rj), bij), Ri+Rj) 92 Vi[i] += Rj*Rj - (Ri-dij)**2 / dij 93 Vi[j] += Ri*Ri - (Rj-dij)**2 / dij 94 95 for i in xrange(nAts+1): 96 Ri = rads[i] 97 Vi[i] = 4*pi * Ri**2 - pi * Ri * Vi[i] 98 99 mol._labuteContribs=Vi 100 return Vi
101 102 #def SMR_VSA(mol,bins=[0.11,0.26,0.35,0.39,0.44,0.485,0.56]): 103 # original default bins from assuming Labute values are logs 104 # mrBins=[1.29, 1.82, 2.24, 2.45, 2.75, 3.05, 3.63] 105 mrBins=[1.29, 1.82, 2.24, 2.45, 2.75, 3.05, 3.63,3.8,4.0]
106 -def SMR_VSA_(mol,bins=None,force=1):
107 """ *Internal Use Only* 108 """ 109 if not force: 110 try: 111 res = mol._smrVSA 112 except AttributeError: 113 pass 114 else: 115 if res.all(): 116 return res 117 118 if bins is None: bins = mrBins 119 Crippen._Init() 120 propContribs = Crippen._GetAtomContribs(mol,force=force) 121 volContribs = _LabuteHelper(mol) 122 123 ans = numpy.zeros(len(bins)+1,'d') 124 for i in range(len(propContribs)): 125 prop = propContribs[i] 126 vol = volContribs[i+1] 127 if prop is not None: 128 bin = bisect.bisect_right(bins,prop[1]) 129 ans[bin] += vol 130 131 mol._smrVSA=ans 132 return ans
133 134 # 135 # Original bins (from Labute paper) are: 136 # [-0.4,-0.2,0,0.1,0.15,0.2,0.25,0.3,0.4] 137 # 138 logpBins=[-0.4,-0.2,0,0.1,0.15,0.2,0.25,0.3,0.4,0.5,0.6]
139 -def SlogP_VSA_(mol,bins=None,force=1):
140 """ *Internal Use Only* 141 """ 142 if not force: 143 try: 144 res = mol._slogpVSA 145 except AttributeError: 146 pass 147 else: 148 if res.all(): 149 return res 150 151 if bins is None: bins = logpBins 152 Crippen._Init() 153 propContribs = Crippen._GetAtomContribs(mol,force=force) 154 volContribs = _LabuteHelper(mol) 155 156 ans = numpy.zeros(len(bins)+1,'d') 157 for i in range(len(propContribs)): 158 prop = propContribs[i] 159 vol = volContribs[i+1] 160 if prop is not None: 161 bin = bisect.bisect_right(bins,prop[0]) 162 ans[bin] += vol 163 164 mol._slogpVSA=ans 165 return ans
166 167 chgBins=[-.3,-.25,-.20,-.15,-.10,-.05,0,.05,.10,.15,.20,.25,.30]
168 -def PEOE_VSA_(mol,bins=None,force=1):
169 """ *Internal Use Only* 170 """ 171 if not force: 172 try: 173 res = mol._peoeVSA 174 except AttributeError: 175 pass 176 else: 177 if res.all(): 178 return res 179 if bins is None: bins = chgBins 180 Crippen._Init() 181 #print '\ts:',repr(mol.GetMol()) 182 #print '\t\t:',len(mol.GetAtoms()) 183 rdPartialCharges.ComputeGasteigerCharges(mol) 184 185 #propContribs = [float(x.GetProp('_GasteigerCharge')) for x in mol.GetAtoms()] 186 propContribs=[] 187 for at in mol.GetAtoms(): 188 p = at.GetProp('_GasteigerCharge') 189 try: 190 v = float(p) 191 except ValueError: 192 v = 0.0 193 propContribs.append(v) 194 #print '\tp',propContribs 195 volContribs = _LabuteHelper(mol) 196 #print '\tv',volContribs 197 198 ans = numpy.zeros(len(bins)+1,'d') 199 for i in range(len(propContribs)): 200 prop = propContribs[i] 201 vol = volContribs[i+1] 202 if prop is not None: 203 bin = bisect.bisect_right(bins,prop) 204 ans[bin] += vol 205 206 mol._peoeVSA=ans 207 return ans
208 209 #------------------------------------------------- 210 # install the various VSA descriptors in the namespace 211 # (this is used by AvailDescriptors)
212 -def _InstallDescriptors():
213 for i in range(len(mrBins)): 214 fn = lambda x,y=i:SMR_VSA_(x,force=0)[y] 215 if i > 0: 216 fn.__doc__="MOE MR VSA Descriptor %d (% 4.2f <= x < % 4.2f)"%(i+1,mrBins[i-1],mrBins[i]) 217 else: 218 fn.__doc__="MOE MR VSA Descriptor %d (-inf < x < % 4.2f)"%(i+1,mrBins[i]) 219 name="SMR_VSA%d"%(i+1) 220 fn.version="1.0.1" 221 globals()[name]=fn 222 i+=1 223 fn = lambda x,y=i:SMR_VSA_(x,force=0)[y] 224 fn.__doc__="MOE MR VSA Descriptor %d (% 4.2f <= x < inf)"%(i+1,mrBins[i-1]) 225 fn.version="1.0.1" 226 name="SMR_VSA%d"%(i+1) 227 globals()[name]=fn 228 229 for i in range(len(logpBins)): 230 fn = lambda x,y=i:SlogP_VSA_(x,force=0)[y] 231 if i > 0: 232 fn.__doc__="MOE logP VSA Descriptor %d (% 4.2f <= x < % 4.2f)"%(i+1,logpBins[i-1], 233 logpBins[i]) 234 else: 235 fn.__doc__="MOE logP VSA Descriptor %d (-inf < x < % 4.2f)"%(i+1,logpBins[i]) 236 name="SlogP_VSA%d"%(i+1) 237 fn.version="1.0.1" 238 globals()[name]=fn 239 i+=1 240 fn = lambda x,y=i:SlogP_VSA_(x,force=0)[y] 241 fn.__doc__="MOE logP VSA Descriptor %d (% 4.2f <= x < inf)"%(i+1,logpBins[i-1]) 242 fn.version="1.0.1" 243 name="SlogP_VSA%d"%(i+1) 244 globals()[name]=fn 245 246 for i in range(len(chgBins)): 247 fn = lambda x,y=i:PEOE_VSA_(x,force=0)[y] 248 if i > 0: 249 fn.__doc__="MOE Charge VSA Descriptor %d (% 4.2f <= x < % 4.2f)"%(i+1,chgBins[i-1], 250 chgBins[i]) 251 else: 252 fn.__doc__="MOE Chage VSA Descriptor %d (-inf < x < % 4.2f)"%(i+1,chgBins[i]) 253 name="PEOE_VSA%d"%(i+1) 254 fn.version="1.0.1" 255 globals()[name]=fn 256 i+=1 257 fn = lambda x,y=i:PEOE_VSA_(x,force=0)[y] 258 fn.version="1.0.1" 259 fn.__doc__="MOE Charge VSA Descriptor %d (% 4.2f <= x < inf)"%(i+1,chgBins[i-1]) 260 name="PEOE_VSA%d"%(i+1) 261 globals()[name]=fn 262 fn = None
263 # Change log for the MOE-type descriptors: 264 # version 1.0.1: optimizations, values unaffected 265 _InstallDescriptors() 266
267 -def pyLabuteASA(mol,includeHs=1):
268 """ calculates Labute's Approximate Surface Area (ASA from MOE) 269 270 Definition from P. Labute's article in the Journal of the Chemical Computing Group 271 and J. Mol. Graph. Mod. _18_ 464-477 (2000) 272 273 """ 274 Vi = _LabuteHelper(mol,includeHs=includeHs) 275 return sum(Vi)
276 pyLabuteASA.version="1.0.1" 277 # Change log for LabuteASA: 278 # version 1.0.1: optimizations, values unaffected 279 LabuteASA=lambda *x,**y:rdMolDescriptors.CalcLabuteASA(*x,**y) 280 LabuteASA.version=rdMolDescriptors.__CalcLabuteASA_version__ 281 282
283 -def _TPSAContribs(mol,verbose=False):
284 """ calculates atomic contributions to a molecules TPSA 285 286 Algorithm in: 287 P. Ertl, B. Rohde, P. Selzer 288 Fast Calculation of Molecular Polar Surface Area as a Sum of Fragment-based 289 Contributions and Its Application to the Prediction of Drug Transport 290 Properties, J.Med.Chem. 43, 3714-3717, 2000 291 292 Implementation based on the Daylight contrib program tpsa.c 293 """ 294 res = [0]*mol.GetNumAtoms() 295 for i in range(mol.GetNumAtoms()): 296 atom = mol.GetAtomWithIdx(i) 297 atNum = atom.GetAtomicNum() 298 if atNum in [7,8]: 299 #nHs = atom.GetImplicitValence()-atom.GetHvyValence() 300 nHs = atom.GetTotalNumHs() 301 chg = atom.GetFormalCharge() 302 isArom = atom.GetIsAromatic() 303 in3Ring = atom.IsInRingSize(3) 304 305 bonds = atom.GetBonds() 306 numNeighbors = atom.GetDegree() 307 nSing = 0 308 nDoub = 0 309 nTrip = 0 310 nArom = 0 311 for bond in bonds: 312 otherAt = bond.GetOtherAtom(atom) 313 if otherAt.GetAtomicNum()!=1: 314 if bond.GetIsAromatic(): 315 nArom += 1 316 else: 317 ord = bond.GetBondType() 318 if ord == Chem.BondType.SINGLE: 319 nSing += 1 320 elif ord == Chem.BondType.DOUBLE: 321 nDoub += 1 322 elif ord == Chem.BondType.TRIPLE: 323 nTrip += 1 324 else: 325 numNeighbors-=1 326 nHs += 1 327 tmp = -1 328 if atNum == 7: 329 if numNeighbors == 1: 330 if nHs==0 and nTrip==1 and chg==0: tmp = 23.79 331 elif nHs==1 and nDoub==1 and chg==0: tmp = 23.85 332 elif nHs==2 and nSing==1 and chg==0: tmp = 26.02 333 elif nHs==2 and nDoub==1 and chg==1: tmp = 25.59 334 elif nHs==3 and nSing==1 and chg==1: tmp = 27.64 335 elif numNeighbors == 2: 336 if nHs==0 and nSing==1 and nDoub==1 and chg==0: tmp = 12.36 337 elif nHs==0 and nTrip==1 and nDoub==1 and chg==0: tmp = 13.60 338 elif nHs==1 and nSing==2 and chg==0: 339 if not in3Ring: tmp = 12.03 340 else: tmp=21.94 341 elif nHs==0 and nTrip==1 and nSing==1 and chg==1: tmp = 4.36 342 elif nHs==1 and nDoub==1 and nSing==1 and chg==1: tmp = 13.97 343 elif nHs==2 and nSing==2 and chg==1: tmp = 16.61 344 elif nHs==0 and nArom==2 and chg==0: tmp = 12.89 345 elif nHs==1 and nArom==2 and chg==0: tmp = 15.79 346 elif nHs==1 and nArom==2 and chg==1: tmp = 14.14 347 elif numNeighbors == 3: 348 if nHs==0 and nSing==3 and chg==0: 349 if not in3Ring: tmp = 3.24 350 else: tmp = 3.01 351 elif nHs==0 and nSing==1 and nDoub==2 and chg==0: tmp = 11.68 352 elif nHs==0 and nSing==2 and nDoub==1 and chg==1: tmp = 3.01 353 elif nHs==1 and nSing==3 and chg==1: tmp = 4.44 354 elif nHs==0 and nArom==3 and chg==0: tmp = 4.41 355 elif nHs==0 and nSing==1 and nArom==2 and chg==0: tmp = 4.93 356 elif nHs==0 and nDoub==1 and nArom==2 and chg==0: tmp = 8.39 357 elif nHs==0 and nArom==3 and chg==1: tmp = 4.10 358 elif nHs==0 and nSing==1 and nArom==2 and chg==1: tmp = 3.88 359 elif numNeighbors == 4: 360 if nHs==0 and nSing==4 and chg==1: tmp = 0.00 361 if tmp < 0.0: 362 tmp = 30.5 - numNeighbors * 8.2 + nHs * 1.5 363 if tmp < 0.0: tmp = 0.0 364 elif atNum==8: 365 #print nHs,nSing,chg 366 if numNeighbors == 1: 367 if nHs==0 and nDoub==1 and chg==0: tmp = 17.07 368 elif nHs==1 and nSing==1 and chg==0: tmp = 20.23 369 elif nHs==0 and nSing==1 and chg==-1: tmp = 23.06 370 elif numNeighbors == 2: 371 if nHs==0 and nSing==2 and chg==0: 372 if not in3Ring: tmp = 9.23 373 else: tmp = 12.53 374 elif nHs==0 and nArom==2 and chg==0: tmp = 13.14 375 376 if tmp < 0.0: 377 tmp = 28.5 - numNeighbors * 8.6 + nHs * 1.5 378 if tmp < 0.0: tmp = 0.0 379 if verbose: 380 print '\t',atom.GetIdx(),atom.GetSymbol(),atNum,nHs,nSing,nDoub,nTrip,nArom,chg,tmp 381 382 res[atom.GetIdx()] = tmp 383 return res
384
385 -def TPSA(mol,verbose=False):
386 """ calculates the polar surface area of a molecule based upon fragments 387 388 Algorithm in: 389 P. Ertl, B. Rohde, P. Selzer 390 Fast Calculation of Molecular Polar Surface Area as a Sum of Fragment-based 391 Contributions and Its Application to the Prediction of Drug Transport 392 Properties, J.Med.Chem. 43, 3714-3717, 2000 393 394 Implementation based on the Daylight contrib program tpsa.c 395 """ 396 contribs = _TPSAContribs(mol,verbose=verbose) 397 res = 0.0 398 for contrib in contribs: 399 res += contrib 400 return res
401 TPSA.version="1.0.1" 402 403 404 405 if __name__ == '__main__': 406 smis = ['C','CC','CCC','CCCC','CO','CCO','COC'] 407 smis = ['C(=O)O','c1ccccc1'] 408 for smi in smis: 409 m = Chem.MolFromSmiles(smi) 410 #print smi, LabuteASA(m); 411 print '-----------\n',smi 412 #print 'M:',['% 4.2f'%x for x in SMR_VSA_(m)] 413 #print 'L:',['% 4.2f'%x for x in SlogP_VSA_(m)] 414 print 'P:',['% 4.2f'%x for x in PEOE_VSA_(m)] 415 print 'P:',['% 4.2f'%x for x in PEOE_VSA_(m)] 416 print 417