Package rdkit :: Package Chem :: Module Crippen
[hide private]
[frames] | no frames]

Source Code for Module rdkit.Chem.Crippen

  1  # $Id: Crippen.py 997 2009-02-25 06:12:43Z glandrum $ 
  2  # 
  3  # Copyright (C) 2002-2008 greg Landrum and Rational Discovery LLC 
  4  # 
  5  #   @@ All Rights Reserved  @@ 
  6  # 
  7  """ Atom-based calculation of LogP and MR using Crippen's approach 
  8   
  9   
 10      Reference: 
 11        S. A. Wildman and G. M. Crippen *JCICS* _39_ 868-873 (1999) 
 12   
 13   
 14  """ 
 15  import os 
 16  from rdkit import RDConfig 
 17  from rdkit import Chem 
 18  from rdkit.Chem import rdMolDescriptors 
 19  import numpy 
 20   
 21  _smartsPatterns = {} 
 22  _patternOrder = [] 
 23  # this is the file containing the atom contributions 
 24  defaultPatternFileName = os.path.join(RDConfig.RDDataDir,'Crippen.txt') 
 25   
26 -def _ReadPatts(fileName):
27 """ *Internal Use Only* 28 29 parses the pattern list from the data file 30 31 """ 32 patts = {} 33 order = [] 34 for line in open(fileName,'r').xreadlines(): 35 if line[0] != '#': 36 splitLine = line.split('\t') 37 if len(splitLine)>=4 and splitLine[0] != '': 38 sma = splitLine[1] 39 if sma!='SMARTS': 40 sma.replace('"','') 41 try: 42 p = Chem.MolFromSmarts(sma) 43 except: 44 pass 45 else: 46 if p: 47 if len(splitLine[0])>1 and splitLine[0][1] not in 'S0123456789': 48 cha = splitLine[0][:2] 49 else: 50 cha = splitLine[0][0] 51 logP = float(splitLine[2]) 52 if splitLine[3] != '': 53 mr = float(splitLine[3]) 54 else: 55 mr = 0.0 56 if cha not in order: 57 order.append(cha) 58 l = patts.get(cha,[]) 59 l.append((sma,p,logP,mr)) 60 patts[cha] = l 61 else: 62 print 'Problems parsing smarts: %s'%(sma) 63 return order,patts
64 65 _GetAtomContribs=rdMolDescriptors._CalcCrippenContribs
66 -def _pyGetAtomContribs(mol,patts=None,order=None,verbose=0,force=0):
67 """ *Internal Use Only* 68 69 calculates atomic contributions to the LogP and MR values 70 71 if the argument *force* is not set, we'll use the molecules stored 72 _crippenContribs value when possible instead of re-calculating. 73 74 **Note:** Changes here affect the version numbers of MolLogP and MolMR 75 as well as the VSA descriptors in Chem.MolSurf 76 77 """ 78 if not force and hasattr(mol,'_crippenContribs'): 79 return mol._crippenContribs 80 81 if patts is None: 82 patts = _smartsPatterns 83 order = _patternOrder 84 85 nAtoms = mol.GetNumAtoms() 86 atomContribs = [(0.,0.)]*nAtoms 87 doneAtoms=[0]*nAtoms 88 nAtomsFound=0 89 done = False 90 for cha in order: 91 pattVect = patts[cha] 92 for sma,patt,logp,mr in pattVect: 93 #print 'try:',entry[0] 94 for match in mol.GetSubstructMatches(patt,False,False): 95 firstIdx = match[0] 96 if not doneAtoms[firstIdx]: 97 doneAtoms[firstIdx]=1 98 atomContribs[firstIdx] = (logp,mr) 99 if verbose: 100 print '\tAtom %d: %s %4.4f %4.4f'%(match[0],sma,logp,mr) 101 nAtomsFound+=1 102 if nAtomsFound>=nAtoms: 103 done=True 104 break 105 if done: break 106 mol._crippenContribs = atomContribs 107 return atomContribs
108
109 -def _Init():
110 global _smartsPatterns,_patternOrder 111 if _smartsPatterns == {}: 112 _patternOrder,_smartsPatterns = _ReadPatts(defaultPatternFileName)
113
114 -def pyMolLogP(inMol,patts=None,order=None,verbose=0,addHs=1):
115 """ Crippen LogP value 116 117 Definition From JCICS _39_ 868-873 (1999) 118 119 **Arguments** 120 121 - inMol: a molecule 122 123 - patts: (optional) if provided this should be a dictionary of 4 tuples: 124 125 1) smarts text 126 127 2) SmartsPattern object 128 129 3) logp contribution 130 131 4) MR contribution 132 133 - order: (optional) a list with the order in which the keys of the pattern dictionary 134 should be evaluated 135 136 - verbose: (optional) toggles verbose printing 137 138 - addHs: (optional) toggles adding of Hs to the molecule for the calculation. 139 If positive, explicit and implicit Hs will be used in the calculation. 140 If this negative, then only explicit hydrogens will be taken into account in the 141 calculation. This is available to allow matching of results obtained with the old 142 backend code. 143 144 """ 145 if addHs < 0: 146 mol = Chem.AddHs(inMol,1) 147 elif addHs > 0: 148 mol = Chem.AddHs(inMol,0) 149 else: 150 mol = inMol 151 152 if patts is None: 153 global _smartsPatterns,_patternOrder 154 if _smartsPatterns == {}: 155 _patternOrder,_smartsPatterns = _ReadPatts(defaultPatternFileName) 156 patts = _smartsPatterns 157 order = _patternOrder 158 atomContribs = _pyGetAtomContribs(mol,patts,order,verbose=verbose) 159 return numpy.sum(atomContribs,0)[0]
160 pyMolLogP.version="1.1.0" 161
162 -def pyMolMR(inMol,patts=None,order=None,verbose=0,addHs=1):
163 """ Crippen MR value 164 165 Definition From JCICS _39_ 868-873 (1999) 166 167 **Arguments** 168 169 - inMol: a molecule 170 171 - patts: (optional) if provided this should be a dictionary of 4 tuples: 172 173 1) smarts text 174 175 2) SmartsPattern object 176 177 3) logp contribution 178 179 4) MR contribution 180 181 - order: (optional) a list with the order in which the keys of the pattern dictionary 182 should be evaluated 183 184 - verbose: (optional) toggles verbose printing 185 186 - addHs: (optional) toggles adding of Hs to the molecule for the calculation. 187 If positive, explicit and implicit Hs will be used in the calculation. 188 If this negative, then only explicit hydrogens will be taken into account in the 189 calculation. This is available to allow matching of results obtained with the old 190 backend code. 191 192 """ 193 if addHs < 0: 194 mol = Chem.AddHs(inMol,1) 195 elif addHs > 0: 196 mol = Chem.AddHs(inMol,0) 197 else: 198 mol = inMol 199 200 if patts is None: 201 global _smartsPatterns,_patternOrder 202 if _smartsPatterns == {}: 203 _patternOrder,_smartsPatterns = _ReadPatts(defaultPatternFileName) 204 patts = _smartsPatterns 205 order = _patternOrder 206 207 atomContribs = _pyGetAtomContribs(mol,patts,order,verbose=verbose) 208 return numpy.sum(atomContribs,0)[1]
209 pyMolMR.version="1.1.0" 210 211 MolLogP=lambda *x,**y:rdMolDescriptors.CalcCrippenDescriptors(*x,**y)[0] 212 MolLogP.version=rdMolDescriptors.__CalcCrippenDescriptors_version__ 213 MolLogP.__doc__=""" Wildman-Crippen LogP value 214 215 Uses an atom-based scheme based on the values in the paper: 216 S. A. Wildman and G. M. Crippen JCICS 39 868-873 (1999) 217 218 **Arguments** 219 220 - inMol: a molecule 221 222 - addHs: (optional) toggles adding of Hs to the molecule for the calculation. 223 If true, hydrogens will be added to the molecule and used in the calculation. 224 225 """ 226 227 MolMR=lambda *x,**y:rdMolDescriptors.CalcCrippenDescriptors(*x,**y)[1] 228 MolMR.version=rdMolDescriptors.__CalcCrippenDescriptors_version__ 229 MolMR.__doc__=""" Wildman-Crippen MR value 230 231 Uses an atom-based scheme based on the values in the paper: 232 S. A. Wildman and G. M. Crippen JCICS 39 868-873 (1999) 233 234 **Arguments** 235 236 - inMol: a molecule 237 238 - addHs: (optional) toggles adding of Hs to the molecule for the calculation. 239 If true, hydrogens will be added to the molecule and used in the calculation. 240 241 """ 242 243 if __name__=='__main__': 244 import sys 245 246 if len(sys.argv): 247 ms = [] 248 verbose=0 249 if '-v' in sys.argv: 250 verbose=1 251 sys.argv.remove('-v') 252 for smi in sys.argv[1:]: 253 ms.append((smi,Chem.MolFromSmiles(smi))) 254 255 for smi,m in ms: 256 print 'Mol: %s'%(smi) 257 logp = MolLogP(m,verbose=verbose) 258 print '----' 259 mr = MolMR(m,verbose=verbose) 260 print 'Res:',logp,mr 261 newM = Chem.AddHs(m) 262 logp = MolLogP(newM,addHs=0) 263 mr = MolMR(newM,addHs=0) 264 print '\t',logp,mr 265 print '-*-*-*-*-*-*-*-*' 266