Package Chem :: Module Crippen
[hide private]
[frames] | no frames]

Source Code for Module Chem.Crippen

  1  # $Id: Crippen.py 645 2008-05-06 06:57:27Z glandrum $ 
  2  # 
  3  # Copyright (C) 2002-2008 greg Landrum and Rational Discovery LLC 
  4  # 
  5  #   @@ All Rights Reserved  @@ 
  6  # 
  7  """ Atom-based calculation of LogP and MR using Crippen's approach 
  8   
  9   
 10      Reference: 
 11        S. A. Wildman and G. M. Crippen *JCICS* _39_ 868-873 (1999) 
 12   
 13   
 14  """ 
 15  import os 
 16  import RDConfig 
 17  import Chem 
 18  from Chem import rdMolDescriptors 
 19  from Numeric import * 
 20   
 21  _smartsPatterns = {} 
 22  _patternOrder = [] 
 23  # this is the file containing the atom contributions 
 24  defaultPatternFileName = os.path.join(RDConfig.RDDataDir,'Crippen.txt') 
 25   
26 -def _ReadPatts(fileName):
27 """ *Internal Use Only* 28 29 parses the pattern list from the data file 30 31 """ 32 patts = {} 33 order = [] 34 for line in open(fileName,'r').xreadlines(): 35 if line[0] != '#': 36 splitLine = line.split('\t') 37 if len(splitLine)>=4 and splitLine[0] != '': 38 sma = splitLine[1] 39 if sma!='SMARTS': 40 sma.replace('"','') 41 try: 42 p = Chem.MolFromSmarts(sma) 43 except: 44 pass 45 else: 46 if p: 47 if len(splitLine[0])>1 and splitLine[0][1] not in 'S0123456789': 48 cha = splitLine[0][:2] 49 else: 50 cha = splitLine[0][0] 51 logP = float(splitLine[2]) 52 if splitLine[3] != '': 53 mr = float(splitLine[3]) 54 else: 55 mr = 0.0 56 if cha not in order: 57 order.append(cha) 58 l = patts.get(cha,[]) 59 l.append((sma,p,logP,mr)) 60 patts[cha] = l 61 else: 62 print 'Problems parsing smarts: %s'%(sma) 63 return order,patts
64 65 _GetAtomContribs=rdMolDescriptors._CalcCrippenContribs
66 -def _pyGetAtomContribs(mol,patts=None,order=None,verbose=0,force=0):
67 """ *Internal Use Only* 68 69 calculates atomic contributions to the LogP and MR values 70 71 if the argument *force* is not set, we'll use the molecules stored 72 _crippenContribs value when possible instead of re-calculating. 73 74 **Note:** Changes here affect the version numbers of MolLogP and MolMR 75 as well as the VSA descriptors in Chem.MolSurf 76 77 """ 78 if not force and hasattr(mol,'_crippenContribs'): 79 return mol._crippenContribs 80 81 if patts is None: 82 patts = _smartsPatterns 83 order = _patternOrder 84 85 nAtoms = mol.GetNumAtoms() 86 atomContribs = [(0.,0.)]*nAtoms 87 doneAtoms=[0]*nAtoms 88 nAtomsFound=0 89 done = False 90 for cha in order: 91 pattVect = patts[cha] 92 for sma,patt,logp,mr in pattVect: 93 #print 'try:',entry[0] 94 for match in mol.GetSubstructMatches(patt,False,False): 95 firstIdx = match[0] 96 if not doneAtoms[firstIdx]: 97 doneAtoms[firstIdx]=1 98 atomContribs[firstIdx] = (logp,mr) 99 if verbose: 100 print '\tAtom %d: %s %4.4f %4.4f'%(match[0],sma,logp,mr) 101 nAtomsFound+=1 102 if nAtomsFound>=nAtoms: 103 done=True 104 break 105 if done: break 106 mol._crippenContribs = atomContribs 107 return atomContribs
108
109 -def _Init():
110 global _smartsPatterns,_patternOrder 111 if _smartsPatterns == {}: 112 _patternOrder,_smartsPatterns = _ReadPatts(defaultPatternFileName)
113
114 -def pyMolLogP(inMol,patts=None,order=None,verbose=0,addHs=1):
115 """ Crippen LogP value 116 117 Definition From JCICS _39_ 868-873 (1999) 118 119 **Arguments** 120 121 - inMol: a molecule 122 123 - patts: (optional) if provided this should be a dictionary of 4 tuples: 124 125 1) smarts text 126 127 2) SmartsPattern object 128 129 3) logp contribution 130 131 4) MR contribution 132 133 - order: (optional) a list with the order in which the keys of the pattern dictionary 134 should be evaluated 135 136 - verbose: (optional) toggles verbose printing 137 138 - addHs: (optional) toggles adding of Hs to the molecule for the calculation. 139 If positive, explicit and implicit Hs will be used in the calculation. 140 If this negative, then only explicit hydrogens will be taken into account in the 141 calculation. This is available to allow matching of results obtained with the old 142 backend code. 143 144 """ 145 if addHs < 0: 146 mol = Chem.AddHs(inMol,1) 147 elif addHs > 0: 148 mol = Chem.AddHs(inMol,0) 149 else: 150 mol = inMol 151 152 if patts is None: 153 global _smartsPatterns,_patternOrder 154 if _smartsPatterns == {}: 155 _patternOrder,_smartsPatterns = _ReadPatts(defaultPatternFileName) 156 patts = _smartsPatterns 157 order = _patternOrder 158 atomContribs = _pyGetAtomContribs(mol,patts,order,verbose=verbose) 159 #print 'AC:',atomContribs 160 return sum(atomContribs)[0]
161 pyMolLogP.version="1.1.0" 162
163 -def pyMolMR(inMol,patts=None,order=None,verbose=0,addHs=1):
164 """ Crippen MR value 165 166 Definition From JCICS _39_ 868-873 (1999) 167 168 **Arguments** 169 170 - inMol: a molecule 171 172 - patts: (optional) if provided this should be a dictionary of 4 tuples: 173 174 1) smarts text 175 176 2) SmartsPattern object 177 178 3) logp contribution 179 180 4) MR contribution 181 182 - order: (optional) a list with the order in which the keys of the pattern dictionary 183 should be evaluated 184 185 - verbose: (optional) toggles verbose printing 186 187 - addHs: (optional) toggles adding of Hs to the molecule for the calculation. 188 If positive, explicit and implicit Hs will be used in the calculation. 189 If this negative, then only explicit hydrogens will be taken into account in the 190 calculation. This is available to allow matching of results obtained with the old 191 backend code. 192 193 """ 194 if addHs < 0: 195 mol = Chem.AddHs(inMol,1) 196 elif addHs > 0: 197 mol = Chem.AddHs(inMol,0) 198 else: 199 mol = inMol 200 201 if patts is None: 202 global _smartsPatterns,_patternOrder 203 if _smartsPatterns == {}: 204 _patternOrder,_smartsPatterns = _ReadPatts(defaultPatternFileName) 205 patts = _smartsPatterns 206 order = _patternOrder 207 208 atomContribs = _pyGetAtomContribs(mol,patts,order,verbose=verbose) 209 return sum(atomContribs)[1]
210 pyMolMR.version="1.1.0" 211 212 MolLogP=lambda *x,**y:rdMolDescriptors.CalcCrippenDescriptors(*x,**y)[0] 213 MolLogP.version=rdMolDescriptors.__CalcCrippenDescriptors_version__ 214 MolLogP.__doc__=""" Wildman-Crippen LogP value 215 216 Uses an atom-based scheme based on the values in the paper: 217 S. A. Wildman and G. M. Crippen JCICS 39 868-873 (1999) 218 219 **Arguments** 220 221 - inMol: a molecule 222 223 - addHs: (optional) toggles adding of Hs to the molecule for the calculation. 224 If true, hydrogens will be added to the molecule and used in the calculation. 225 226 """ 227 228 MolMR=lambda *x,**y:rdMolDescriptors.CalcCrippenDescriptors(*x,**y)[1] 229 MolMR.version=rdMolDescriptors.__CalcCrippenDescriptors_version__ 230 MolMR.__doc__=""" Wildman-Crippen MR value 231 232 Uses an atom-based scheme based on the values in the paper: 233 S. A. Wildman and G. M. Crippen JCICS 39 868-873 (1999) 234 235 **Arguments** 236 237 - inMol: a molecule 238 239 - addHs: (optional) toggles adding of Hs to the molecule for the calculation. 240 If true, hydrogens will be added to the molecule and used in the calculation. 241 242 """ 243 244 if __name__=='__main__': 245 import sys 246 247 if len(sys.argv): 248 ms = [] 249 verbose=0 250 if '-v' in sys.argv: 251 verbose=1 252 sys.argv.remove('-v') 253 for smi in sys.argv[1:]: 254 ms.append((smi,Chem.MolFromSmiles(smi))) 255 256 for smi,m in ms: 257 print 'Mol: %s'%(smi) 258 logp = MolLogP(m,verbose=verbose) 259 print '----' 260 mr = MolMR(m,verbose=verbose) 261 print 'Res:',logp,mr 262 newM = Chem.AddHs(m) 263 logp = MolLogP(newM,addHs=0) 264 mr = MolMR(newM,addHs=0) 265 print '\t',logp,mr 266 print '-*-*-*-*-*-*-*-*' 267