Package rdkit :: Package Chem :: Module AllChem
[hide private]
[frames] | no frames]

Source Code for Module rdkit.Chem.AllChem

  1  # 
  2  #  Copyright (C) 2006-2017  greg Landrum and Rational Discovery LLC 
  3  # 
  4  #   @@ All Rights Reserved @@ 
  5  #  This file is part of the RDKit. 
  6  #  The contents are covered by the terms of the BSD license 
  7  #  which is included in the file license.txt, found at the root 
  8  #  of the RDKit source tree. 
  9  # 
 10  """ Import all RDKit chemistry modules 
 11   
 12  """ 
 13  import sys 
 14  import warnings 
 15  from collections import namedtuple 
 16   
 17  import numpy 
 18   
 19  from rdkit import DataStructs 
 20  from rdkit import ForceField 
 21  from rdkit import RDConfig 
 22  from rdkit import rdBase 
 23  from rdkit.Chem import * 
 24  from rdkit.Chem.ChemicalFeatures import * 
 25  from rdkit.Chem.rdChemReactions import * 
 26  from rdkit.Chem.rdDepictor import * 
 27  from rdkit.Chem.rdDistGeom import * 
 28  from rdkit.Chem.rdForceFieldHelpers import * 
 29  from rdkit.Chem.rdMolAlign import * 
 30  from rdkit.Chem.rdMolDescriptors import * 
 31  from rdkit.Chem.rdMolTransforms import * 
 32  from rdkit.Chem.rdPartialCharges import * 
 33  from rdkit.Chem.rdReducedGraphs import * 
 34  from rdkit.Chem.rdShapeHelpers import * 
 35  from rdkit.Chem.rdqueries import * 
 36  from rdkit.Geometry import rdGeometry 
 37  from rdkit.RDLogger import logger 
 38  from rdkit.Chem.EnumerateStereoisomers import StereoEnumerationOptions, EnumerateStereoisomers 
 39   
 40  try: 
 41    from rdkit.Chem.rdSLNParse import * 
 42  except ImportError: 
 43    pass 
 44   
 45  Mol.Compute2DCoords = Compute2DCoords 
 46  Mol.ComputeGasteigerCharges = ComputeGasteigerCharges 
 47  logger = logger() 
 48   
 49   
50 -def TransformMol(mol, tform, confId=-1, keepConfs=False):
51 """ Applies the transformation (usually a 4x4 double matrix) to a molecule 52 if keepConfs is False then all but that conformer are removed 53 """ 54 refConf = mol.GetConformer(confId) 55 TransformConformer(refConf, tform) 56 if not keepConfs: 57 if confId == -1: 58 confId = 0 59 allConfIds = [c.GetId() for c in mol.GetConformers()] 60 for cid in allConfIds: 61 if not cid == confId: 62 mol.RemoveConformer(cid) 63 # reset the conf Id to zero since there is only one conformer left 64 mol.GetConformer(confId).SetId(0)
65 66
67 -def ComputeMolShape(mol, confId=-1, boxDim=(20, 20, 20), spacing=0.5, **kwargs):
68 """ returns a grid representation of the molecule's shape 69 """ 70 res = rdGeometry.UniformGrid3D(boxDim[0], boxDim[1], boxDim[2], spacing=spacing) 71 EncodeShape(mol, res, confId, **kwargs) 72 return res
73 74
75 -def ComputeMolVolume(mol, confId=-1, gridSpacing=0.2, boxMargin=2.0):
76 """ Calculates the volume of a particular conformer of a molecule 77 based on a grid-encoding of the molecular shape. 78 79 """ 80 mol = rdchem.Mol(mol) 81 conf = mol.GetConformer(confId) 82 CanonicalizeConformer(conf) 83 box = ComputeConfBox(conf) 84 sideLen = (box[1].x - box[0].x + 2 * boxMargin, box[1].y - box[0].y + 2 * boxMargin, 85 box[1].z - box[0].z + 2 * boxMargin) 86 shape = rdGeometry.UniformGrid3D(sideLen[0], sideLen[1], sideLen[2], spacing=gridSpacing) 87 EncodeShape(mol, shape, confId, ignoreHs=False, vdwScale=1.0) 88 voxelVol = gridSpacing**3 89 occVect = shape.GetOccupancyVect() 90 voxels = [1 for x in occVect if x == 3] 91 vol = voxelVol * len(voxels) 92 return vol
93
94 -def GetConformerRMS(mol, confId1, confId2, atomIds=None, prealigned=False):
95 """ Returns the RMS between two conformations. 96 By default, the conformers will be aligned to the first conformer 97 before the RMS calculation and, as a side-effect, the second will be left 98 in the aligned state. 99 100 Arguments: 101 - mol: the molecule 102 - confId1: the id of the first conformer 103 - confId2: the id of the second conformer 104 - atomIds: (optional) list of atom ids to use a points for 105 alingment - defaults to all atoms 106 - prealigned: (optional) by default the conformers are assumed 107 be unaligned and the second conformer be aligned 108 to the first 109 110 """ 111 # align the conformers if necessary 112 # Note: the reference conformer is always the first one 113 if not prealigned: 114 if atomIds: 115 AlignMolConformers(mol, confIds=[confId1, confId2], atomIds=atomIds) 116 else: 117 AlignMolConformers(mol, confIds=[confId1, confId2]) 118 119 # calculate the RMS between the two conformations 120 conf1 = mol.GetConformer(id=confId1) 121 conf2 = mol.GetConformer(id=confId2) 122 ssr = 0 123 for i in range(mol.GetNumAtoms()): 124 d = conf1.GetAtomPosition(i).Distance(conf2.GetAtomPosition(i)) 125 ssr += d * d 126 ssr /= mol.GetNumAtoms() 127 return numpy.sqrt(ssr)
128 129
130 -def GetConformerRMSMatrix(mol, atomIds=None, prealigned=False):
131 """ Returns the RMS matrix of the conformers of a molecule. 132 As a side-effect, the conformers will be aligned to the first 133 conformer (i.e. the reference) and will left in the aligned state. 134 135 Arguments: 136 - mol: the molecule 137 - atomIds: (optional) list of atom ids to use a points for 138 alingment - defaults to all atoms 139 - prealigned: (optional) by default the conformers are assumed 140 be unaligned and will therefore be aligned to the 141 first conformer 142 143 Note that the returned RMS matrix is symmetrical, i.e. it is the 144 lower half of the matrix, e.g. for 5 conformers: 145 rmsmatrix = [ a, 146 b, c, 147 d, e, f, 148 g, h, i, j] 149 where a is the RMS between conformers 0 and 1, b is the RMS between 150 conformers 0 and 2, etc. 151 This way it can be directly used as distance matrix in e.g. Butina 152 clustering. 153 154 """ 155 # if necessary, align the conformers 156 # Note: the reference conformer is always the first one 157 rmsvals = [] 158 if not prealigned: 159 if atomIds: 160 AlignMolConformers(mol, atomIds=atomIds, RMSlist=rmsvals) 161 else: 162 AlignMolConformers(mol, RMSlist=rmsvals) 163 else: # already prealigned 164 for i in range(1, mol.GetNumConformers()): 165 rmsvals.append(GetConformerRMS(mol, 0, i, atomIds=atomIds, prealigned=prealigned)) 166 # loop over the conformations (except the reference one) 167 cmat = [] 168 for i in range(1, mol.GetNumConformers()): 169 cmat.append(rmsvals[i - 1]) 170 for j in range(1, i): 171 cmat.append(GetConformerRMS(mol, i, j, atomIds=atomIds, prealigned=True)) 172 return cmat
173 174
175 -def EnumerateLibraryFromReaction(reaction, sidechainSets, returnReactants=False):
176 """ Returns a generator for the virtual library defined by 177 a reaction and a sequence of sidechain sets 178 179 >>> from rdkit import Chem 180 >>> from rdkit.Chem import AllChem 181 >>> s1=[Chem.MolFromSmiles(x) for x in ('NC','NCC')] 182 >>> s2=[Chem.MolFromSmiles(x) for x in ('OC=O','OC(=O)C')] 183 >>> rxn = AllChem.ReactionFromSmarts('[O:2]=[C:1][OH].[N:3]>>[O:2]=[C:1][N:3]') 184 >>> r = AllChem.EnumerateLibraryFromReaction(rxn,[s2,s1]) 185 >>> [Chem.MolToSmiles(x[0]) for x in list(r)] 186 ['CNC=O', 'CCNC=O', 'CNC(C)=O', 'CCNC(C)=O'] 187 188 Note that this is all done in a lazy manner, so "infinitely" large libraries can 189 be done without worrying about running out of memory. Your patience will run out first: 190 191 Define a set of 10000 amines: 192 >>> amines = (Chem.MolFromSmiles('N'+'C'*x) for x in range(10000)) 193 194 ... a set of 10000 acids 195 >>> acids = (Chem.MolFromSmiles('OC(=O)'+'C'*x) for x in range(10000)) 196 197 ... now the virtual library (1e8 compounds in principle): 198 >>> r = AllChem.EnumerateLibraryFromReaction(rxn,[acids,amines]) 199 200 ... look at the first 4 compounds: 201 >>> [Chem.MolToSmiles(next(r)[0]) for x in range(4)] 202 ['NC=O', 'CNC=O', 'CCNC=O', 'CCCNC=O'] 203 204 205 """ 206 if len(sidechainSets) != reaction.GetNumReactantTemplates(): 207 raise ValueError('%d sidechains provided, %d required' % 208 (len(sidechainSets), reaction.GetNumReactantTemplates())) 209 210 def _combiEnumerator(items, depth=0): 211 for item in items[depth]: 212 if depth + 1 < len(items): 213 v = _combiEnumerator(items, depth + 1) 214 for entry in v: 215 l = [item] 216 l.extend(entry) 217 yield l 218 else: 219 yield [item]
220 221 ProductReactants = namedtuple('ProductReactants', 'products,reactants') 222 for chains in _combiEnumerator(sidechainSets): 223 prodSets = reaction.RunReactants(chains) 224 for prods in prodSets: 225 if returnReactants: 226 yield ProductReactants(prods, chains) 227 else: 228 yield prods 229 230
231 -def ConstrainedEmbed(mol, core, useTethers=True, coreConfId=-1, randomseed=2342, 232 getForceField=UFFGetMoleculeForceField, **kwargs):
233 """ generates an embedding of a molecule where part of the molecule 234 is constrained to have particular coordinates 235 236 Arguments 237 - mol: the molecule to embed 238 - core: the molecule to use as a source of constraints 239 - useTethers: (optional) if True, the final conformation will be 240 optimized subject to a series of extra forces that pull the 241 matching atoms to the positions of the core atoms. Otherwise 242 simple distance constraints based on the core atoms will be 243 used in the optimization. 244 - coreConfId: (optional) id of the core conformation to use 245 - randomSeed: (optional) seed for the random number generator 246 247 248 An example, start by generating a template with a 3D structure: 249 >>> from rdkit.Chem import AllChem 250 >>> template = AllChem.MolFromSmiles("c1nn(Cc2ccccc2)cc1") 251 >>> AllChem.EmbedMolecule(template) 252 0 253 >>> AllChem.UFFOptimizeMolecule(template) 254 0 255 256 Here's a molecule: 257 >>> mol = AllChem.MolFromSmiles("c1nn(Cc2ccccc2)cc1-c3ccccc3") 258 259 Now do the constrained embedding 260 >>> newmol=AllChem.ConstrainedEmbed(mol, template) 261 262 Demonstrate that the positions are the same: 263 >>> newp=newmol.GetConformer().GetAtomPosition(0) 264 >>> molp=mol.GetConformer().GetAtomPosition(0) 265 >>> list(newp-molp)==[0.0,0.0,0.0] 266 True 267 >>> newp=newmol.GetConformer().GetAtomPosition(1) 268 >>> molp=mol.GetConformer().GetAtomPosition(1) 269 >>> list(newp-molp)==[0.0,0.0,0.0] 270 True 271 272 """ 273 match = mol.GetSubstructMatch(core) 274 if not match: 275 raise ValueError("molecule doesn't match the core") 276 coordMap = {} 277 coreConf = core.GetConformer(coreConfId) 278 for i, idxI in enumerate(match): 279 corePtI = coreConf.GetAtomPosition(i) 280 coordMap[idxI] = corePtI 281 282 ci = EmbedMolecule(mol, coordMap=coordMap, randomSeed=randomseed, **kwargs) 283 if ci < 0: 284 raise ValueError('Could not embed molecule.') 285 286 algMap = [(j, i) for i, j in enumerate(match)] 287 288 if not useTethers: 289 # clean up the conformation 290 ff = getForceField(mol, confId=0) 291 for i, idxI in enumerate(match): 292 for j in range(i + 1, len(match)): 293 idxJ = match[j] 294 d = coordMap[idxI].Distance(coordMap[idxJ]) 295 ff.AddDistanceConstraint(idxI, idxJ, d, d, 100.) 296 ff.Initialize() 297 n = 4 298 more = ff.Minimize() 299 while more and n: 300 more = ff.Minimize() 301 n -= 1 302 # rotate the embedded conformation onto the core: 303 rms = AlignMol(mol, core, atomMap=algMap) 304 else: 305 # rotate the embedded conformation onto the core: 306 rms = AlignMol(mol, core, atomMap=algMap) 307 ff = getForceField(mol, confId=0) 308 conf = core.GetConformer() 309 for i in range(core.GetNumAtoms()): 310 p = conf.GetAtomPosition(i) 311 pIdx = ff.AddExtraPoint(p.x, p.y, p.z, fixed=True) - 1 312 ff.AddDistanceConstraint(pIdx, match[i], 0, 0, 100.) 313 ff.Initialize() 314 n = 4 315 more = ff.Minimize(energyTol=1e-4, forceTol=1e-3) 316 while more and n: 317 more = ff.Minimize(energyTol=1e-4, forceTol=1e-3) 318 n -= 1 319 # realign 320 rms = AlignMol(mol, core, atomMap=algMap) 321 mol.SetProp('EmbedRMS', str(rms)) 322 return mol
323 324
325 -def AssignBondOrdersFromTemplate(refmol, mol):
326 """ assigns bond orders to a molecule based on the 327 bond orders in a template molecule 328 329 Arguments 330 - refmol: the template molecule 331 - mol: the molecule to assign bond orders to 332 333 An example, start by generating a template from a SMILES 334 and read in the PDB structure of the molecule 335 >>> import os 336 >>> from rdkit.Chem import AllChem 337 >>> template = AllChem.MolFromSmiles("CN1C(=NC(C1=O)(c2ccccc2)c3ccccc3)N") 338 >>> mol = AllChem.MolFromPDBFile(os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data', '4DJU_lig.pdb')) 339 >>> len([1 for b in template.GetBonds() if b.GetBondTypeAsDouble() == 1.0]) 340 8 341 >>> len([1 for b in mol.GetBonds() if b.GetBondTypeAsDouble() == 1.0]) 342 22 343 344 Now assign the bond orders based on the template molecule 345 >>> newMol = AllChem.AssignBondOrdersFromTemplate(template, mol) 346 >>> len([1 for b in newMol.GetBonds() if b.GetBondTypeAsDouble() == 1.0]) 347 8 348 349 Note that the template molecule should have no explicit hydrogens 350 else the algorithm will fail. 351 352 It also works if there are different formal charges (this was github issue 235): 353 >>> template=AllChem.MolFromSmiles('CN(C)C(=O)Cc1ccc2c(c1)NC(=O)c3ccc(cc3N2)c4ccc(c(c4)OC)[N+](=O)[O-]') 354 >>> mol = AllChem.MolFromMolFile(os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data', '4FTR_lig.mol')) 355 >>> AllChem.MolToSmiles(mol) 356 'COC1CC(C2CCC3C(O)NC4CC(CC(O)N(C)C)CCC4NC3C2)CCC1N(O)O' 357 >>> newMol = AllChem.AssignBondOrdersFromTemplate(template, mol) 358 >>> AllChem.MolToSmiles(newMol) 359 'COc1cc(-c2ccc3c(c2)Nc2ccc(CC(=O)N(C)C)cc2NC3=O)ccc1[N+](=O)[O-]' 360 361 """ 362 refmol2 = rdchem.Mol(refmol) 363 mol2 = rdchem.Mol(mol) 364 # do the molecules match already? 365 matching = mol2.GetSubstructMatch(refmol2) 366 if not matching: # no, they don't match 367 # check if bonds of mol are SINGLE 368 for b in mol2.GetBonds(): 369 if b.GetBondType() != BondType.SINGLE: 370 b.SetBondType(BondType.SINGLE) 371 b.SetIsAromatic(False) 372 # set the bonds of mol to SINGLE 373 for b in refmol2.GetBonds(): 374 b.SetBondType(BondType.SINGLE) 375 b.SetIsAromatic(False) 376 # set atom charges to zero; 377 for a in refmol2.GetAtoms(): 378 a.SetFormalCharge(0) 379 for a in mol2.GetAtoms(): 380 a.SetFormalCharge(0) 381 382 matching = mol2.GetSubstructMatches(refmol2, uniquify=False) 383 # do the molecules match now? 384 if matching: 385 if len(matching) > 1: 386 logger.warning("More than one matching pattern found - picking one") 387 matching = matching[0] 388 # apply matching: set bond properties 389 for b in refmol.GetBonds(): 390 atom1 = matching[b.GetBeginAtomIdx()] 391 atom2 = matching[b.GetEndAtomIdx()] 392 b2 = mol2.GetBondBetweenAtoms(atom1, atom2) 393 b2.SetBondType(b.GetBondType()) 394 b2.SetIsAromatic(b.GetIsAromatic()) 395 # apply matching: set atom properties 396 for a in refmol.GetAtoms(): 397 a2 = mol2.GetAtomWithIdx(matching[a.GetIdx()]) 398 a2.SetHybridization(a.GetHybridization()) 399 a2.SetIsAromatic(a.GetIsAromatic()) 400 a2.SetNumExplicitHs(a.GetNumExplicitHs()) 401 a2.SetFormalCharge(a.GetFormalCharge()) 402 SanitizeMol(mol2) 403 if hasattr(mol2, '__sssAtoms'): 404 mol2.__sssAtoms = None # we don't want all bonds highlighted 405 else: 406 raise ValueError("No matching found") 407 return mol2
408 409 # ------------------------------------ 410 # 411 # doctest boilerplate 412 #
413 -def _runDoctests(verbose=None): # pragma: nocover
414 import sys 415 import doctest 416 failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose) 417 sys.exit(failed) 418 419 420 if __name__ == '__main__': # pragma: nocover 421 _runDoctests() 422