1
2
3
4
5
6
7
8
9
10 """ Import all RDKit chemistry modules
11
12 """
13 import sys
14 import warnings
15 from collections import namedtuple
16
17 import numpy
18
19 from rdkit import DataStructs
20 from rdkit import ForceField
21 from rdkit import RDConfig
22 from rdkit import rdBase
23 from rdkit.Chem import *
24 from rdkit.Chem.ChemicalFeatures import *
25 from rdkit.Chem.rdChemReactions import *
26 from rdkit.Chem.rdDepictor import *
27 from rdkit.Chem.rdDistGeom import *
28 from rdkit.Chem.rdForceFieldHelpers import *
29 from rdkit.Chem.rdMolAlign import *
30 from rdkit.Chem.rdMolDescriptors import *
31 from rdkit.Chem.rdMolTransforms import *
32 from rdkit.Chem.rdPartialCharges import *
33 from rdkit.Chem.rdReducedGraphs import *
34 from rdkit.Chem.rdShapeHelpers import *
35 from rdkit.Chem.rdqueries import *
36 from rdkit.Geometry import rdGeometry
37 from rdkit.RDLogger import logger
38 from rdkit.Chem.EnumerateStereoisomers import StereoEnumerationOptions, EnumerateStereoisomers
39
40 try:
41 from rdkit.Chem.rdSLNParse import *
42 except ImportError:
43 pass
44
45 Mol.Compute2DCoords = Compute2DCoords
46 Mol.ComputeGasteigerCharges = ComputeGasteigerCharges
47 logger = logger()
48
49
65
66
67 -def ComputeMolShape(mol, confId=-1, boxDim=(20, 20, 20), spacing=0.5, **kwargs):
68 """ returns a grid representation of the molecule's shape
69 """
70 res = rdGeometry.UniformGrid3D(boxDim[0], boxDim[1], boxDim[2], spacing=spacing)
71 EncodeShape(mol, res, confId, **kwargs)
72 return res
73
74
93
128
129
173
174
176 """ Returns a generator for the virtual library defined by
177 a reaction and a sequence of sidechain sets
178
179 >>> from rdkit import Chem
180 >>> from rdkit.Chem import AllChem
181 >>> s1=[Chem.MolFromSmiles(x) for x in ('NC','NCC')]
182 >>> s2=[Chem.MolFromSmiles(x) for x in ('OC=O','OC(=O)C')]
183 >>> rxn = AllChem.ReactionFromSmarts('[O:2]=[C:1][OH].[N:3]>>[O:2]=[C:1][N:3]')
184 >>> r = AllChem.EnumerateLibraryFromReaction(rxn,[s2,s1])
185 >>> [Chem.MolToSmiles(x[0]) for x in list(r)]
186 ['CNC=O', 'CCNC=O', 'CNC(C)=O', 'CCNC(C)=O']
187
188 Note that this is all done in a lazy manner, so "infinitely" large libraries can
189 be done without worrying about running out of memory. Your patience will run out first:
190
191 Define a set of 10000 amines:
192 >>> amines = (Chem.MolFromSmiles('N'+'C'*x) for x in range(10000))
193
194 ... a set of 10000 acids
195 >>> acids = (Chem.MolFromSmiles('OC(=O)'+'C'*x) for x in range(10000))
196
197 ... now the virtual library (1e8 compounds in principle):
198 >>> r = AllChem.EnumerateLibraryFromReaction(rxn,[acids,amines])
199
200 ... look at the first 4 compounds:
201 >>> [Chem.MolToSmiles(next(r)[0]) for x in range(4)]
202 ['NC=O', 'CNC=O', 'CCNC=O', 'CCCNC=O']
203
204
205 """
206 if len(sidechainSets) != reaction.GetNumReactantTemplates():
207 raise ValueError('%d sidechains provided, %d required' %
208 (len(sidechainSets), reaction.GetNumReactantTemplates()))
209
210 def _combiEnumerator(items, depth=0):
211 for item in items[depth]:
212 if depth + 1 < len(items):
213 v = _combiEnumerator(items, depth + 1)
214 for entry in v:
215 l = [item]
216 l.extend(entry)
217 yield l
218 else:
219 yield [item]
220
221 ProductReactants = namedtuple('ProductReactants', 'products,reactants')
222 for chains in _combiEnumerator(sidechainSets):
223 prodSets = reaction.RunReactants(chains)
224 for prods in prodSets:
225 if returnReactants:
226 yield ProductReactants(prods, chains)
227 else:
228 yield prods
229
230
233 """ generates an embedding of a molecule where part of the molecule
234 is constrained to have particular coordinates
235
236 Arguments
237 - mol: the molecule to embed
238 - core: the molecule to use as a source of constraints
239 - useTethers: (optional) if True, the final conformation will be
240 optimized subject to a series of extra forces that pull the
241 matching atoms to the positions of the core atoms. Otherwise
242 simple distance constraints based on the core atoms will be
243 used in the optimization.
244 - coreConfId: (optional) id of the core conformation to use
245 - randomSeed: (optional) seed for the random number generator
246
247
248 An example, start by generating a template with a 3D structure:
249 >>> from rdkit.Chem import AllChem
250 >>> template = AllChem.MolFromSmiles("c1nn(Cc2ccccc2)cc1")
251 >>> AllChem.EmbedMolecule(template)
252 0
253 >>> AllChem.UFFOptimizeMolecule(template)
254 0
255
256 Here's a molecule:
257 >>> mol = AllChem.MolFromSmiles("c1nn(Cc2ccccc2)cc1-c3ccccc3")
258
259 Now do the constrained embedding
260 >>> newmol=AllChem.ConstrainedEmbed(mol, template)
261
262 Demonstrate that the positions are the same:
263 >>> newp=newmol.GetConformer().GetAtomPosition(0)
264 >>> molp=mol.GetConformer().GetAtomPosition(0)
265 >>> list(newp-molp)==[0.0,0.0,0.0]
266 True
267 >>> newp=newmol.GetConformer().GetAtomPosition(1)
268 >>> molp=mol.GetConformer().GetAtomPosition(1)
269 >>> list(newp-molp)==[0.0,0.0,0.0]
270 True
271
272 """
273 match = mol.GetSubstructMatch(core)
274 if not match:
275 raise ValueError("molecule doesn't match the core")
276 coordMap = {}
277 coreConf = core.GetConformer(coreConfId)
278 for i, idxI in enumerate(match):
279 corePtI = coreConf.GetAtomPosition(i)
280 coordMap[idxI] = corePtI
281
282 ci = EmbedMolecule(mol, coordMap=coordMap, randomSeed=randomseed, **kwargs)
283 if ci < 0:
284 raise ValueError('Could not embed molecule.')
285
286 algMap = [(j, i) for i, j in enumerate(match)]
287
288 if not useTethers:
289
290 ff = getForceField(mol, confId=0)
291 for i, idxI in enumerate(match):
292 for j in range(i + 1, len(match)):
293 idxJ = match[j]
294 d = coordMap[idxI].Distance(coordMap[idxJ])
295 ff.AddDistanceConstraint(idxI, idxJ, d, d, 100.)
296 ff.Initialize()
297 n = 4
298 more = ff.Minimize()
299 while more and n:
300 more = ff.Minimize()
301 n -= 1
302
303 rms = AlignMol(mol, core, atomMap=algMap)
304 else:
305
306 rms = AlignMol(mol, core, atomMap=algMap)
307 ff = getForceField(mol, confId=0)
308 conf = core.GetConformer()
309 for i in range(core.GetNumAtoms()):
310 p = conf.GetAtomPosition(i)
311 pIdx = ff.AddExtraPoint(p.x, p.y, p.z, fixed=True) - 1
312 ff.AddDistanceConstraint(pIdx, match[i], 0, 0, 100.)
313 ff.Initialize()
314 n = 4
315 more = ff.Minimize(energyTol=1e-4, forceTol=1e-3)
316 while more and n:
317 more = ff.Minimize(energyTol=1e-4, forceTol=1e-3)
318 n -= 1
319
320 rms = AlignMol(mol, core, atomMap=algMap)
321 mol.SetProp('EmbedRMS', str(rms))
322 return mol
323
324
326 """ assigns bond orders to a molecule based on the
327 bond orders in a template molecule
328
329 Arguments
330 - refmol: the template molecule
331 - mol: the molecule to assign bond orders to
332
333 An example, start by generating a template from a SMILES
334 and read in the PDB structure of the molecule
335 >>> import os
336 >>> from rdkit.Chem import AllChem
337 >>> template = AllChem.MolFromSmiles("CN1C(=NC(C1=O)(c2ccccc2)c3ccccc3)N")
338 >>> mol = AllChem.MolFromPDBFile(os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data', '4DJU_lig.pdb'))
339 >>> len([1 for b in template.GetBonds() if b.GetBondTypeAsDouble() == 1.0])
340 8
341 >>> len([1 for b in mol.GetBonds() if b.GetBondTypeAsDouble() == 1.0])
342 22
343
344 Now assign the bond orders based on the template molecule
345 >>> newMol = AllChem.AssignBondOrdersFromTemplate(template, mol)
346 >>> len([1 for b in newMol.GetBonds() if b.GetBondTypeAsDouble() == 1.0])
347 8
348
349 Note that the template molecule should have no explicit hydrogens
350 else the algorithm will fail.
351
352 It also works if there are different formal charges (this was github issue 235):
353 >>> template=AllChem.MolFromSmiles('CN(C)C(=O)Cc1ccc2c(c1)NC(=O)c3ccc(cc3N2)c4ccc(c(c4)OC)[N+](=O)[O-]')
354 >>> mol = AllChem.MolFromMolFile(os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data', '4FTR_lig.mol'))
355 >>> AllChem.MolToSmiles(mol)
356 'COC1CC(C2CCC3C(O)NC4CC(CC(O)N(C)C)CCC4NC3C2)CCC1N(O)O'
357 >>> newMol = AllChem.AssignBondOrdersFromTemplate(template, mol)
358 >>> AllChem.MolToSmiles(newMol)
359 'COc1cc(-c2ccc3c(c2)Nc2ccc(CC(=O)N(C)C)cc2NC3=O)ccc1[N+](=O)[O-]'
360
361 """
362 refmol2 = rdchem.Mol(refmol)
363 mol2 = rdchem.Mol(mol)
364
365 matching = mol2.GetSubstructMatch(refmol2)
366 if not matching:
367
368 for b in mol2.GetBonds():
369 if b.GetBondType() != BondType.SINGLE:
370 b.SetBondType(BondType.SINGLE)
371 b.SetIsAromatic(False)
372
373 for b in refmol2.GetBonds():
374 b.SetBondType(BondType.SINGLE)
375 b.SetIsAromatic(False)
376
377 for a in refmol2.GetAtoms():
378 a.SetFormalCharge(0)
379 for a in mol2.GetAtoms():
380 a.SetFormalCharge(0)
381
382 matching = mol2.GetSubstructMatches(refmol2, uniquify=False)
383
384 if matching:
385 if len(matching) > 1:
386 logger.warning("More than one matching pattern found - picking one")
387 matching = matching[0]
388
389 for b in refmol.GetBonds():
390 atom1 = matching[b.GetBeginAtomIdx()]
391 atom2 = matching[b.GetEndAtomIdx()]
392 b2 = mol2.GetBondBetweenAtoms(atom1, atom2)
393 b2.SetBondType(b.GetBondType())
394 b2.SetIsAromatic(b.GetIsAromatic())
395
396 for a in refmol.GetAtoms():
397 a2 = mol2.GetAtomWithIdx(matching[a.GetIdx()])
398 a2.SetHybridization(a.GetHybridization())
399 a2.SetIsAromatic(a.GetIsAromatic())
400 a2.SetNumExplicitHs(a.GetNumExplicitHs())
401 a2.SetFormalCharge(a.GetFormalCharge())
402 SanitizeMol(mol2)
403 if hasattr(mol2, '__sssAtoms'):
404 mol2.__sssAtoms = None
405 else:
406 raise ValueError("No matching found")
407 return mol2
408
409
410
411
412
414 import sys
415 import doctest
416 failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose)
417 sys.exit(failed)
418
419
420 if __name__ == '__main__':
421 _runDoctests()
422