00001 // 00002 // Copyright (C) 2004-2008 Greg Landrum and Rational Discovery LLC 00003 // 00004 // @@ All Rights Reserved @@ 00005 // 00006 00007 #ifndef _RD_EMBEDDER_H_ 00008 #define _RD_EMBEDDER_H_ 00009 00010 #include <map> 00011 #include <Geometry/point.h> 00012 #include <GraphMol/ROMol.h> 00013 00014 namespace RDKit { 00015 namespace DGeomHelpers { 00016 00017 //! Compute an embedding (in 3D) for the specified molecule using Distance Geometry 00018 /*! 00019 The following operations are performed (in order) here: 00020 -# Build a distance bounds matrix based on the topology, including 1-5 00021 distances but not VDW scaling 00022 -# Triangle smooth this bounds matrix 00023 -# If step 2 fails - repeat step 1, this time without 1-5 bounds and with vdW 00024 scaling, and repeat step 2 00025 -# Pick a distance matrix at random using the bounds matrix 00026 -# Compute initial coordinates from the distance matrix 00027 -# Repeat steps 3 and 4 until maxIterations is reached or embedding is successful 00028 -# Adjust initial coordinates by minimizing a Distance Violation error function 00029 00030 **NOTE**: if the molecule has multiple fragments, they will be embedded separately, 00031 this means that they will likely occupy the same region of space. 00032 00033 \param mol Molecule of interest 00034 \param maxIterations Max. number of times the embedding will be tried if coordinates are 00035 not obtained successfully. The default value is 10x the number of atoms. 00036 \param seed provides a seed for the random number generator (so that the same 00037 coordinates can be obtained for a molecule on multiple runs) 00038 If negative, the RNG will not be seeded. 00039 \param clearConfs Clear all existing conformations on the molecule 00040 \param useRandomCoords Start the embedding from random coordinates instead of 00041 using eigenvalues of the distance matrix. 00042 \param boxSizeMult Determines the size of the box that is used for 00043 random coordinates. 
If this is a positive number, the 00044 side length will equal the largest element of the distance 00045 matrix times \c boxSizeMult. If this is a negative number, 00046 the side length will equal \c -boxSizeMult (i.e. independent 00047 of the elements of the distance matrix). 00048 \param randNegEig Picks coordinates at random when a embedding process produces 00049 negative eigenvalues 00050 \param numZeroFail Fail embedding if we find this many or more zero eigenvalues 00051 (within a tolerance) 00052 \param coordMap a map of int to Point3D, between atom IDs and their locations 00053 their locations. If this container is provided, the coordinates 00054 are used to set distance constraints on the embedding. The resulting 00055 conformer(s) should have distances between the specified atoms that 00056 reproduce those between the points in \c coordMap. Because the embedding 00057 produces a molecule in an arbitrary reference frame, an alignment step 00058 is required to actually reproduce the provided coordinates. 00059 \param optimizerForceTol set the tolerance on forces in the distgeom optimizer 00060 (this shouldn't normally be altered in client code). 00061 \param basinThresh set the basin threshold for the DGeom force field, 00062 (this shouldn't normally be altered in client code). 00063 00064 00065 \return ID of the conformations added to the molecule, -1 if the emdedding failed 00066 */ 00067 int EmbedMolecule(ROMol &mol, unsigned int maxIterations=0, int seed=-1, 00068 bool clearConfs=true, 00069 bool useRandomCoords=false,double boxSizeMult=2.0, 00070 bool randNegEig=true, 00071 unsigned int numZeroFail=1, 00072 const std::map<int,RDGeom::Point3D> *coordMap=0, 00073 double optimizerForceTol=1e-3, 00074 double basinThresh=5.0); 00075 00076 //*! Embed multiple conformations for a molecule 00077 /*! 
00078 This is kind of equivalent to calling EmbedMolecule multiple times - just that the bounds 00079 matrix is computed only once from the topology 00080 00081 **NOTE**: if the molecule has multiple fragments, they will be embedded separately, 00082 this means that they will likely occupy the same region of space. 00083 00084 00085 \param mol Molecule of interest 00086 \param numConfs Number of conformations to be generated 00087 \param maxIterations Max. number of times the embedding will be tried if coordinates are 00088 not obtained successfully. The default value is 10x the number of atoms. 00089 \param seed provides a seed for the random number generator (so that the same 00090 coordinates can be obtained for a molecule on multiple runs). 00091 If negative, the RNG will not be seeded. 00092 \param clearConfs Clear all existing conformations on the molecule 00093 \param useRandomCoords Start the embedding from random coordinates instead of 00094 using eigenvalues of the distance matrix. 00095 \param boxSizeMult Determines the size of the box that is used for 00096 random coordinates. If this is a positive number, the 00097 side length will equal the largest element of the distance 00098 matrix times \c boxSizeMult. If this is a negative number, 00099 the side length will equal \c -boxSizeMult (i.e. independent 00100 of the elements of the distance matrix). 00101 \param randNegEig Picks coordinates at random when a embedding process produces 00102 negative eigenvalues 00103 \param numZeroFail Fail embedding if we find this many or more zero eigenvalues 00104 (within a tolerance) 00105 \param pruneRmsThresh Retain only the conformations out of 'numConfs' after embedding that are 00106 at least this far apart from each other. RMSD is computed on the heavy atoms. 00107 Prunining is greedy; i.e. the first embedded conformation is retained and from 00108 then on only those that are atleast pruneRmsThresh away from already 00109 retained conformations are kept. 
The pruning is done after embedding and 00110 bounds violation minimization. No pruning by default. 00111 \param coordMap a map of int to Point3D, between atom IDs and their locations 00112 their locations. If this container is provided, the coordinates 00113 are used to set distance constraints on the embedding. The resulting 00114 conformer(s) should have distances between the specified atoms that 00115 reproduce those between the points in \c coordMap. Because the embedding 00116 produces a molecule in an arbitrary reference frame, an alignment step 00117 is required to actually reproduce the provided coordinates. 00118 00119 \param optimizerForceTol set the tolerance on forces in the DGeom optimizer 00120 (this shouldn't normally be altered in client code). 00121 \param basinThresh set the basin threshold for the DGeom force field, 00122 (this shouldn't normally be altered in client code). 00123 00124 \return an INT_VECT of conformer ids 00125 00126 */ 00127 INT_VECT EmbedMultipleConfs(ROMol &mol, unsigned int numConfs=10, 00128 unsigned int maxIterations=30, 00129 int seed=-1, bool clearConfs=true, 00130 bool useRandomCoords=false,double boxSizeMult=2.0, 00131 bool randNegEig=true, unsigned int numZeroFail=1, 00132 double pruneRmsThresh=-1.0, 00133 const std::map<int,RDGeom::Point3D> *coordMap=0, 00134 double optimizerForceTol=1e-3,double basinThresh=5.0); 00135 00136 } 00137 } 00138 00139 #endif
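// 1.5.6  (NOTE(review): stray version footer, presumably left by the documentation generator this listing was extracted from; not part of the header)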