Embedder.h

Go to the documentation of this file.
00001 //
00002 //  Copyright (C) 2004-2008 Greg Landrum and Rational Discovery LLC
00003 //
00004 //   @@ All Rights Reserved  @@
00005 //
00006 
00007 #ifndef _RD_EMBEDDER_H_
00008 #define _RD_EMBEDDER_H_
00009 
00010 #include <map>
00011 #include <Geometry/point.h>
00012 #include <GraphMol/ROMol.h>
00013 
00014 namespace RDKit {
00015   namespace DGeomHelpers {
00016     
00017     //! Compute an embedding (in 3D) for the specified molecule using Distance Geometry
00018     /*!
00019       The following operations are performed (in order) here:
00020        -# Build a distance bounds matrix based on the topology, including 1-5
00021           distances but not vdW scaling
00022        -# Triangle smooth this bounds matrix
00023        -# If step 2 fails - repeat step 1, this time without 1-5 bounds and with vdW
00024           scaling, and repeat step 2
00025        -# Pick a distance matrix at random using the bounds matrix
00026        -# Compute initial coordinates from the distance matrix
00027        -# Repeat the previous two steps until maxIterations is reached or embedding is successful
00028        -# Adjust initial coordinates by minimizing a Distance Violation error function
00029 
00030        **NOTE**: if the molecule has multiple fragments, they will be embedded separately;
00031          this means that they will likely occupy the same region of space.
00032        
00033       \param mol            Molecule of interest
00034       \param maxIterations  Max. number of times the embedding will be tried if coordinates are 
00035                             not obtained successfully. The default value is 10x the number of atoms.
00036       \param seed           provides a seed for the random number generator (so that the same
00037                             coordinates can be obtained for a molecule on multiple runs).
00038                             If negative, the RNG will not be seeded.
00039       \param clearConfs     Clear all existing conformations on the molecule
00040       \param useRandomCoords  Start the embedding from random coordinates instead of
00041                               using eigenvalues of the distance matrix.
00042       \param boxSizeMult    Determines the size of the box that is used for
00043                             random coordinates. If this is a positive number, the 
00044                             side length will equal the largest element of the distance
00045                             matrix times \c boxSizeMult. If this is a negative number,
00046                             the side length will equal \c -boxSizeMult (i.e. independent
00047                             of the elements of the distance matrix).
00048       \param randNegEig     Picks coordinates at random when an embedding process produces
00049                             negative eigenvalues
00050       \param numZeroFail    Fail embedding if we find this many or more zero eigenvalues
00051                             (within a tolerance)
00052       \param coordMap  a map of int to Point3D, between atom IDs and
00053                        their locations.  If this container is provided, the coordinates
00054                        are used to set distance constraints on the embedding. The resulting
00055                        conformer(s) should have distances between the specified atoms that
00056                        reproduce those between the points in \c coordMap. Because the embedding
00057                        produces a molecule in an arbitrary reference frame, an alignment step
00058                        is required to actually reproduce the provided coordinates.
00059       \param optimizerForceTol set the tolerance on forces in the DGeom optimizer
00060                                (this shouldn't normally be altered in client code).
00061       \param basinThresh    set the basin threshold for the DGeom force field
00062                             (this shouldn't normally be altered in client code).
00063 
00064 
00065       \return ID of the conformation added to the molecule, -1 if the embedding failed
00066     */
00067     int EmbedMolecule(ROMol &mol, unsigned int maxIterations=0, int seed=-1,
00068                       bool clearConfs=true,
00069                       bool useRandomCoords=false,double boxSizeMult=2.0,
00070                       bool randNegEig=true,
00071                       unsigned int numZeroFail=1,
00072                       const std::map<int,RDGeom::Point3D> *coordMap=0,
00073                       double optimizerForceTol=1e-3,
00074                       double basinThresh=5.0);
00075 
00076     //! Embed multiple conformations for a molecule
00077     /*!
00078       This is roughly equivalent to calling EmbedMolecule multiple times, except that the bounds
00079       matrix is computed only once from the topology
00080 
00081        **NOTE**: if the molecule has multiple fragments, they will be embedded separately;
00082          this means that they will likely occupy the same region of space.
00083 
00084 
00085       \param mol            Molecule of interest
00086       \param numConfs       Number of conformations to be generated
00087       \param maxIterations  Max. number of times the embedding will be tried if coordinates are 
00088                             not obtained successfully. NOTE(review): documented default is 10x the number of atoms, but 30 is declared; confirm.
00089       \param seed           provides a seed for the random number generator (so that the same
00090                             coordinates can be obtained for a molecule on multiple runs).
00091                             If negative, the RNG will not be seeded.
00092       \param clearConfs     Clear all existing conformations on the molecule
00093       \param useRandomCoords  Start the embedding from random coordinates instead of
00094                               using eigenvalues of the distance matrix.
00095       \param boxSizeMult    Determines the size of the box that is used for
00096                             random coordinates. If this is a positive number, the 
00097                             side length will equal the largest element of the distance
00098                             matrix times \c boxSizeMult. If this is a negative number,
00099                             the side length will equal \c -boxSizeMult (i.e. independent
00100                             of the elements of the distance matrix).
00101       \param randNegEig     Picks coordinates at random when an embedding process produces
00102                             negative eigenvalues
00103       \param numZeroFail    Fail embedding if we find this many or more zero eigenvalues
00104                             (within a tolerance)
00105       \param pruneRmsThresh Retain only the conformations out of 'numConfs' after embedding that are
00106                             at least this far apart from each other. RMSD is computed on the heavy atoms.
00107                             Pruning is greedy; i.e. the first embedded conformation is retained and from
00108                             then on only those that are at least pruneRmsThresh away from already 
00109                             retained conformations are kept. The pruning is done after embedding and 
00110                             bounds violation minimization. No pruning by default.
00111       \param coordMap  a map of int to Point3D, between atom IDs and
00112                        their locations.  If this container is provided, the coordinates
00113                        are used to set distance constraints on the embedding. The resulting
00114                        conformer(s) should have distances between the specified atoms that
00115                        reproduce those between the points in \c coordMap. Because the embedding
00116                        produces a molecule in an arbitrary reference frame, an alignment step
00117                        is required to actually reproduce the provided coordinates.
00118 
00119       \param optimizerForceTol set the tolerance on forces in the DGeom optimizer
00120                                (this shouldn't normally be altered in client code).
00121       \param basinThresh    set the basin threshold for the DGeom force field
00122                             (this shouldn't normally be altered in client code).
00123 
00124       \return an INT_VECT of conformer ids
00125 
00126     */
00127     INT_VECT EmbedMultipleConfs(ROMol &mol, unsigned int numConfs=10,
00128                                 unsigned int maxIterations=30, 
00129                                 int seed=-1, bool clearConfs=true, 
00130                                 bool useRandomCoords=false,double boxSizeMult=2.0,
00131                                 bool randNegEig=true, unsigned int numZeroFail=1,
00132                                 double pruneRmsThresh=-1.0,
00133                                 const std::map<int,RDGeom::Point3D> *coordMap=0,
00134                                 double optimizerForceTol=1e-3,double basinThresh=5.0);
00135 
00136   }
00137 }
00138 
00139 #endif

Generated on Fri Apr 3 06:03:01 2009 for RDCode by  doxygen 1.5.6