AtomPairs.h

Go to the documentation of this file.
00001 //
00002 //  Copyright (C) 2007-2008 Greg Landrum
00003 //
00004 //   @@ All Rights Reserved  @@
00005 //
00006 
00007 /*! \file AtomPairs.h
00008 
00009   \brief Use MolDescriptors.h in client code.
00010 
00011 */
00012 #ifndef __RD_ATOMPAIRS_H__
00013 #define __RD_ATOMPAIRS_H__
00014 
00015 #include <DataStructs/SparseIntVect.h>
00016 #include <DataStructs/BitVects.h>
00017 #include <boost/cstdint.hpp>
00018 namespace RDKit {
00019   class Atom;
00020 
00021   namespace Descriptors {
00022     namespace AtomPairs {
00023       const std::string atomPairsVersion="1.1.0"; //!< version tag (presumably of the atom-pair scheme implemented here)
00024       const unsigned int numTypeBits=4; //!< sizes the atomNumberTypes table: 1<<4 == 16 entries
00025       const unsigned int atomNumberTypes[1<<numTypeBits]={5,6,7,8,9,14,15,16,17,33,34,35,51,52,43}; //!< 15 values given for 16 slots, so the last slot is zero-initialized; presumably atomic numbers - TODO confirm
00026       const unsigned int numPiBits=2; //!< bit width behind maxNumPi
00027       const unsigned int maxNumPi=(1<<numPiBits)-1; //!< (1<<2)-1 == 3
00028       const unsigned int numBranchBits=3; //!< bit width behind maxNumBranches
00029       const unsigned int maxNumBranches=(1<<numBranchBits)-1; //!< (1<<3)-1 == 7
00030       const unsigned int codeSize=numTypeBits+numPiBits+numBranchBits; //!< 4+2+3 == 9; presumably the bit width of one atom code
00031       const unsigned int numPathBits=5; //!< bit width behind maxPathLen
00032       const unsigned int maxPathLen=(1<<numPathBits)-1; //!< (1<<5)-1 == 31
00033       const unsigned int numAtomPairFingerprintBits=numPathBits+2*codeSize; //!< 5+2*9 == 23: numPathBits plus twice codeSize
00034     
00035       //! returns a numeric code for the atom (the atom's hash in the
00036       //! atom-pair scheme)
00037       /*!
00038         \param atom            the atom to be considered
00039         \param branchSubtract  (optional, defaults to 0) a constant to
00040                                subtract from the number of neighbors
00041                                when the hash is calculated (used in
00042                                the topological torsions code)
00043       */
00044       boost::uint32_t getAtomCode(const Atom *atom,unsigned int branchSubtract=0);
00045 
00046       //! returns an atom pair hash based on two atom hashes and the
00047       //! distance between the atoms.
00048       /*!
00049         \param codeI  the hash for the first atom (e.g. from getAtomCode())
00050         \param codeJ  the hash for the second atom (e.g. from getAtomCode())
00051         \param dist   the distance (number of bonds) between the two
00052                       atoms
00053        */
00054       boost::uint32_t getAtomPairCode(boost::uint32_t codeI,boost::uint32_t codeJ,
00055                                       unsigned int dist);
00056 
00057       //! returns the atom-pair fingerprint for a molecule
00058       /*!
00059         The algorithm used is described here:
00060         R.E. Carhart, D.H. Smith, R. Venkataraghavan; "Atom Pairs as
00061           Molecular Features in Structure-Activity Studies: Definition
00062           and Applications" JCICS 25, 64-73 (1985).
00063 
00064
00065         \param mol         the molecule to be fingerprinted
00066         \param fromAtoms   (optional, defaults to a null pointer) if provided,
00067             only atom pairs that involve the specified atoms will be included
00068         \return a pointer to the fingerprint. The client is
00069                 responsible for calling delete on this.
00070 
00071        */
00072       SparseIntVect<boost::int32_t> *getAtomPairFingerprint(const ROMol &mol,
00073                                                             const std::vector<boost::uint32_t> *fromAtoms=0);
00074 
00075       //! returns the hashed atom-pair fingerprint for a molecule
00076       /*!
00077         \param mol     the molecule to be fingerprinted
00078         \param nBits   the length of the fingerprint to generate (defaults to 2048)
00079         \return a pointer to the fingerprint. The client is
00080                 responsible for calling delete on this.
00081 
00082        */
00083       ExplicitBitVect *getHashedAtomPairFingerprint(const ROMol &mol,
00084                                                     unsigned int nBits=2048);
00085 
00086       
00087       //! returns a topological torsion hash based on the atom hashes
00088       //! passed in
00089       /*!
00090         \param atomCodes  the vector of atom hashes
00091        */
00092       boost::uint64_t getTopologicalTorsionCode(const std::vector<boost::uint32_t> &atomCodes);
00093 
00094       //! returns the topological-torsion fingerprint for a molecule
00095       /*!
00096         The algorithm used is described here:
00097         R. Nilakantan, N. Bauman, J. S. Dixon, R. Venkataraghavan;
00098         "Topological Torsion: A New Molecular Descriptor for SAR Applications.
00099         Comparison with Other Descriptors" JCICS 27, 82-85 (1987).
00100 
00101         \param mol          the molecule to be fingerprinted
00102         \param targetSize   the number of atoms to include in the torsions (defaults to 4)
00103         \param fromAtoms    (optional, defaults to a null pointer) if provided, only torsions
00104             that start or end at the specified atoms will be included in the fingerprint
00105 
00106         \return a pointer to the fingerprint. The client is
00107                 responsible for calling delete on this.
00108 
00109        */
00110       SparseIntVect<boost::int64_t > *getTopologicalTorsionFingerprint(const ROMol &mol,
00111                                                                        unsigned int targetSize=4,
00112                                                                        const std::vector<boost::uint32_t> *fromAtoms=0);
00113     }    
00114   } // end of namespace Descriptors
00115 }
00116 
00117 #endif

Generated on Fri Apr 3 06:03:01 2009 for RDCode by  doxygen 1.5.6