RDKit
Open-source cheminformatics and machine learning.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
AtomPairs.h File Reference
#include <DataStructs/SparseIntVect.h>
#include <DataStructs/BitVects.h>
#include <boost/cstdint.hpp>

Go to the source code of this file.

Namespaces

 RDKit
 Includes a bunch of functionality for handling Atom and Bond queries.
 
 RDKit::AtomPairs
 

Functions

boost::uint32_t RDKit::AtomPairs::getAtomCode (const Atom *atom, unsigned int branchSubtract=0, bool includeChirality=false)
 
boost::uint32_t RDKit::AtomPairs::getAtomPairCode (boost::uint32_t codeI, boost::uint32_t codeJ, unsigned int dist, bool includeChirality=false)
 
SparseIntVect< boost::int32_t > * RDKit::AtomPairs::getAtomPairFingerprint (const ROMol &mol, unsigned int minLength, unsigned int maxLength, const std::vector< boost::uint32_t > *fromAtoms=0, const std::vector< boost::uint32_t > *ignoreAtoms=0, const std::vector< boost::uint32_t > *atomInvariants=0, bool includeChirality=false, bool use2D=true)
 returns the atom-pair fingerprint for a molecule More...
 
SparseIntVect< boost::int32_t > * RDKit::AtomPairs::getAtomPairFingerprint (const ROMol &mol, const std::vector< boost::uint32_t > *fromAtoms=0, const std::vector< boost::uint32_t > *ignoreAtoms=0, const std::vector< boost::uint32_t > *atomInvariants=0, bool includeChirality=false, bool use2D=true)
 
SparseIntVect< boost::int32_t > * RDKit::AtomPairs::getHashedAtomPairFingerprint (const ROMol &mol, unsigned int nBits=2048, unsigned int minLength=1, unsigned int maxLength=maxPathLen-1, const std::vector< boost::uint32_t > *fromAtoms=0, const std::vector< boost::uint32_t > *ignoreAtoms=0, const std::vector< boost::uint32_t > *atomInvariants=0, bool includeChirality=false, bool use2D=true)
 returns the hashed atom-pair fingerprint for a molecule More...
 
ExplicitBitVect * RDKit::AtomPairs::getHashedAtomPairFingerprintAsBitVect (const ROMol &mol, unsigned int nBits=2048, unsigned int minLength=1, unsigned int maxLength=maxPathLen-1, const std::vector< boost::uint32_t > *fromAtoms=0, const std::vector< boost::uint32_t > *ignoreAtoms=0, const std::vector< boost::uint32_t > *atomInvariants=0, unsigned int nBitsPerEntry=4, bool includeChirality=false, bool use2D=true)
 returns the hashed atom-pair fingerprint for a molecule as a bit vector More...
 
boost::uint64_t RDKit::AtomPairs::getTopologicalTorsionCode (const std::vector< boost::uint32_t > &atomCodes, bool includeChirality=false)
 
SparseIntVect< boost::int64_t > * RDKit::AtomPairs::getTopologicalTorsionFingerprint (const ROMol &mol, unsigned int targetSize=4, const std::vector< boost::uint32_t > *fromAtoms=0, const std::vector< boost::uint32_t > *ignoreAtoms=0, const std::vector< boost::uint32_t > *atomInvariants=0, bool includeChirality=false)
 returns the topological-torsion fingerprint for a molecule More...
 
SparseIntVect< boost::int64_t > * RDKit::AtomPairs::getHashedTopologicalTorsionFingerprint (const ROMol &mol, unsigned int nBits=2048, unsigned int targetSize=4, const std::vector< boost::uint32_t > *fromAtoms=0, const std::vector< boost::uint32_t > *ignoreAtoms=0, const std::vector< boost::uint32_t > *atomInvariants=0, bool includeChirality=false)
 returns a hashed topological-torsion fingerprint for a molecule More...
 
ExplicitBitVect * RDKit::AtomPairs::getHashedTopologicalTorsionFingerprintAsBitVect (const ROMol &mol, unsigned int nBits=2048, unsigned int targetSize=4, const std::vector< boost::uint32_t > *fromAtoms=0, const std::vector< boost::uint32_t > *ignoreAtoms=0, const std::vector< boost::uint32_t > *atomInvariants=0, unsigned int nBitsPerEntry=4, bool includeChirality=false)
 returns a hashed topological-torsion fingerprint for a molecule as a bit vector More...
 

Variables

const std::string RDKit::AtomPairs::atomPairsVersion ="1.1.0"
 
const unsigned int RDKit::AtomPairs::numTypeBits =4
 
const unsigned int RDKit::AtomPairs::atomNumberTypes [1<< numTypeBits] ={5,6,7,8,9,14,15,16,17,33,34,35,51,52,43}
 
const unsigned int RDKit::AtomPairs::numPiBits =2
 
const unsigned int RDKit::AtomPairs::maxNumPi =(1<<numPiBits)-1
 
const unsigned int RDKit::AtomPairs::numBranchBits =3
 
const unsigned int RDKit::AtomPairs::maxNumBranches =(1<<numBranchBits)-1
 
const unsigned int RDKit::AtomPairs::numChiralBits =2
 
const unsigned int RDKit::AtomPairs::codeSize =numTypeBits+numPiBits+numBranchBits
 
const unsigned int RDKit::AtomPairs::numPathBits =5
 
const unsigned int RDKit::AtomPairs::maxPathLen =(1<<numPathBits)-1
 
const unsigned int RDKit::AtomPairs::numAtomPairFingerprintBits =numPathBits+2*codeSize
 

Detailed Description

A few quick notes about fingerprint size and the way chirality is handled in these functions.

By default the atom-pair and topological-torsion fingerprints do not include any information about chirality; the atom invariants only include information about the atomic number, number of pi electrons, and degree. When chirality is included, two additional bits are added to the atom invariants to flag R/S/no chirality. These additional bits change the size of the atom invariants and either the size of the final fingerprint (atom pairs) or the maximum allowed path length (torsions). This means that even for achiral molecules, the fingerprints generated when includeChirality is true differ from those generated when it is false.

Definition in file AtomPairs.h.