RDKit
Open-source cheminformatics and machine learning.
RDKit::AtomPairs Namespace Reference

Functions

boost::uint32_t getAtomCode (const Atom *atom, unsigned int branchSubtract=0, bool includeChirality=false)
 
boost::uint32_t getAtomPairCode (boost::uint32_t codeI, boost::uint32_t codeJ, unsigned int dist, bool includeChirality=false)
 
SparseIntVect< boost::int32_t > * getAtomPairFingerprint (const ROMol &mol, unsigned int minLength, unsigned int maxLength, const std::vector< boost::uint32_t > *fromAtoms=0, const std::vector< boost::uint32_t > *ignoreAtoms=0, const std::vector< boost::uint32_t > *atomInvariants=0, bool includeChirality=false, bool use2D=true, int confId=-1)
 returns the atom-pair fingerprint for a molecule More...
 
SparseIntVect< boost::int32_t > * getAtomPairFingerprint (const ROMol &mol, const std::vector< boost::uint32_t > *fromAtoms=0, const std::vector< boost::uint32_t > *ignoreAtoms=0, const std::vector< boost::uint32_t > *atomInvariants=0, bool includeChirality=false, bool use2D=true, int confId=-1)
 
SparseIntVect< boost::int32_t > * getHashedAtomPairFingerprint (const ROMol &mol, unsigned int nBits=2048, unsigned int minLength=1, unsigned int maxLength=maxPathLen-1, const std::vector< boost::uint32_t > *fromAtoms=0, const std::vector< boost::uint32_t > *ignoreAtoms=0, const std::vector< boost::uint32_t > *atomInvariants=0, bool includeChirality=false, bool use2D=true, int confId=-1)
 returns the hashed atom-pair fingerprint for a molecule More...
 
ExplicitBitVect * getHashedAtomPairFingerprintAsBitVect (const ROMol &mol, unsigned int nBits=2048, unsigned int minLength=1, unsigned int maxLength=maxPathLen-1, const std::vector< boost::uint32_t > *fromAtoms=0, const std::vector< boost::uint32_t > *ignoreAtoms=0, const std::vector< boost::uint32_t > *atomInvariants=0, unsigned int nBitsPerEntry=4, bool includeChirality=false, bool use2D=true, int confId=-1)
 returns the hashed atom-pair fingerprint for a molecule as a bit vector More...
 
boost::uint64_t getTopologicalTorsionCode (const std::vector< boost::uint32_t > &atomCodes, bool includeChirality=false)
 
SparseIntVect< boost::int64_t > * getTopologicalTorsionFingerprint (const ROMol &mol, unsigned int targetSize=4, const std::vector< boost::uint32_t > *fromAtoms=0, const std::vector< boost::uint32_t > *ignoreAtoms=0, const std::vector< boost::uint32_t > *atomInvariants=0, bool includeChirality=false)
 returns the topological-torsion fingerprint for a molecule More...
 
SparseIntVect< boost::int64_t > * getHashedTopologicalTorsionFingerprint (const ROMol &mol, unsigned int nBits=2048, unsigned int targetSize=4, const std::vector< boost::uint32_t > *fromAtoms=0, const std::vector< boost::uint32_t > *ignoreAtoms=0, const std::vector< boost::uint32_t > *atomInvariants=0, bool includeChirality=false)
 returns a hashed topological-torsion fingerprint for a molecule More...
 
ExplicitBitVect * getHashedTopologicalTorsionFingerprintAsBitVect (const ROMol &mol, unsigned int nBits=2048, unsigned int targetSize=4, const std::vector< boost::uint32_t > *fromAtoms=0, const std::vector< boost::uint32_t > *ignoreAtoms=0, const std::vector< boost::uint32_t > *atomInvariants=0, unsigned int nBitsPerEntry=4, bool includeChirality=false)
 returns a hashed topological-torsion fingerprint for a molecule as a bit vector More...
 

Variables

const std::string atomPairsVersion = "1.1.0"
 
const unsigned int numTypeBits = 4
 
const unsigned int atomNumberTypes [1<< numTypeBits]
 
const unsigned int numPiBits = 2
 
const unsigned int maxNumPi = (1 << numPiBits) - 1
 
const unsigned int numBranchBits = 3
 
const unsigned int maxNumBranches = (1 << numBranchBits) - 1
 
const unsigned int numChiralBits = 2
 
const unsigned int codeSize = numTypeBits + numPiBits + numBranchBits
 
const unsigned int numPathBits = 5
 
const unsigned int maxPathLen = (1 << numPathBits) - 1
 
const unsigned int numAtomPairFingerprintBits
 

Function Documentation

boost::uint32_t RDKit::AtomPairs::getAtomCode ( const Atom *  atom,
unsigned int  branchSubtract = 0,
bool  includeChirality = false 
)

returns a numeric code for the atom (the atom's hash in the atom-pair scheme)

Parameters
atom: the atom to be considered
branchSubtract: (optional) a constant to subtract from the number of neighbors when the hash is calculated (used in the topological torsions code)
includeChirality: toggles the inclusion of bits indicating R/S chirality
boost::uint32_t RDKit::AtomPairs::getAtomPairCode ( boost::uint32_t  codeI,
boost::uint32_t  codeJ,
unsigned int  dist,
bool  includeChirality = false 
)

returns an atom pair hash based on two atom hashes and the distance between the atoms.

Parameters
codeI: the hash for the first atom
codeJ: the hash for the second atom
dist: the distance (number of bonds) between the two atoms
includeChirality: toggles the inclusion of bits indicating R/S chirality
SparseIntVect<boost::int32_t>* RDKit::AtomPairs::getAtomPairFingerprint ( const ROMol &  mol,
unsigned int  minLength,
unsigned int  maxLength,
const std::vector< boost::uint32_t > *  fromAtoms = 0,
const std::vector< boost::uint32_t > *  ignoreAtoms = 0,
const std::vector< boost::uint32_t > *  atomInvariants = 0,
bool  includeChirality = false,
bool  use2D = true,
int  confId = -1 
)

returns the atom-pair fingerprint for a molecule

The algorithm used is described here: R.E. Carhart, D.H. Smith, R. Venkataraghavan; "Atom Pairs as Molecular Features in Structure-Activity Studies: Definition and Applications" JCICS 25, 64-73 (1985).

Parameters
mol: the molecule to be fingerprinted
minLength: minimum distance between atoms to be considered in a pair. Default is 1 bond.
maxLength: maximum distance between atoms to be considered in a pair. Default is maxPathLen-1 bonds.
fromAtoms: if provided, only atom pairs that involve the specified atoms will be included in the fingerprint
ignoreAtoms: if provided, any atom pairs that include the specified atoms will not be included in the fingerprint
atomInvariants: a list of invariants to use for the atom hashes. Note: only the first codeSize bits of each invariant are used.
includeChirality: if set, chirality will be used in the atom invariants (note: this is ignored if atomInvariants are provided)
use2D: if set, the 2D (topological) distance matrix is used.
confId: the conformation to use if 3D distances are being used
Returns
a pointer to the fingerprint. The client is responsible for calling delete on this.
SparseIntVect<boost::int32_t>* RDKit::AtomPairs::getAtomPairFingerprint ( const ROMol &  mol,
const std::vector< boost::uint32_t > *  fromAtoms = 0,
const std::vector< boost::uint32_t > *  ignoreAtoms = 0,
const std::vector< boost::uint32_t > *  atomInvariants = 0,
bool  includeChirality = false,
bool  use2D = true,
int  confId = -1 
)

This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

SparseIntVect<boost::int32_t>* RDKit::AtomPairs::getHashedAtomPairFingerprint ( const ROMol &  mol,
unsigned int  nBits = 2048,
unsigned int  minLength = 1,
unsigned int  maxLength = maxPathLen-1,
const std::vector< boost::uint32_t > *  fromAtoms = 0,
const std::vector< boost::uint32_t > *  ignoreAtoms = 0,
const std::vector< boost::uint32_t > *  atomInvariants = 0,
bool  includeChirality = false,
bool  use2D = true,
int  confId = -1 
)

returns the hashed atom-pair fingerprint for a molecule

Parameters
mol: the molecule to be fingerprinted
nBits: the length of the fingerprint to generate
minLength: minimum distance between atoms to be considered in a pair. Default is 1 bond.
maxLength: maximum distance between atoms to be considered in a pair. Default is maxPathLen-1 bonds.
fromAtoms: if provided, only atom pairs that involve the specified atoms will be included in the fingerprint
ignoreAtoms: if provided, any atom pairs that include the specified atoms will not be included in the fingerprint
atomInvariants: a list of invariants to use for the atom hashes. Note: only the first codeSize bits of each invariant are used.
includeChirality: if set, chirality will be used in the atom invariants (note: this is ignored if atomInvariants are provided)
use2D: if set, the 2D (topological) distance matrix is used.
confId: the conformation to use if 3D distances are being used
Returns
a pointer to the fingerprint. The client is responsible for calling delete on this.
ExplicitBitVect* RDKit::AtomPairs::getHashedAtomPairFingerprintAsBitVect ( const ROMol &  mol,
unsigned int  nBits = 2048,
unsigned int  minLength = 1,
unsigned int  maxLength = maxPathLen-1,
const std::vector< boost::uint32_t > *  fromAtoms = 0,
const std::vector< boost::uint32_t > *  ignoreAtoms = 0,
const std::vector< boost::uint32_t > *  atomInvariants = 0,
unsigned int  nBitsPerEntry = 4,
bool  includeChirality = false,
bool  use2D = true,
int  confId = -1 
)

returns the hashed atom-pair fingerprint for a molecule as a bit vector

Parameters
mol: the molecule to be fingerprinted
nBits: the length of the fingerprint to generate
minLength: minimum distance between atoms to be considered in a pair. Default is 1 bond.
maxLength: maximum distance between atoms to be considered in a pair. Default is maxPathLen-1 bonds.
fromAtoms: if provided, only atom pairs that involve the specified atoms will be included in the fingerprint
ignoreAtoms: if provided, any atom pairs that include the specified atoms will not be included in the fingerprint
atomInvariants: a list of invariants to use for the atom hashes. Note: only the first codeSize bits of each invariant are used.
nBitsPerEntry: number of bits to use in simulating counts
includeChirality: if set, chirality will be used in the atom invariants (note: this is ignored if atomInvariants are provided)
use2D: if set, the 2D (topological) distance matrix is used.
confId: the conformation to use if 3D distances are being used
Returns
a pointer to the fingerprint. The client is responsible for calling delete on this.
SparseIntVect<boost::int64_t>* RDKit::AtomPairs::getHashedTopologicalTorsionFingerprint ( const ROMol &  mol,
unsigned int  nBits = 2048,
unsigned int  targetSize = 4,
const std::vector< boost::uint32_t > *  fromAtoms = 0,
const std::vector< boost::uint32_t > *  ignoreAtoms = 0,
const std::vector< boost::uint32_t > *  atomInvariants = 0,
bool  includeChirality = false 
)

returns a hashed topological-torsion fingerprint for a molecule

The algorithm used is described here: R. Nilakantan, N. Bauman, J. S. Dixon, R. Venkataraghavan; "Topological Torsion: A New Molecular Descriptor for SAR Applications. Comparison with Other Descriptors" JCICS 27, 82-85 (1987).

Parameters
mol: the molecule to be fingerprinted
nBits: number of bits to include in the fingerprint
targetSize: the number of atoms to include in the "torsions"
fromAtoms: if provided, only torsions that start or end at the specified atoms will be included in the fingerprint
ignoreAtoms: if provided, any torsions that include the specified atoms will not be included in the fingerprint
atomInvariants: a list of invariants to use for the atom hashes. Note: only the first codeSize bits of each invariant are used.
includeChirality: if set, chirality will be used in the atom invariants (note: this is ignored if atomInvariants are provided)
Returns
a pointer to the fingerprint. The client is responsible for calling delete on this.
ExplicitBitVect* RDKit::AtomPairs::getHashedTopologicalTorsionFingerprintAsBitVect ( const ROMol &  mol,
unsigned int  nBits = 2048,
unsigned int  targetSize = 4,
const std::vector< boost::uint32_t > *  fromAtoms = 0,
const std::vector< boost::uint32_t > *  ignoreAtoms = 0,
const std::vector< boost::uint32_t > *  atomInvariants = 0,
unsigned int  nBitsPerEntry = 4,
bool  includeChirality = false 
)

returns a hashed topological-torsion fingerprint for a molecule as a bit vector

Parameters
mol: the molecule to be fingerprinted
nBits: number of bits to include in the fingerprint
targetSize: the number of atoms to include in the "torsions"
fromAtoms: if provided, only torsions that start or end at the specified atoms will be included in the fingerprint
ignoreAtoms: if provided, any torsions that include the specified atoms will not be included in the fingerprint
atomInvariants: a list of invariants to use for the atom hashes. Note: only the first codeSize bits of each invariant are used.
nBitsPerEntry: number of bits to use in simulating counts
includeChirality: if set, chirality will be used in the atom invariants (note: this is ignored if atomInvariants are provided)
Returns
a pointer to the fingerprint. The client is responsible for calling delete on this.
boost::uint64_t RDKit::AtomPairs::getTopologicalTorsionCode ( const std::vector< boost::uint32_t > &  atomCodes,
bool  includeChirality = false 
)

returns a topological torsion hash based on the atom hashes passed in

Parameters
atomCodes: the vector of atom hashes
SparseIntVect<boost::int64_t>* RDKit::AtomPairs::getTopologicalTorsionFingerprint ( const ROMol &  mol,
unsigned int  targetSize = 4,
const std::vector< boost::uint32_t > *  fromAtoms = 0,
const std::vector< boost::uint32_t > *  ignoreAtoms = 0,
const std::vector< boost::uint32_t > *  atomInvariants = 0,
bool  includeChirality = false 
)

returns the topological-torsion fingerprint for a molecule

The algorithm used is described here: R. Nilakantan, N. Bauman, J. S. Dixon, R. Venkataraghavan; "Topological Torsion: A New Molecular Descriptor for SAR Applications. Comparison with Other Descriptors" JCICS 27, 82-85 (1987).

Parameters
mol: the molecule to be fingerprinted
targetSize: the number of atoms to include in the "torsions"
fromAtoms: if provided, only torsions that start or end at the specified atoms will be included in the fingerprint
ignoreAtoms: if provided, any torsions that include the specified atoms will not be included in the fingerprint
atomInvariants: a list of invariants to use for the atom hashes. Note: only the first codeSize bits of each invariant are used.
includeChirality: if set, chirality will be used in the atom invariants (note: this is ignored if atomInvariants are provided)
Returns
a pointer to the fingerprint. The client is responsible for calling delete on this.

Variable Documentation

const unsigned int RDKit::AtomPairs::atomNumberTypes[1<< numTypeBits]
Initial value:
= {
5, 6, 7, 8, 9, 14, 15, 16, 17, 33, 34, 35, 51, 52, 43}

Definition at line 44 of file AtomPairs.h.

const std::string RDKit::AtomPairs::atomPairsVersion = "1.1.0"

Definition at line 42 of file AtomPairs.h.

const unsigned int RDKit::AtomPairs::codeSize = numTypeBits + numPiBits + numBranchBits

Definition at line 51 of file AtomPairs.h.

const unsigned int RDKit::AtomPairs::maxNumBranches = (1 << numBranchBits) - 1

Definition at line 49 of file AtomPairs.h.

const unsigned int RDKit::AtomPairs::maxNumPi = (1 << numPiBits) - 1

Definition at line 47 of file AtomPairs.h.

const unsigned int RDKit::AtomPairs::maxPathLen = (1 << numPathBits) - 1

Definition at line 53 of file AtomPairs.h.

const unsigned int RDKit::AtomPairs::numAtomPairFingerprintBits
Initial value:
= numPathBits + 2 * codeSize

Definition at line 54 of file AtomPairs.h.

const unsigned int RDKit::AtomPairs::numBranchBits = 3

Definition at line 48 of file AtomPairs.h.

const unsigned int RDKit::AtomPairs::numChiralBits = 2

Definition at line 50 of file AtomPairs.h.

const unsigned int RDKit::AtomPairs::numPathBits = 5

Definition at line 52 of file AtomPairs.h.

const unsigned int RDKit::AtomPairs::numPiBits = 2

Definition at line 46 of file AtomPairs.h.

const unsigned int RDKit::AtomPairs::numTypeBits = 4

Definition at line 43 of file AtomPairs.h.