RDKit
Open-source cheminformatics and machine learning.
Fingerprints.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2003-2012 Greg Landrum and Rational Discovery LLC
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #ifndef _RD_FINGERPRINTS_H_
11 #define _RD_FINGERPRINTS_H_
12 
13 #include <vector>
14 #include <boost/cstdint.hpp>
16 
17 class ExplicitBitVect;
18 namespace RDKit {
19 class ROMol;
20 
21 //! \brief Generates a topological (Daylight like) fingerprint for a molecule
22 //! using an alternate (faster) hashing algorithm
23 /*!
24 
25  \param mol: the molecule to be fingerprinted
26  \param minPath: the minimum path length (in bonds) to be included
27  \param maxPath: the maximum path length (in bonds) to be included
28  \param fpSize: the size of the fingerprint
29  \param nBitsPerHash: the number of bits to be set by each path
30  \param useHs: toggles inclusion of Hs in paths (if the molecule has
31  explicit Hs)
32  \param tgtDensity: if the generated fingerprint is below this density, it
33  will
34  be folded until the density is reached.
35  \param minSize: the minimum size to which the fingerprint will be
36  folded
37  \param branchedPaths: toggles generation of branched subgraphs, not just
38  linear paths
39  \param useBondOrder: toggles inclusion of bond orders in the path hashes
40  \param atomInvariants: a vector of atom invariants to use while hashing the
41  paths
42  \param fromAtoms: only paths starting at these atoms will be included
43  \param atomBits: used to return the bits that each atom is involved in
44  (should be at least \c mol.numAtoms long)
45 
46  \return the molecular fingerprint, as an ExplicitBitVect
47 
48  <b>Notes:</b>
49  - the caller is responsible for <tt>delete</tt>ing the result
50 
51 */
53  const ROMol &mol, unsigned int minPath = 1, unsigned int maxPath = 7,
54  unsigned int fpSize = 2048, unsigned int nBitsPerHash = 2,
55  bool useHs = true, double tgtDensity = 0.0, unsigned int minSize = 128,
56  bool branchedPaths = true, bool useBondOrder = true,
57  std::vector<boost::uint32_t> *atomInvariants = 0,
58  const std::vector<boost::uint32_t> *fromAtoms = 0,
59  std::vector<std::vector<boost::uint32_t> > *atomBits = 0,
60  std::map<boost::uint32_t,std::vector<std::vector<int> > > *bitInfo=0);
61 const std::string RDKFingerprintMolVersion = "2.0.0";
62 
63 //! \brief Generates a topological (Daylight like) fingerprint for a molecule
64 //! using a layer-based hashing algorithm
65 /*!
66 
67  <b>Experimental:</b> This function is experimental. The API or results may
68  change from
69  release to release.
70 
71  \param mol: the molecule to be fingerprinted
72  \param layerFlags: the layers to be included (see below)
73  \param minPath: the minimum path length (in bonds) to be included
74  \param maxPath: the maximum path length (in bonds) to be included
75  \param fpSize: the size of the fingerprint
76  \param atomCounts: if provided, this will be used to provide the count of
77  the number
78  of paths that set bits each atom is involved in. The
79  vector should
80  have at least as many entries as the molecule has atoms
81  and is not
82  zeroed out here.
83  \param setOnlyBits: if provided, only bits that are set in this bit vector
84  will be set
85  in the result. This is essentially the same as doing:
86  (*res) &= (*setOnlyBits);
87  but also has an impact on the atomCounts (if being used)
88  \param branchedPaths: toggles generation of branched subgraphs, not just
89  linear paths
90 
91  \return the molecular fingerprint, as an ExplicitBitVect
92 
93  <b>Notes:</b>
94  - the caller is responsible for <tt>delete</tt>ing the result
95 
96  <b>Layer definitions:</b>
97  - 0x01: pure topology
98  - 0x02: bond order
99  - 0x04: atom types
100  - 0x08: presence of rings
101  - 0x10: ring sizes
102  - 0x20: aromaticity
103 */
105  const ROMol &mol, unsigned int layerFlags = 0xFFFFFFFF,
106  unsigned int minPath = 1, unsigned int maxPath = 7,
107  unsigned int fpSize = 2048, std::vector<unsigned int> *atomCounts = 0,
108  ExplicitBitVect *setOnlyBits = 0, bool branchedPaths = true,
109  const std::vector<boost::uint32_t> *fromAtoms = 0);
110 const unsigned int maxFingerprintLayers = 10;
111 const std::string LayeredFingerprintMolVersion = "0.7.0";
112 const unsigned int substructLayers = 0x07;
113 
114 //! \brief Generates a topological fingerprint for a molecule
115 //! using a series of pre-defined structural patterns
116 /*!
117 
118  <b>Experimental:</b> This function is experimental. The API or results may
119  change from
120  release to release.
121 
122  \param mol: the molecule to be fingerprinted
123  \param fpSize: the size of the fingerprint
124  \param atomCounts: if provided, this will be used to provide the count of
125  the number
126  of paths that set bits each atom is involved in. The
127  vector should
128  have at least as many entries as the molecule has atoms
129  and is not
130  zeroed out here.
131  \param setOnlyBits: if provided, only bits that are set in this bit vector
132  will be set
133  in the result. This is essentially the same as doing:
134  (*res) &= (*setOnlyBits);
135  but also has an impact on the atomCounts (if being used)
136 
137  \return the molecular fingerprint, as an ExplicitBitVect
138 
139  <b>Notes:</b>
140  - the caller is responsible for <tt>delete</tt>ing the result
141 
142 */
144  const ROMol &mol, unsigned int fpSize = 2048,
145  std::vector<unsigned int> *atomCounts = 0,
146  ExplicitBitVect *setOnlyBits = 0);
147 
//! \brief Fingerprint function declared with a parameter list closely matching
//! RDKFingerprintMol(), returning a SparseIntVect<boost::uint64_t> pointer
//! instead of an ExplicitBitVect pointer
/*!
 The mol, minPath, maxPath, useHs, branchedPaths, useBondOrder,
 atomInvariants and fromAtoms arguments have the same types and default
 values as the identically named arguments of RDKFingerprintMol(). This
 declaration has no fpSize, nBitsPerHash, tgtDensity or minSize arguments.
 atomBits and bitInfo use boost::uint64_t where RDKFingerprintMol() uses
 boost::uint32_t.

 NOTE(review): judging by the name, this presumably produces the fingerprint
 without folding it to a fixed size - confirm against the implementation.
 NOTE(review): ownership of the returned pointer is not stated in this
 header; presumably the caller deletes it, as documented for the other
 functions declared here - confirm.
*/
148 SparseIntVect<boost::uint64_t> *getUnfoldedRDKFingerprintMol(const ROMol &mol,unsigned int minPath=1,
149  unsigned int maxPath=7,
150  bool useHs=true,
151  bool branchedPaths=true,
152  bool useBondOrder=true,
153  std::vector<boost::uint32_t> *atomInvariants=0,
154  const std::vector<boost::uint32_t> *fromAtoms=0,
155  std::vector<std::vector<boost::uint64_t> > *atomBits=0,
156  std::map<boost::uint64_t,std::vector<std::vector<int> > > *bitInfo=0);
157 
158 }
159 
160 
161 #endif
ExplicitBitVect * LayeredFingerprintMol(const ROMol &mol, unsigned int layerFlags=0xFFFFFFFF, unsigned int minPath=1, unsigned int maxPath=7, unsigned int fpSize=2048, std::vector< unsigned int > *atomCounts=0, ExplicitBitVect *setOnlyBits=0, bool branchedPaths=true, const std::vector< boost::uint32_t > *fromAtoms=0)
Generates a topological (Daylight like) fingerprint for a molecule using a layer-based hashing algori...
const unsigned int maxFingerprintLayers
Definition: Fingerprints.h:110
SparseIntVect< boost::uint64_t > * getUnfoldedRDKFingerprintMol(const ROMol &mol, unsigned int minPath=1, unsigned int maxPath=7, bool useHs=true, bool branchedPaths=true, bool useBondOrder=true, std::vector< boost::uint32_t > *atomInvariants=0, const std::vector< boost::uint32_t > *fromAtoms=0, std::vector< std::vector< boost::uint64_t > > *atomBits=0, std::map< boost::uint64_t, std::vector< std::vector< int > > > *bitInfo=0)
const unsigned int substructLayers
Definition: Fingerprints.h:112
const std::string RDKFingerprintMolVersion
Definition: Fingerprints.h:61
ROMol is a molecule class that is intended to have a fixed topology.
Definition: ROMol.h:103
ExplicitBitVect * RDKFingerprintMol(const ROMol &mol, unsigned int minPath=1, unsigned int maxPath=7, unsigned int fpSize=2048, unsigned int nBitsPerHash=2, bool useHs=true, double tgtDensity=0.0, unsigned int minSize=128, bool branchedPaths=true, bool useBondOrder=true, std::vector< boost::uint32_t > *atomInvariants=0, const std::vector< boost::uint32_t > *fromAtoms=0, std::vector< std::vector< boost::uint32_t > > *atomBits=0, std::map< boost::uint32_t, std::vector< std::vector< int > > > *bitInfo=0)
Generates a topological (Daylight like) fingerprint for a molecule using an alternate (faster) hashin...
ExplicitBitVect * PatternFingerprintMol(const ROMol &mol, unsigned int fpSize=2048, std::vector< unsigned int > *atomCounts=0, ExplicitBitVect *setOnlyBits=0)
Generates a topological fingerprint for a molecule using a series of pre-defined structural patterns...
Includes a bunch of functionality for handling Atom and Bond queries.
Definition: Atom.h:29
a class for efficiently storing sparse vectors of ints
Definition: SparseIntVect.h:27
const std::string LayeredFingerprintMolVersion
Definition: Fingerprints.h:111
a class for bit vectors that are densely occupied