RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
Fingerprints.h
Go to the documentation of this file.
1//
2// Copyright (C) 2003-2020 Greg Landrum and Rational Discovery LLC
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#include <RDGeneral/export.h>
11#ifndef RD_FINGERPRINTS_H
12#define RD_FINGERPRINTS_H
13
14#include <vector>
15#include <cstdint>
17
18class ExplicitBitVect;
19namespace RDKit {
20class ROMol;
21class MolBundle;
22
23//! \brief Generates a topological (Daylight like) fingerprint for a molecule
24//! using an alternate (faster) hashing algorithm
25/*!
26
27 \param mol: the molecule to be fingerprinted
28 \param minPath: the minimum path length (in bonds) to be included
29 \param maxPath: the maximum path length (in bonds) to be included
30 \param fpSize: the size of the fingerprint
31 \param nBitsPerHash: the number of bits to be set by each path
32 \param useHs: toggles inclusion of Hs in paths (if the molecule has
33 explicit Hs)
34 \param tgtDensity: if the generated fingerprint is below this density, it
35 will
36 be folded until the density is reached.
37 \param minSize: the minimum size to which the fingerprint will be
38 folded
39 \param branchedPaths: toggles generation of branched subgraphs, not just
40 linear paths
41 \param useBondOrder: toggles inclusion of bond orders in the path hashes
42 \param atomInvariants: a vector of atom invariants to use while hashing the
43 paths
44 \param fromAtoms: only paths starting at these atoms will be included
45 \param atomBits: used to return the bits that each atom is involved in
46 (should be at least \c mol.numAtoms long)
47
48 \return the molecular fingerprint, as an ExplicitBitVect
49
50 <b>Notes:</b>
51 - the caller is responsible for <tt>delete</tt>ing the result
52
53*/
54RDKIT_FINGERPRINTS_EXPORT ExplicitBitVect *RDKFingerprintMol(
55 const ROMol &mol, unsigned int minPath = 1, unsigned int maxPath = 7,
56 unsigned int fpSize = 2048, unsigned int nBitsPerHash = 2,
57 bool useHs = true, double tgtDensity = 0.0, unsigned int minSize = 128,
58 bool branchedPaths = true, bool useBondOrder = true,
59 std::vector<std::uint32_t> *atomInvariants = nullptr,
60 const std::vector<std::uint32_t> *fromAtoms = nullptr,
61 std::vector<std::vector<std::uint32_t>> *atomBits = nullptr,
62 std::map<std::uint32_t, std::vector<std::vector<int>>> *bitInfo = nullptr);
63const std::string RDKFingerprintMolVersion = "2.0.0";
64
65//! \brief Generates a topological (Daylight like) fingerprint for a molecule
66//! using a layer-based hashing algorithm
67/*!
68
69 <b>Experimental:</b> This function is experimental. The API or results may
70 change from
71 release to release.
72
73 \param mol: the molecule to be fingerprinted
74 \param layerFlags: the layers to be included (see below)
75 \param minPath: the minimum path length (in bonds) to be included
76 \param maxPath: the maximum path length (in bonds) to be included
77 \param fpSize: the size of the fingerprint
78 \param atomCounts: if provided, this will be used to provide the count of
79 the number
80 of paths that set bits each atom is involved in. The
81 vector should
82 have at least as many entries as the molecule has atoms
83 and is not
84 zeroed out here.
85 \param setOnlyBits: if provided, only bits that are set in this bit vector
86 will be set
87 in the result. This is essentially the same as doing:
88 (*res) &= (*setOnlyBits);
89 but also has an impact on the atomCounts (if being used)
90 \param branchedPaths: toggles generation of branched subgraphs, not just
91 linear paths
92
93 \return the molecular fingerprint, as an ExplicitBitVect
94
95 <b>Notes:</b>
96 - the caller is responsible for <tt>delete</tt>ing the result
97
98 <b>Layer definitions:</b>
99 - 0x01: pure topology
100 - 0x02: bond order
101 - 0x04: atom types
102 - 0x08: presence of rings
103 - 0x10: ring sizes
104 - 0x20: aromaticity
105*/
106RDKIT_FINGERPRINTS_EXPORT ExplicitBitVect *LayeredFingerprintMol(
107 const ROMol &mol, unsigned int layerFlags = 0xFFFFFFFF,
108 unsigned int minPath = 1, unsigned int maxPath = 7,
109 unsigned int fpSize = 2048, std::vector<unsigned int> *atomCounts = nullptr,
110 ExplicitBitVect *setOnlyBits = nullptr, bool branchedPaths = true,
111 const std::vector<std::uint32_t> *fromAtoms = nullptr);
112const unsigned int maxFingerprintLayers = 10;
113const std::string LayeredFingerprintMolVersion = "0.7.0";
114const unsigned int substructLayers = 0x07;
115
116//! \brief Generates a topological fingerprint for a molecule
117//! using a series of pre-defined structural patterns
118/*!
119
120 <b>Experimental:</b> This function is experimental. The API or results may
121 change from
122 release to release.
123
124 \param mol: the molecule to be fingerprinted
125 \param fpSize: the size of the fingerprint
126 \param atomCounts: if provided, this will be used to provide the count of
127 the number
128 of paths that set bits each atom is involved in. The
129 vector should
130 have at least as many entries as the molecule has atoms
131 and is not
132 zeroed out here.
133 \param setOnlyBits: if provided, only bits that are set in this bit vector
134 will be set
135 in the result. This is essentially the same as doing:
136 (*res) &= (*setOnlyBits);
137 but also has an impact on the atomCounts (if being used)
138
139 \return the molecular fingerprint, as an ExplicitBitVect
140
141 <b>Notes:</b>
142 - the caller is responsible for <tt>delete</tt>ing the result
143
144*/
145RDKIT_FINGERPRINTS_EXPORT ExplicitBitVect *PatternFingerprintMol(
146 const ROMol &mol, unsigned int fpSize = 2048,
147 std::vector<unsigned int> *atomCounts = nullptr,
148 ExplicitBitVect *setOnlyBits = nullptr, bool tautomericFingerprint = false);
149const std::string PatternFingerprintMolVersion = "1.0.0";
150//! \overload
152 const MolBundle &bundle, unsigned int fpSize = 2048,
153 ExplicitBitVect *setOnlyBits = nullptr, bool tautomericFingerprint = false);
154
155RDKIT_FINGERPRINTS_EXPORT SparseIntVect<boost::uint64_t> *
156getUnfoldedRDKFingerprintMol(
157 const ROMol &mol, unsigned int minPath = 1, unsigned int maxPath = 7,
158 bool useHs = true, bool branchedPaths = true, bool useBondOrder = true,
159 std::vector<std::uint32_t> *atomInvariants = nullptr,
160 const std::vector<std::uint32_t> *fromAtoms = nullptr,
161 std::vector<std::vector<boost::uint64_t>> *atomBits = nullptr,
162 std::map<boost::uint64_t, std::vector<std::vector<int>>> *bitInfo =
163 nullptr);
164
165} // namespace RDKit
166
167#endif
a class for bit vectors that are densely occupied
MolBundle contains a collection of related ROMols.
Definition MolBundle.h:59
a class for efficiently storing sparse vectors of ints
#define RDKIT_FINGERPRINTS_EXPORT
Definition export.h:177
Std stuff.
RDKIT_FINGERPRINTS_EXPORT SparseIntVect< boost::uint64_t > * getUnfoldedRDKFingerprintMol(const ROMol &mol, unsigned int minPath=1, unsigned int maxPath=7, bool useHs=true, bool branchedPaths=true, bool useBondOrder=true, std::vector< std::uint32_t > *atomInvariants=nullptr, const std::vector< std::uint32_t > *fromAtoms=nullptr, std::vector< std::vector< boost::uint64_t > > *atomBits=nullptr, std::map< boost::uint64_t, std::vector< std::vector< int > > > *bitInfo=nullptr)
bool rdvalue_is(const RDValue_cast_t)
const std::string RDKFingerprintMolVersion
RDKIT_FINGERPRINTS_EXPORT ExplicitBitVect * PatternFingerprintMol(const ROMol &mol, unsigned int fpSize=2048, std::vector< unsigned int > *atomCounts=nullptr, ExplicitBitVect *setOnlyBits=nullptr, bool tautomericFingerprint=false)
Generates a topological fingerprint for a molecule using a series of pre-defined structural patterns.
RDKIT_FINGERPRINTS_EXPORT ExplicitBitVect * LayeredFingerprintMol(const ROMol &mol, unsigned int layerFlags=0xFFFFFFFF, unsigned int minPath=1, unsigned int maxPath=7, unsigned int fpSize=2048, std::vector< unsigned int > *atomCounts=nullptr, ExplicitBitVect *setOnlyBits=nullptr, bool branchedPaths=true, const std::vector< std::uint32_t > *fromAtoms=nullptr)
Generates a topological (Daylight like) fingerprint for a molecule using a layer-based hashing algori...
const unsigned int maxFingerprintLayers
const std::string LayeredFingerprintMolVersion
const std::string PatternFingerprintMolVersion
RDKIT_FINGERPRINTS_EXPORT ExplicitBitVect * RDKFingerprintMol(const ROMol &mol, unsigned int minPath=1, unsigned int maxPath=7, unsigned int fpSize=2048, unsigned int nBitsPerHash=2, bool useHs=true, double tgtDensity=0.0, unsigned int minSize=128, bool branchedPaths=true, bool useBondOrder=true, std::vector< std::uint32_t > *atomInvariants=nullptr, const std::vector< std::uint32_t > *fromAtoms=nullptr, std::vector< std::vector< std::uint32_t > > *atomBits=nullptr, std::map< std::uint32_t, std::vector< std::vector< int > > > *bitInfo=nullptr)
Generates a topological (Daylight like) fingerprint for a molecule using an alternate (faster) hashin...
const unsigned int substructLayers