RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
RDKitFPGenerator.h
Go to the documentation of this file.
1//
2// Copyright (C) 2018-2022 Boran Adas and other RDKit contributors
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10
11#include <RDGeneral/export.h>
12#ifndef RD_RDFINGERPRINTGEN_H_2018_07
13#define RD_RDFINGERPRINTGEN_H_2018_07
14
16
17namespace RDKit {
18namespace RDKitFP {
19
21 public:
// Argument set specific to the RDKit (path/subgraph based) fingerprint.
// The defaults of the data members below match the defaults of the
// corresponding constructor parameters documented further down.
22 unsigned int d_minPath = 1;  // path-length lower bound, in bonds (cf. minPath)
23 unsigned int d_maxPath = 7;  // path-length upper bound, in bonds (cf. maxPath)
24 bool df_useHs = true;  // include Hs in paths if the molecule has explicit Hs (cf. useHs)
25 bool df_branchedPaths = true;  // generate branched subgraphs, not just linear paths (cf. branchedPaths)
26 bool df_useBondOrder = true;  // include bond orders in the path hashes (cf. useBondOrder)
27
// Returns an information string about this fingerprint-specific argument set.
28 std::string infoString() const override;
29
30 /**
31 \brief Construct a new RDKitFPArguments object
32
33 \param minPath the minimum path length (in bonds) to be included
34 \param maxPath the maximum path length (in bonds) to be included
35 \param useHs toggles inclusion of Hs in paths (if the molecule has
36 explicit Hs)
37 \param branchedPaths toggles generation of branched subgraphs, not just
38 linear paths
39 \param useBondOrder toggles inclusion of bond orders in the path hashes
40 \param countSimulation if set, use count simulation while
41 generating the fingerprint
42 \param countBounds boundaries for count simulation, corresponding bit will
43 be set if the count is higher than the number provided for that spot
44 \param fpSize size of the generated fingerprint, does not affect the sparse
45 versions
46 \param numBitsPerFeature controls the number of bits that are set for each
47 path/subgraph found
48
49 */
// NOTE(review): countBounds is taken as a const std::vector by value, so each
// call copies the vector; changing that would alter the declared signature, so
// it is only flagged here.
50 RDKitFPArguments(unsigned int minPath = 1, unsigned int maxPath = 7,
51 bool useHs = true, bool branchedPaths = true,
52 bool useBondOrder = true, bool countSimulation = false,
53 const std::vector<std::uint32_t> countBounds = {1, 2, 4, 8},
54 std::uint32_t fpSize = 2048,
55 std::uint32_t numBitsPerFeature = 2);
56};
57
60 public:
// Gets the atom invariants for the given molecule.
// NOTE(review): the result is a raw pointer to a vector; who is responsible
// for releasing it is not visible in this header - confirm with the base class.
61 std::vector<std::uint32_t> *getAtomInvariants(
62 const ROMol &mol) const override;
63
// Returns information about this atom invariants generator and its arguments.
64 std::string infoString() const override;
// Returns a pointer to an RDKitFPAtomInvGenerator.
// NOTE(review): presumably a newly allocated copy owned by the caller - confirm.
65 RDKitFPAtomInvGenerator *clone() const override;
66};
67
// Atom-environment type for the RDKit fingerprint, parameterised on the
// integer type used for bit ids. All three data members are const and are
// set once by the constructor below.
68template <typename OutputType>
70 : public AtomEnvironment<OutputType> {
71 const OutputType d_bitId;  // bit id generated for this environment
72 const boost::dynamic_bitset<> d_atomsInPath;  // atoms in this environment, used to set additional output
73 const INT_VECT d_bondPath;  // the bond path defining the environment
74
75 public:
// Calculates and returns the bit id to be set for this atom-environment.
// Every parameter is marked unused by the original author.
76 OutputType getBitId(
77 FingerprintArguments *arguments, // unused
78 const std::vector<std::uint32_t> *atomInvariants, // unused
79 const std::vector<std::uint32_t> *bondInvariants, // unused
80 AdditionalOutput *additionalOutput, // unused
81 bool hashResults = false, // unused
82 const std::uint64_t fpSize = 0 // unused
83 ) const override;
85 size_t bitId) const override;
86
87 /**
88 \brief Construct a new RDKitFPAtomEnv object
89
90 \param bitId bitId generated for this environment
91 \param atomsInPath holds atoms in this environment to set additional output
92 \param bondPath the bond path defining the environment
93
94 */
// atomsInPath and bondPath are taken by value and moved into the members.
95 RDKitFPAtomEnv(const OutputType bitId, boost::dynamic_bitset<> atomsInPath,
96 INT_VECT bondPath)
97 : d_bitId(bitId),
98 d_atomsInPath(std::move(atomsInPath)),
99 d_bondPath(std::move(bondPath)) {}
100};
101
// Atom-environment generator for the RDKit fingerprint, derived from
// AtomEnvironmentGenerator<OutputType>.
102template <typename OutputType>
104 : public AtomEnvironmentGenerator<OutputType> {
105 public:
// Generates and returns all atom-environments from a molecule.
// NOTE(review): the result holds raw AtomEnvironment pointers; ownership is
// not visible in this header - confirm who deletes them.
106 std::vector<AtomEnvironment<OutputType> *> getEnvironments(
107 const ROMol &mol, FingerprintArguments *arguments,
108 const std::vector<std::uint32_t> *fromAtoms,
109 const std::vector<std::uint32_t> *ignoreAtoms, int confId,
110 const AdditionalOutput *additionalOutput,
111 const std::vector<std::uint32_t> *atomInvariants,
112 const std::vector<std::uint32_t> *bondInvariants,
113 bool hashResults = false) const override;
114
// Returns information about this atom-environment generator and its
// arguments, if any.
115 std::string infoString() const override;
// Returns the size of the fingerprint based on arguments.
116 OutputType getResultSize() const override;
117
118}; // end of the AtomEnvironmentGenerator subclass (the namespace closes further down)
119
120/**
121 \brief Get a RDKit fingerprint generator with given parameters
122
123 \tparam OutputType determines the size of the bitIds and the result, can be 32
124 or 64 bit unsigned integer
125 \param minPath the minimum path length (in bonds) to be included
126 \param maxPath the maximum path length (in bonds) to be included
127 \param useHs toggles inclusion of Hs in paths (if the molecule has
128 explicit Hs)
129 \param branchedPaths toggles generation of branched subgraphs, not just
130 linear paths
131 \param useBondOrder toggles inclusion of bond orders in the path hashes
132 \param atomInvariantsGenerator custom atom invariants generator to use
133 \param countSimulation if set, use count simulation while
134 generating the fingerprint
135 \param countBounds boundaries for count simulation, corresponding bit will be
136 set if the count is higher than the number provided for that spot
137 \param fpSize size of the generated fingerprint, does not affect the sparse
138 versions
139 \param numBitsPerFeature controls the number of bits that are set for each
140 path/subgraph found
141 \param ownsAtomInvGen if set atom invariants generator is destroyed with the
142 fingerprint generator
143
144 \return FingerprintGenerator<OutputType>* that generates RDKit fingerprints
145
146 This generator supports the following \c AdditionalOutput types:
147 - \c atomToBits : which bits each atom is involved in
148 - \c atomCounts : how many bits each atom sets
149 - \c bitPaths : map from bitId to vectors of bond indices for the individual
150 subgraphs
151
152 */
153template <typename OutputType>
155 unsigned int minPath = 1, unsigned int maxPath = 7, bool useHs = true,
156 bool branchedPaths = true, bool useBondOrder = true,
158 bool countSimulation = false,
159 const std::vector<std::uint32_t> countBounds = {1, 2, 4, 8},
160 std::uint32_t fpSize = 2048, std::uint32_t numBitsPerFeature = 2,
161 bool ownsAtomInvGen = false);
162// \overload
// This variant receives the fingerprint settings bundled in a single
// RDKitFPArguments object instead of as individual parameters.
163template <typename OutputType>
165 const RDKitFPArguments &args,
167 bool ownsAtomInvGen = false);
168
169} // namespace RDKitFP
170} // namespace RDKit
171
172#endif
abstract base class that generates atom-environments from a molecule
abstract base class that holds atom-environments that will be hashed to generate the fingerprint
abstract base class for atom invariants generators
Abstract base class that holds molecule independent arguments that are common amongst all fingerprint...
class that generates same fingerprint style for different output formats
std::string infoString() const override
method that returns information string about the fingerprint specific argument set and the arguments ...
RDKitFPArguments(unsigned int minPath=1, unsigned int maxPath=7, bool useHs=true, bool branchedPaths=true, bool useBondOrder=true, bool countSimulation=false, const std::vector< std::uint32_t > countBounds={1, 2, 4, 8}, std::uint32_t fpSize=2048, std::uint32_t numBitsPerFeature=2)
Construct a new RDKitFPArguments object.
RDKitFPAtomEnv(const OutputType bitId, boost::dynamic_bitset<> atomsInPath, INT_VECT bondPath)
Construct a new RDKitFPAtomEnv object.
OutputType getBitId(FingerprintArguments *arguments, const std::vector< std::uint32_t > *atomInvariants, const std::vector< std::uint32_t > *bondInvariants, AdditionalOutput *additionalOutput, bool hashResults=false, const std::uint64_t fpSize=0) const override
calculates and returns the bit id to be set for this atom-environment
void updateAdditionalOutput(AdditionalOutput *output, size_t bitId) const override
std::vector< std::uint32_t > * getAtomInvariants(const ROMol &mol) const override
get atom invariants from a molecule
std::string infoString() const override
method that returns information about this AtomInvariantsGenerator and its arguments
RDKitFPAtomInvGenerator * clone() const override
OutputType getResultSize() const override
Returns the size of the fingerprint based on arguments.
std::vector< AtomEnvironment< OutputType > * > getEnvironments(const ROMol &mol, FingerprintArguments *arguments, const std::vector< std::uint32_t > *fromAtoms, const std::vector< std::uint32_t > *ignoreAtoms, int confId, const AdditionalOutput *additionalOutput, const std::vector< std::uint32_t > *atomInvariants, const std::vector< std::uint32_t > *bondInvariants, bool hashResults=false) const override
generate and return all atom-environments from a molecule
std::string infoString() const override
method that returns information about this AtomEnvironmentGenerator and its arguments if any
#define RDKIT_FINGERPRINTS_EXPORT
Definition export.h:177
RDKIT_FINGERPRINTS_EXPORT FingerprintGenerator< OutputType > * getRDKitFPGenerator(unsigned int minPath=1, unsigned int maxPath=7, bool useHs=true, bool branchedPaths=true, bool useBondOrder=true, AtomInvariantsGenerator *atomInvariantsGenerator=nullptr, bool countSimulation=false, const std::vector< std::uint32_t > countBounds={1, 2, 4, 8}, std::uint32_t fpSize=2048, std::uint32_t numBitsPerFeature=2, bool ownsAtomInvGen=false)
Get a RDKit fingerprint generator with given parameters.
Std stuff.
std::vector< int > INT_VECT
Definition types.h:291
bool rdvalue_is(const RDValue_cast_t)