RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
MorganGenerator.h
Go to the documentation of this file.
1//
2// Copyright (C) 2018-2022 Boran Adas and other RDKit contributors
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10
11#include <RDGeneral/export.h>
12#ifndef RD_MORGANGEN_H_2018_07
13#define RD_MORGANGEN_H_2018_07
14
16#include <cstdint>
17
18namespace RDKit {
19
20namespace MorganFingerprint {
21
22/**
23 \brief Default atom invariants generator for Morgan fingerprint, generates
24 ECFP-type invariants
25
26 */
29 const bool df_includeRingMembership;
30
31 public:
32 /**
33 \brief Construct a new MorganAtomInvGenerator object
34
35 \param includeRingMembership : if set, whether or not the atom is in a ring
36 will be used in the invariant list.
37 */
38 MorganAtomInvGenerator(const bool includeRingMembership = true);
39
40 std::vector<std::uint32_t> *getAtomInvariants(
41 const ROMol &mol) const override;
42
43 std::string infoString() const override;
44 MorganAtomInvGenerator *clone() const override;
45};
46
47/**
48 \brief Alternative atom invariants generator for Morgan fingerprint, generates
49 FCFP-type invariants
50
51 */
54 std::vector<const ROMol *> *dp_patterns;
55
56 public:
57 /**
58 \brief Construct a new MorganFeatureAtomInvGenerator object
59
60 \param patterns : if provided, should contain the queries used to assign
61 atom-types. If not provided, feature definitions adapted from reference:
62 Gobbi and Poppinger, Biotech. Bioeng. _61_ 47-54 (1998) will be used for
63 Donor, Acceptor, Aromatic, Halogen, Basic, Acidic.
64 */
65 MorganFeatureAtomInvGenerator(std::vector<const ROMol *> *patterns = nullptr);
66
67 std::vector<std::uint32_t> *getAtomInvariants(
68 const ROMol &mol) const override;
69
70 std::string infoString() const override;
72};
73
74/**
75 \brief Bond invariants generator for Morgan fingerprint
76
77 */
80 const bool df_useBondTypes;
81 const bool df_useChirality;
82
83 public:
84 /**
85 \brief Construct a new MorganBondInvGenerator object
86
87 \param useBondTypes : if set, bond types will be included as a part of the
88 bond invariants
89 \param useChirality : if set, chirality information will be included as a
90 part of the bond invariants
91 */
92 MorganBondInvGenerator(const bool useBondTypes = true,
93 const bool useChirality = false);
94
95 std::vector<std::uint32_t> *getBondInvariants(
96 const ROMol &mol) const override;
97
98 std::string infoString() const override;
99 MorganBondInvGenerator *clone() const override;
100 ~MorganBondInvGenerator() override = default;
101};
102
103/**
104 \brief Class for holding Morgan fingerprint specific arguments
105
106 */
108 public:
109 bool df_onlyNonzeroInvariants = false;
110 unsigned int d_radius = 3;
111 bool df_includeRedundantEnvironments = false;
112 bool df_useBondTypes = true;
113
114 std::string infoString() const override;
115
116 /**
117 \brief Construct a new MorganArguments object
118
119 \param radius the number of iterations to grow the fingerprint
120 \param countSimulation if set, use count simulation while generating the
121 fingerprint
122 \param includeChirality if set, chirality information will be added to the
123 generated bit id, independently from bond invariants
124 \param onlyNonzeroInvariants if set, bits will only be set from atoms that
125 have a nonzero invariant
126 \param countBounds boundaries for count simulation, corresponding bit will
127 be set if the count is higher than the number provided for that spot
128 \param fpSize size of the generated fingerprint, does not affect the sparse
129 versions
130 \param includeRedundantEnvironments if set, redundant environments will be
131 included in the fingerprint
132 \param useBondTypes if set, bond types will be included in the fingerprint
133 */
134 MorganArguments(unsigned int radius = 3, bool countSimulation = false,
135 bool includeChirality = false,
136 bool onlyNonzeroInvariants = false,
137 std::vector<std::uint32_t> countBounds = {1, 2, 4, 8},
138 std::uint32_t fpSize = 2048,
139 bool includeRedundantEnvironments = false,
140 bool useBondTypes = true)
141 : FingerprintArguments(countSimulation, countBounds, fpSize, 1,
142 includeChirality),
143 df_onlyNonzeroInvariants(onlyNonzeroInvariants),
144 d_radius(radius),
145 df_includeRedundantEnvironments(includeRedundantEnvironments),
146 df_useBondTypes(useBondTypes) {};
147};
148
149/**
150 \brief Class for holding the bit-id created from Morgan fingerprint
151 environments and the additional data necessary for extra outputs
152
153 */
154template <typename OutputType>
156 : public AtomEnvironment<OutputType> {
157 const OutputType d_code;
158 const unsigned int d_atomId;
159 const unsigned int d_layer;
160
161 public:
162 OutputType getBitId(
163 FingerprintArguments *arguments, // unused
164 const std::vector<std::uint32_t> *atomInvariants, // unused
165 const std::vector<std::uint32_t> *bondInvariants, // unused
166 AdditionalOutput *additionalOutput, // unused
167 const bool hashResults = false, // unused
168 const std::uint64_t fpSize = 0 // unused
169 ) const override;
171 size_t bitId) const override;
172
173 /**
174 \brief Construct a new MorganAtomEnv object
175
176 \param code bit id generated from this environment
177 \param atomId atom id of the atom at the center of this environment
178 \param layer radius of this environment
179 */
180 MorganAtomEnv(const std::uint32_t code, const unsigned int atomId,
181 const unsigned int layer);
182};
183
184/**
185 \brief Class that generates atom environments for Morgan fingerprint
186
187 */
188template <typename OutputType>
190 : public AtomEnvironmentGenerator<OutputType> {
191 public:
192 std::vector<AtomEnvironment<OutputType> *> getEnvironments(
193 const ROMol &mol, FingerprintArguments *arguments,
194 const std::vector<std::uint32_t> *fromAtoms,
195 const std::vector<std::uint32_t> *ignoreAtoms, const int confId,
196 const AdditionalOutput *additionalOutput,
197 const std::vector<std::uint32_t> *atomInvariants,
198 const std::vector<std::uint32_t> *bondInvariants,
199 const bool hashResults = false) const override;
200
201 std::string infoString() const override;
202 OutputType getResultSize() const override;
203};
204
205/**
206 \brief Get a fingerprint generator for Morgan fingerprint
207
208 \tparam OutputType determines the size of the bitIds and the result, can be 32
209 or 64 bit unsigned integer
210
211 \param radius the number of iterations to grow the fingerprint
212
213 \param countSimulation if set, use count simulation while generating the
214 fingerprint
215
216 \param includeChirality if set, chirality information will be added to the
217 generated bit id, independently from bond invariants
218
219 \param onlyNonzeroInvariants if set, bits will only be set from atoms that
220 have a nonzero invariant
221
222 \param countBounds boundaries for count simulation, corresponding bit will be
223 set if the count is higher than the number provided for that spot
224
225 \param fpSize size of the generated fingerprint, does not affect the sparse
226 versions
227 \param countSimulation if set, use count simulation while generating the
228 fingerprint
229 \param includeChirality sets includeChirality flag for both MorganArguments
230 and the default bond generator MorganBondInvGenerator
231 \param useBondTypes if set, bond types will be included as a part of the
232 default bond invariants
233 \param onlyNonzeroInvariants if set, bits will only be set from atoms that
234 have a nonzero invariant
235 \param includeRedundantEnvironments if set, redundant environments will be
236 included in the fingerprint
237 \param atomInvariantsGenerator custom atom invariants generator to use
238 \param bondInvariantsGenerator custom bond invariants generator to use
239 \param ownsAtomInvGen if set, the atom invariants generator is destroyed with the
240 fingerprint generator
241 \param ownsBondInvGen if set, the bond invariants generator is destroyed with the
242 fingerprint generator
243
244 \return FingerprintGenerator<OutputType>* that generates Morgan fingerprints
245
246This generator supports the following \c AdditionalOutput types:
247 - \c atomToBits : which bits each atom is the central atom for
248 - \c atomCounts : how many bits each atom sets
249 - \c bitInfoMap : map from bitId to (atomId, radius) pairs
250
251 */
252template <typename OutputType>
254 unsigned int radius, bool countSimulation, bool includeChirality,
259 std::uint32_t fpSize = 2048,
260 std::vector<std::uint32_t> countBounds = {1, 2, 4, 8},
261 bool ownsAtomInvGen = false, bool ownsBondInvGen = false);
262//! \overload
263template <typename OutputType>
265 const MorganArguments &args,
268 bool ownsAtomInvGen = false, bool ownsBondInvGen = false);
269
270/**
271 \brief Get a fingerprint generator for Morgan fingerprint
272
273 \tparam OutputType determines the size of the bitIds and the result, can be 32
274 or 64 bit unsigned integer
275
276 \param radius the number of iterations to grow the fingerprint
277
278 \param countSimulation if set, use count simulation while generating the
279 fingerprint
280
281 \param includeChirality if set, chirality information will be added to the
282 generated bit id, independently from bond invariants
283
284 \param onlyNonzeroInvariants if set, bits will only be set from atoms that
285 have a nonzero invariant
286
287 \param countBounds boundaries for count simulation, corresponding bit will be
288 set if the count is higher than the number provided for that spot
289
290 \param fpSize size of the generated fingerprint, does not affect the sparse
291 versions
292 \param countSimulation if set, use count simulation while generating the
293 fingerprint
294 \param includeChirality sets includeChirality flag for both MorganArguments
295 and the default bond generator MorganBondInvGenerator
296 \param useBondTypes if set, bond types will be included as a part of the
297 default bond invariants
298 \param onlyNonzeroInvariants if set, bits will only be set from atoms that
299 have a nonzero invariant
300 \param atomInvariantsGenerator custom atom invariants generator to use
301 \param bondInvariantsGenerator custom bond invariants generator to use
302 \param ownsAtomInvGen if set, the atom invariants generator is destroyed with the
303 fingerprint generator
304 \param ownsBondInvGen if set, the bond invariants generator is destroyed with the
305 fingerprint generator
306
307 \return FingerprintGenerator<OutputType>* that generates Morgan fingerprints
308
309This generator supports the following \c AdditionalOutput types:
310 - \c atomToBits : which bits each atom is the central atom for
311 - \c atomCounts : how many bits each atom sets
312 - \c bitInfoMap : map from bitId to (atomId, radius) pairs
313
314 */
315template <typename OutputType>
317 unsigned int radius, bool countSimulation = false,
318 bool includeChirality = false, bool useBondTypes = true,
319 bool onlyNonzeroInvariants = false,
322 std::uint32_t fpSize = 2048,
323 std::vector<std::uint32_t> countBounds = {1, 2, 4, 8},
324 bool ownsAtomInvGen = false, bool ownsBondInvGen = false) {
330};
331
332} // namespace MorganFingerprint
333} // namespace RDKit
334
335#endif
abstract base class that generates atom-environments from a molecule
abstract base class that holds atom-environments that will be hashed to generate the fingerprint
abstract base class for atom invariants generators
abstract base class for bond invariants generators
Abstract base class that holds molecule independent arguments that are common amongst all fingerprint...
class that generates same fingerprint style for different output formats
Class for holding Morgan fingerprint specific arguments.
MorganArguments(unsigned int radius=3, bool countSimulation=false, bool includeChirality=false, bool onlyNonzeroInvariants=false, std::vector< std::uint32_t > countBounds={1, 2, 4, 8}, std::uint32_t fpSize=2048, bool includeRedundantEnvironments=false, bool useBondTypes=true)
Construct a new MorganArguments object.
std::string infoString() const override
method that returns information string about the fingerprint specific argument set and the arguments ...
Class for holding the bit-id created from Morgan fingerprint environments and the additional data nec...
void updateAdditionalOutput(AdditionalOutput *output, size_t bitId) const override
MorganAtomEnv(const std::uint32_t code, const unsigned int atomId, const unsigned int layer)
Construct a new MorganAtomEnv object.
OutputType getBitId(FingerprintArguments *arguments, const std::vector< std::uint32_t > *atomInvariants, const std::vector< std::uint32_t > *bondInvariants, AdditionalOutput *additionalOutput, const bool hashResults=false, const std::uint64_t fpSize=0) const override
calculates and returns the bit id to be set for this atom-environment
Default atom invariants generator for Morgan fingerprint, generates ECFP-type invariants.
MorganAtomInvGenerator(const bool includeRingMembership=true)
Construct a new MorganAtomInvGenerator object.
MorganAtomInvGenerator * clone() const override
std::string infoString() const override
method that returns information about this AtomInvariantsGenerator and its arguments
std::vector< std::uint32_t > * getAtomInvariants(const ROMol &mol) const override
get atom invariants from a molecule
Bond invariants generator for Morgan fingerprint.
std::string infoString() const override
method that returns information about this BondInvariantsGenerator and its arguments
MorganBondInvGenerator * clone() const override
MorganBondInvGenerator(const bool useBondTypes=true, const bool useChirality=false)
Construct a new MorganBondInvGenerator object.
std::vector< std::uint32_t > * getBondInvariants(const ROMol &mol) const override
get bond invariants from a molecule
Class that generates atom environments for Morgan fingerprint.
std::string infoString() const override
method that returns information about this AtomEnvironmentGenerator and its arguments if any
OutputType getResultSize() const override
Returns the size of the fingerprint based on arguments.
std::vector< AtomEnvironment< OutputType > * > getEnvironments(const ROMol &mol, FingerprintArguments *arguments, const std::vector< std::uint32_t > *fromAtoms, const std::vector< std::uint32_t > *ignoreAtoms, const int confId, const AdditionalOutput *additionalOutput, const std::vector< std::uint32_t > *atomInvariants, const std::vector< std::uint32_t > *bondInvariants, const bool hashResults=false) const override
generate and return all atom-environments from a molecule
Alternative atom invariants generator for Morgan fingerprint, generate FCFP-type invariants.
MorganFeatureAtomInvGenerator(std::vector< const ROMol * > *patterns=nullptr)
Construct a new MorganFeatureAtomInvGenerator object.
std::vector< std::uint32_t > * getAtomInvariants(const ROMol &mol) const override
get atom invariants from a molecule
MorganFeatureAtomInvGenerator * clone() const override
std::string infoString() const override
method that returns information about this AtomInvariantsGenerator and its arguments
#define RDKIT_FINGERPRINTS_EXPORT
Definition export.h:177
RDKIT_FINGERPRINTS_EXPORT FingerprintGenerator< OutputType > * getMorganGenerator(unsigned int radius, bool countSimulation, bool includeChirality, bool useBondTypes, bool onlyNonzeroInvariants, bool includeRedundantEnvironments, AtomInvariantsGenerator *atomInvariantsGenerator=nullptr, BondInvariantsGenerator *bondInvariantsGenerator=nullptr, std::uint32_t fpSize=2048, std::vector< std::uint32_t > countBounds={1, 2, 4, 8}, bool ownsAtomInvGen=false, bool ownsBondInvGen=false)
Get a fingerprint generator for Morgan fingerprint.
Std stuff.
bool rdvalue_is(const RDValue_cast_t)