RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
EnumerationStrategyBase.h
Go to the documentation of this file.
1//
2// Copyright (c) 2015, Novartis Institutes for BioMedical Research Inc.
3// All rights reserved.
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
8//
9// * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11// * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following
13// disclaimer in the documentation and/or other materials provided
14// with the distribution.
15// * Neither the name of Novartis Institutes for BioMedical Research Inc.
16// nor the names of its contributors may be used to endorse or promote
17// products derived from this software without specific prior written
18// permission.
19//
20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31//
32#include <RDGeneral/export.h>
33#ifndef ENUMERATION_STRATEGY_H
34#define ENUMERATION_STRATEGY_H
35
36#include "EnumerateTypes.h"
37#include "../Reaction.h"
38#include <utility>
39#include <vector>
41#include <cstdint>
42#ifdef RDK_USE_BOOST_SERIALIZATION
43#include <boost/serialization/assume_abstract.hpp>
44#include <boost/serialization/vector.hpp>
45// the next two includes need to be there for boost 1.56
46#include <boost/serialization/singleton.hpp>
47#include <boost/serialization/extended_type_info.hpp>
48#include <boost/serialization/shared_ptr.hpp>
49#endif
51
52#include <GraphMol/RDKitBase.h>
53
54namespace RDKit {
55
56//! class for flagging enumeration strategy errors
58 : public std::exception {
59 public:
60 EnumerationStrategyException(const char *msg) : _msg(msg) {}
61 EnumerationStrategyException(std::string msg) : _msg(std::move(msg)) {}
62 const char *what() const noexcept override { return _msg.c_str(); }
63 ~EnumerationStrategyException() noexcept override = default;
64
65 private:
66 std::string _msg;
67};
68
69//! Return the number of elements per input vector
70/*! \param bbs vector<vector<T> >
71
72 \result vector<unint64_t> number of elements in each vector
73 */
74template <class T>
75EnumerationTypes::RGROUPS getSizesFromBBs(
76 const std::vector<std::vector<T>> &bbs) {
78 for (size_t i = 0; i < bbs.size(); ++i) {
79 sizes.push_back(bbs[i].size());
80 }
81 return sizes;
82}
83
84//! getSizesFromReactants
85//! Helper function for enumeration, bbs are stored in a
86//! std::vector< std::vector<boost:shared_ptr<ROMol> >
87//
89 const std::vector<MOL_SPTR_VECT> &bbs);
90
91//! getReactantsFromRGroups
92//! Helper function for enumeration, bbs are stored in a
93//! std::vector< std::vector<boost:shared_ptr<ROMol> >
94//
96getReactantsFromRGroups(const std::vector<MOL_SPTR_VECT> &bbs,
97 const EnumerationTypes::RGROUPS &rgroups);
98
99//! computeNumProducts
100//! Returns the number of possible product combination from
101//! The given numbers of building blocks for each rgroup
102//! or EnumerationStrategyBase::EnumerationOverflow if the
103//! number will not fit into the machines integer type.
104//! n.b. An overflow simply means there are a lot of products
105//! not that they cannot be enumerated
107 const EnumerationTypes::RGROUPS &sizes);
108
109//! Base Class for enumeration strategies
110//! Usage:
111//! EnumerationStrategyBase must be initialized with both a reaction
112//! and the building block (molecule) vector to be sampled.
113//!
114//! \verbatim
115//! EnumerationStrategyBase &eb = ...
116//! if(eb) { // can we get another entry
117//! const std::vector<int> &v = eb.next();
118//! v[0] // RGroup 0 position
119//! v[1] // RGroup 1 position...
120//! }
121//! \endverbatim
122
124 protected:
125 EnumerationTypes::RGROUPS m_permutation; // where are we currently?
127 m_permutationSizes; // m_permutationSizes num bbs per group
128 boost::uint64_t
129 m_numPermutations{}; // total number of permutations for this group
130 // -1 if > ssize_t::max
131 public:
132 static const boost::uint64_t EnumerationOverflow =
133 static_cast<boost::uint64_t>(-1);
134 EnumerationStrategyBase() : m_permutation(), m_permutationSizes() {}
135
137
138 virtual const char *type() const { return "EnumerationStrategyBase"; }
139
140 //! Initialize the enumerator based on the reaction and the
141 //! supplied building blocks
142 //! This is the standard API point.
143 //! This calls the derived class's initializeStrategy method which must be
144 //! implemented
145 void initialize(const ChemicalReaction &reaction,
146 const EnumerationTypes::BBS &building_blocks) {
147 // default initialization, may be overridden (sets the # reactants
148 // and computes the default # of permutations)
149 m_permutationSizes = getSizesFromBBs(building_blocks);
150 m_permutation.resize(m_permutationSizes.size());
151
152 m_numPermutations = computeNumProducts(m_permutationSizes);
153 std::fill(m_permutation.begin(), m_permutation.end(), 0);
154
155 initializeStrategy(reaction, building_blocks);
156 }
157
158 // ! Initialize derived class. Must exist.
159 // ! EnumerationStrategyBase structures are already initialized:
160 // ! m_permutationSizes - [ length of building blocks for each reactant set ]
161 // ! m_numPermutations - number of possible permutations
162 // ! ( -1 if not computable )
163 // ! m_permutation - the first permutation, always the first supplied
164 // ! reactants
165 virtual void initializeStrategy(
166 const ChemicalReaction &reaction,
167 const EnumerationTypes::BBS &building_blocks) = 0;
168
169 //! returns true if there are more permutations left
170 //! random enumerators may always return true...
171 virtual operator bool() const = 0;
172
173 //! The current permutation {r1, r2, ...}
174 virtual const EnumerationTypes::RGROUPS &next() = 0;
175
176 //! copy the enumeration strategy complete with current state
177 virtual EnumerationStrategyBase *copy() const = 0;
178
179 //! The current position in the enumeration
180 const EnumerationTypes::RGROUPS &getPosition() const { return m_permutation; }
181
182 //! a result of EnumerationOverflow indicates that the number of
183 //! permutations is not computable with the current
184 //! rdlonglong size.
185 boost::uint64_t getNumPermutations() const { return m_numPermutations; }
186
187 //! Returns how many permutations have been processed by this strategy
188 virtual boost::uint64_t getPermutationIdx() const = 0;
189
190 //! Skip the specified number of permutations (useful for
191 //! resetting state to a known position)
192 bool skip(boost::uint64_t skipCount) {
193 for (boost::uint64_t i = 0; i < skipCount; ++i) {
194 next();
195 }
196 return true;
197 }
198
199 protected:
200 //! Initialize the internal data structures
201 //! i.e. RGROUPS = {10,40,50};
203 m_permutation.resize(rgroups.size());
204 m_permutationSizes = rgroups;
205 m_numPermutations = computeNumProducts(m_permutationSizes);
206 std::fill(m_permutation.begin(), m_permutation.end(), 0);
207 }
208
209 private:
210 friend class boost::serialization::access;
211 template <class Archive>
212 void serialize(Archive &ar, const unsigned int /*version*/) {
213 ar &m_permutation;
214 ar &m_permutationSizes;
215 ar &m_numPermutations;
216 }
217};
218#ifdef RDK_USE_BOOST_SERIALIZATION
219BOOST_SERIALIZATION_ASSUME_ABSTRACT(EnumerationStrategyBase)
220#endif
221} // namespace RDKit
222
223#ifdef RDK_USE_BOOST_SERIALIZATION
225#endif
226
227#endif
pulls in the core RDKit functionality
This is a class for storing and applying general chemical reactions.
Definition Reaction.h:121
virtual EnumerationStrategyBase * copy() const =0
copy the enumeration strategy complete with current state
void initialize(const ChemicalReaction &reaction, const EnumerationTypes::BBS &building_blocks)
virtual const char * type() const
bool skip(boost::uint64_t skipCount)
virtual boost::uint64_t getPermutationIdx() const =0
Returns how many permutations have been processed by this strategy.
virtual void initializeStrategy(const ChemicalReaction &reaction, const EnumerationTypes::BBS &building_blocks)=0
EnumerationTypes::RGROUPS m_permutation
void internalInitialize(const EnumerationTypes::RGROUPS &rgroups)
boost::uint64_t getNumPermutations() const
EnumerationTypes::RGROUPS m_permutationSizes
const EnumerationTypes::RGROUPS & getPosition() const
The current position in the enumeration.
virtual const EnumerationTypes::RGROUPS & next()=0
The current permutation {r1, r2, ...}.
class for flagging enumeration strategy errors
~EnumerationStrategyException() noexcept override=default
const char * what() const noexcept override
#define RDKIT_CHEMREACTIONS_EXPORT
Definition export.h:49
std::vector< boost::uint64_t > RGROUPS
std::vector< MOL_SPTR_VECT > BBS
Std stuff.
bool rdvalue_is(const RDValue_cast_t)
RDKIT_CHEMREACTIONS_EXPORT EnumerationTypes::RGROUPS getSizesFromReactants(const std::vector< MOL_SPTR_VECT > &bbs)
EnumerationTypes::RGROUPS getSizesFromBBs(const std::vector< std::vector< T > > &bbs)
Return the number of elements per input vector.
std::vector< boost::shared_ptr< ROMol > > MOL_SPTR_VECT
RDKIT_CHEMREACTIONS_EXPORT MOL_SPTR_VECT getReactantsFromRGroups(const std::vector< MOL_SPTR_VECT > &bbs, const EnumerationTypes::RGROUPS &rgroups)
RDKIT_CHEMREACTIONS_EXPORT boost::uint64_t computeNumProducts(const EnumerationTypes::RGROUPS &sizes)