RDKit
Open-source cheminformatics and machine learning.
EnumerationStrategyBase.h
Go to the documentation of this file.
1 //
2 // Copyright (c) 2015, Novartis Institutes for BioMedical Research Inc.
3 // All rights reserved.
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following
13 // disclaimer in the documentation and/or other materials provided
14 // with the distribution.
15 // * Neither the name of Novartis Institutes for BioMedical Research Inc.
16 // nor the names of its contributors may be used to endorse or promote
17 // products derived from this software without specific prior written
18 // permission.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 //
32 #ifndef ENUMERATION_STRATEGY_H
33 #define ENUMERATION_STRATEGY_H
34 
35 #include "EnumerateTypes.h"
36 #include "../Reaction.h"
37 #include <vector>
39 #include <boost/cstdint.hpp>
40 #include <boost/serialization/assume_abstract.hpp>
41 #include <boost/serialization/vector.hpp>
42 // the next two includes need to be there for boost 1.56
43 #include <boost/serialization/singleton.hpp>
44 #include <boost/serialization/extended_type_info.hpp>
45 #include <boost/serialization/shared_ptr.hpp>
47 
48 #include <GraphMol/RDKitBase.h>
49 
50 namespace RDKit {
51 
//! Exception type for flagging enumeration strategy errors.
/*!
  Stores a copy of a human-readable message. The message is exposed both
  through the standard std::exception::what() interface, so generic
  `catch (const std::exception &)` handlers can report it, and through the
  legacy message() accessor, which is kept for existing callers.
 */
class EnumerationStrategyException : public std::exception {
 public:
  //! \param msg  C string describing the error; a null pointer is treated
  //!             as an empty message instead of invoking undefined behaviour
  EnumerationStrategyException(const char *msg) : _msg(msg ? msg : "") {}

  //! \param msg  description of the error (copied into the exception)
  EnumerationStrategyException(const std::string &msg) : _msg(msg) {}

  //! \return the stored message; the pointer is owned by this exception
  //!         object and is valid only while the object is alive
  const char *message() const { return _msg.c_str(); }

  //! Standard exception interface; returns the same text as message().
  const char *what() const noexcept override { return _msg.c_str(); }

 private:
  std::string _msg;  // owned copy of the error text
};
63 
64 //! Return the number of elements per input vector
65 /*! \param bbs vector<vector<T> >
66 
67  \result vector<uint64_t> number of elements in each vector
68  */
69 template <class T>
71  const std::vector<std::vector<T> > &bbs) {
73  for (size_t i = 0; i < bbs.size(); ++i) sizes.push_back(bbs[i].size());
74  return sizes;
75 }
76 
77 //! getSizesFromReactants
78 //! Helper function for enumeration, bbs are stored in a
79 //! std::vector< std::vector<boost::shared_ptr<ROMol> > >
80 //
82  const std::vector<MOL_SPTR_VECT> &bbs);
83 
84 //! getReactantsFromRGroups
85 //! Helper function for enumeration, bbs are stored in a
86 //! std::vector< std::vector<boost::shared_ptr<ROMol> > >
87 //
88 MOL_SPTR_VECT getReactantsFromRGroups(const std::vector<MOL_SPTR_VECT> &bbs,
89  const EnumerationTypes::RGROUPS &rgroups);
90 
91 //! computeNumProducts
92 //! Returns the number of possible product combinations from
93 //! the given numbers of building blocks for each rgroup,
94 //! or EnumerationStrategyBase::EnumerationOverflow if the
95 //! number will not fit into the machine's integer type.
96 //! n.b. An overflow simply means there are a lot of products,
97 //! not that they cannot be enumerated.
98 boost::uint64_t computeNumProducts(const EnumerationTypes::RGROUPS &sizes);
99 
100 //! Base class for enumeration strategies
101 //! Usage:
102 //! EnumerationStrategyBase must be initialized with both a reaction
103 //! and the building block (molecule) vector to be sampled.
104 //!
105 //! \verbatim
106 //! EnumerationStrategyBase &eb = ...
107 //! if(eb) { // can we get another entry
108 //! const std::vector<int> &v = eb.next();
109 //! v[0] // RGroup 0 position
110 //! v[1] // RGroup 1 position...
111 //! }
112 //! \endverbatim
113 
115  protected:
116  EnumerationTypes::RGROUPS m_permutation; // where are we currently?
118  m_permutationSizes; // m_permutationSizes num bbs per group
119  boost::uint64_t
120  m_numPermutations; // total number of permutations for this group
121  // -1 if > ssize_t::max
122  public:
123  static const boost::uint64_t EnumerationOverflow =
124  static_cast<boost::uint64_t>(-1);
126  : m_permutation(), m_permutationSizes(), m_numPermutations() {}
127 
129 
 //! Returns the string "EnumerationStrategyBase"; declared virtual.
130  virtual const char *type() const { return "EnumerationStrategyBase"; }
131 
132  //! Initialize the enumerator based on the reaction and the
133  //! supplied building blocks
134  //! This is the standard API point.
135  void initialize(const ChemicalReaction &reaction,
136  const EnumerationTypes::BBS &building_blocks) {
137  // default initialization, may be overridden (sets the # reactants
138  // and computes the default # of permutations)
139  m_permutationSizes = getSizesFromBBs(building_blocks);
140  m_permutation.resize(m_permutationSizes.size());
141 
142  m_numPermutations = computeNumProducts(m_permutationSizes);
143  std::fill(m_permutation.begin(), m_permutation.end(), 0);
144 
145  initializeStrategy(reaction, building_blocks);
146  }
147 
148  //! Initialize derived class
149  //! must exist, EnumerationStrategyBase structures are already initialized
150  virtual void initializeStrategy(
151  const ChemicalReaction &reaction,
152  const EnumerationTypes::BBS &building_blocks) = 0;
153 
154  //! returns true if there are more permutations left
155  //! random enumerators may always return true...
156  virtual operator bool() const = 0;
157 
158  //! The current permutation {r1, r2, ...}
159  virtual const EnumerationTypes::RGROUPS &next() = 0;
160 
161  //! copy the enumeration strategy complete with current state
162  virtual EnumerationStrategyBase *copy() const = 0;
163 
164  //! Returns the current position in the enumeration (m_permutation) by const reference
165  const EnumerationTypes::RGROUPS &getPosition() const { return m_permutation; }
166 
167  //! Returns the stored total number of permutations (m_numPermutations).
168  //! A result of EnumerationOverflow indicates that the number of
169  //! permutations is not computable within a boost::uint64_t.
170  boost::uint64_t getNumPermutations() const { return m_numPermutations; }
171 
172  //! Returns how many permutations have been processed by this strategy
173  virtual boost::uint64_t getPermutationIdx() const = 0;
174 
175  //! Skip the specified number of permutations (useful for
176  //! resetting state to a known position)
177  bool skip(boost::uint64_t skipCount) {
178  for (boost::uint64_t i = 0; i < skipCount; ++i) next();
179  return true;
180  }
181 
182  protected:
183  //! Initialize the internal data structures
184  //! i.e. RGROUPS = {10,40,50};
186  m_permutation.resize(rgroups.size());
187  m_permutationSizes = rgroups;
188  m_numPermutations = computeNumProducts(m_permutationSizes);
189  std::fill(m_permutation.begin(), m_permutation.end(), 0);
190  }
191 
192  private:
193  friend class boost::serialization::access;
194  template <class Archive>
195  void serialize(Archive &ar, const unsigned int /*version*/) {
196  ar &m_permutation;
197  ar &m_permutationSizes;
198  ar &m_numPermutations;
199  }
200 };
201 
202 BOOST_SERIALIZATION_ASSUME_ABSTRACT(EnumerationStrategyBase)
203 }
204 
205 BOOST_CLASS_VERSION(RDKit::EnumerationStrategyBase, 1)
206 
207 #endif
boost::uint64_t getNumPermutations() const
void initialize(const ChemicalReaction &reaction, const EnumerationTypes::BBS &building_blocks)
EnumerationTypes::RGROUPS getSizesFromReactants(const std::vector< MOL_SPTR_VECT > &bbs)
This is a class for storing and applying general chemical reactions.
Definition: Reaction.h:116
pulls in the core RDKit functionality
std::vector< boost::shared_ptr< ROMol > > MOL_SPTR_VECT
Definition: FragCatParams.h:19
EnumerationStrategyException(const std::string &msg)
EnumerationTypes::RGROUPS m_permutation
std::vector< MOL_SPTR_VECT > BBS
Includes a bunch of functionality for handling Atom and Bond queries.
Definition: Atom.h:29
const EnumerationTypes::RGROUPS & getPosition() const
The current position in the enumeration.
virtual const char * type() const
std::vector< boost::uint64_t > RGROUPS
bool skip(boost::uint64_t skipCount)
void internalInitialize(const EnumerationTypes::RGROUPS &rgroups)
EnumerationTypes::RGROUPS m_permutationSizes
MOL_SPTR_VECT getReactantsFromRGroups(const std::vector< MOL_SPTR_VECT > &bbs, const EnumerationTypes::RGROUPS &rgroups)
class for flagging enumeration strategy errors
EnumerationTypes::RGROUPS getSizesFromBBs(const std::vector< std::vector< T > > &bbs)
Return the number of elements per input vector.
boost::uint64_t computeNumProducts(const EnumerationTypes::RGROUPS &sizes)