RDKit
Open-source cheminformatics and machine learning.
Reaction.h
Go to the documentation of this file.
1 //
2 // Copyright (c) 2007-2014, Novartis Institutes for BioMedical Research Inc.
3 // All rights reserved.
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following
13 // disclaimer in the documentation and/or other materials provided
14 // with the distribution.
15 // * Neither the name of Novartis Institutes for BioMedical Research Inc.
16 // nor the names of its contributors may be used to endorse or promote
17 // products derived from this software without specific prior written
18 // permission.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 //
32 
33 #ifndef __RD_REACTION_H_17Aug2006__
34 #define __RD_REACTION_H_17Aug2006__
35 
36 #include <GraphMol/RDKitBase.h>
38 #include <vector>
39 
40 namespace RDKit {
41 class ReactionPickler;
42 
//! used to indicate an error in the chemical reaction engine
/*!
  The exception owns a copy of its message. The text is available through
  message() and also through the standard std::exception::what() interface,
  so handlers that catch std::exception still see it.
*/
class ChemicalReactionException : public std::exception {
 public:
  //! construct with an error message (a null pointer gives an empty message)
  explicit ChemicalReactionException(const char *msg) : _msg(msg ? msg : "") {}
  //! construct with an error message
  explicit ChemicalReactionException(const std::string &msg) : _msg(msg) {}
  //! get the error message
  const char *message() const { return _msg.c_str(); }
  //! get the error message through the std::exception interface
  const char *what() const throw() { return _msg.c_str(); }
  //! non-throwing destructor, as required when deriving from std::exception
  ~ChemicalReactionException() throw() {}

 private:
  std::string _msg;  // owned copy of the message text
};
57 
58 //! This is a class for storing and applying general chemical reactions.
59 /*!
60  basic usage will be something like:
61 
62  \verbatim
63  ChemicalReaction rxn;
64  rxn.addReactantTemplate(r1);
65  rxn.addReactantTemplate(r2);
66  rxn.addProductTemplate(p1);
67  rxn.initReactantMatchers();
68 
69  MOL_SPTR_VECT prods;
70  for(MOL_SPTR_VECT::const_iterator r1It=reactantSet1.begin();
71  r1It!=reactantSet1.end();++r1It){
72  for(MOL_SPTR_VECT::const_iterator r2It=reactantSet2.begin();
73  r2It!=reactantSet2.end();++r2It){
74  MOL_SPTR_VECT rVect(2);
75  rVect[0] = *r1It;
76  rVect[1] = *r2It;
77 
78  std::vector<MOL_SPTR_VECT> lprods;
79  lprods = rxn.runReactants(rVect);
80  for(std::vector<MOL_SPTR_VECT>::const_iterator lpIt=lprods.begin();
81  lpIt!=lprods.end();++lpIt){
82  // we know this is a single-product reaction:
83  prods.push_back((*lpIt)[0]);
84  }
85  }
86  }
87  \endverbatim
88 
89  NOTES:
90  - to allow more control over the reaction, it is possible to flag reactant
91  atoms as being protected by setting the common_properties::_protected
92  property on those
93  atoms. Here's an example:
94  \verbatim
95  std::string smi="[O:1]>>[N:1]";
96  ChemicalReaction *rxn = RxnSmartsToChemicalReaction(smi);
97  rxn->initReactantMatchers();
98 
99  MOL_SPTR_VECT reacts;
100  reacts.clear();
101  smi = "OCO";
102  ROMol *mol = SmilesToMol(smi);
103  reacts.push_back(ROMOL_SPTR(mol));
104  std::vector<MOL_SPTR_VECT> prods;
105  prods = rxn->runReactants(reacts);
106  // here prods has two entries, because there are two Os in the
107  // reactant.
108 
109  reacts[0]->getAtomWithIdx(0)->setProp(common_properties::_protected,1);
110  prods = rxn->runReactants(reacts);
111  // here prods only has one entry, the reaction at atom 0
112  // has been blocked by the _protected property
113  \endverbatim
114 
115 */
117  friend class ReactionPickler;
118 
119  public:
120  ChemicalReaction() : df_needsInit(true), df_implicitProperties(false){};
122  df_needsInit = other.df_needsInit;
123  df_implicitProperties = other.df_implicitProperties;
124  for (MOL_SPTR_VECT::const_iterator iter = other.beginReactantTemplates();
125  iter != other.endReactantTemplates(); ++iter) {
126  ROMol *reactant = new ROMol(**iter);
127  m_reactantTemplates.push_back(ROMOL_SPTR(reactant));
128  }
129  for (MOL_SPTR_VECT::const_iterator iter = other.beginProductTemplates();
130  iter != other.endProductTemplates(); ++iter) {
131  ROMol *product = new ROMol(**iter);
132  m_productTemplates.push_back(ROMOL_SPTR(product));
133  }
134  for (MOL_SPTR_VECT::const_iterator iter = other.beginAgentTemplates();
135  iter != other.endAgentTemplates(); ++iter) {
136  ROMol *agent = new ROMol(**iter);
137  m_agentTemplates.push_back(ROMOL_SPTR(agent));
138  }
139  }
140  //! construct a reaction from a pickle string
141  ChemicalReaction(const std::string &binStr);
142 
143  //! Adds a new reactant template
144  /*!
145  \return the number of reactants
146 
147  */
148  unsigned int addReactantTemplate(ROMOL_SPTR mol) {
149  this->df_needsInit = true;
150  this->m_reactantTemplates.push_back(mol);
151  return this->m_reactantTemplates.size();
152  }
153 
154  //! Adds a new agent template
155  /*!
156  \return the number of agent
157 
158  */
159  unsigned int addAgentTemplate(ROMOL_SPTR mol) {
160  this->m_agentTemplates.push_back(mol);
161  return this->m_agentTemplates.size();
162  }
163 
164  //! Adds a new product template
165  /*!
166  \return the number of products
167 
168  */
169  unsigned int addProductTemplate(ROMOL_SPTR mol) {
170  this->m_productTemplates.push_back(mol);
171  return this->m_productTemplates.size();
172  }
173 
174  //! Removes the reactant templates from a reaction if their atom mapping ratio is
175  //! below a given threshold
176  /*! By default the removed reactant templates are attached to the agent
177  templates.
178  An alternative is to provide a pointer to a molecule vector where
179  these reactants should be saved.
180  */
181  void removeUnmappedReactantTemplates(double thresholdUnmappedAtoms = 0.2,
182  bool moveToAgentTemplates = true,
183  MOL_SPTR_VECT *targetVector = NULL);
184 
185  //! Removes the product templates from a reaction if their atom mapping ratio is
186  //! below a given threshold
187  /*! By default the removed product templates are attached to the agent
188  templates.
189  An alternative is to provide a pointer to a molecule vector where
190  these products should be saved.
191  */
192  void removeUnmappedProductTemplates(double thresholdUnmappedAtoms = 0.2,
193  bool moveToAgentTemplates = true,
194  MOL_SPTR_VECT *targetVector = NULL);
195 
196  /*! Removes the agent templates from a reaction. If a pointer to a
197  molecule vector is provided, the agents are stored therein. */
198  void removeAgentTemplates(MOL_SPTR_VECT *targetVector = NULL);
199 
200  //! Runs the reaction on a set of reactants
201  /*!
202 
203  \param reactants: the reactants to be used. The length of this must be equal
204  to
205  this->getNumReactantTemplates()
206 
207  \return a vector of vectors of products. Each subvector will be
208  this->getNumProductTemplates() long.
209 
210  We return a vector of vectors of products because each individual template
211  may
212  map multiple times onto its reactant. This leads to multiple possible result
213  sets.
214  */
215  std::vector<MOL_SPTR_VECT> runReactants(const MOL_SPTR_VECT reactants) const;
216 
217  //! Runs a single reactant against a single reactant template
218  /*!
219  \param reactant The single reactant to use
220 
221  \param reactantTemplateIdx the reactant template to target in the reaction
222  */
223  std::vector<MOL_SPTR_VECT> runReactant(
224  ROMOL_SPTR reactant, unsigned int reactantTemplateIdx) const;
225 
226  const MOL_SPTR_VECT &getReactants() const {
227  return this->m_reactantTemplates;
228  }
229  const MOL_SPTR_VECT &getAgents() const { return this->m_agentTemplates; }
230  const MOL_SPTR_VECT &getProducts() const { return this->m_productTemplates; }
231 
232  MOL_SPTR_VECT::const_iterator beginReactantTemplates() const {
233  return this->m_reactantTemplates.begin();
234  }
235  MOL_SPTR_VECT::const_iterator endReactantTemplates() const {
236  return this->m_reactantTemplates.end();
237  }
238 
239  MOL_SPTR_VECT::const_iterator beginProductTemplates() const {
240  return this->m_productTemplates.begin();
241  }
242  MOL_SPTR_VECT::const_iterator endProductTemplates() const {
243  return this->m_productTemplates.end();
244  }
245 
246  MOL_SPTR_VECT::const_iterator beginAgentTemplates() const {
247  return this->m_agentTemplates.begin();
248  }
249  MOL_SPTR_VECT::const_iterator endAgentTemplates() const {
250  return this->m_agentTemplates.end();
251  }
252 
253  MOL_SPTR_VECT::iterator beginReactantTemplates() {
254  return this->m_reactantTemplates.begin();
255  }
256  MOL_SPTR_VECT::iterator endReactantTemplates() {
257  return this->m_reactantTemplates.end();
258  }
259 
260  MOL_SPTR_VECT::iterator beginProductTemplates() {
261  return this->m_productTemplates.begin();
262  }
263  MOL_SPTR_VECT::iterator endProductTemplates() {
264  return this->m_productTemplates.end();
265  }
266 
267  MOL_SPTR_VECT::iterator beginAgentTemplates() {
268  return this->m_agentTemplates.begin();
269  }
270  MOL_SPTR_VECT::iterator endAgentTemplates() {
271  return this->m_agentTemplates.end();
272  }
273  unsigned int getNumReactantTemplates() const {
274  return this->m_reactantTemplates.size();
275  };
276  unsigned int getNumProductTemplates() const {
277  return this->m_productTemplates.size();
278  };
279  unsigned int getNumAgentTemplates() const {
280  return this->m_agentTemplates.size();
281  };
282 
283  //! initializes our internal reactant-matching datastructures.
284  /*!
285  This must be called after adding reactants and before calling
286  runReactants.
287  */
288  void initReactantMatchers();
289 
290  bool isInitialized() const { return !df_needsInit; };
291 
292  //! validates the reactants and products to make sure the reaction seems
293  //! "reasonable"
294  /*!
295  \return true if the reaction validates without errors (warnings do not
296  stop
297  validation)
298 
299  \param numWarnings: used to return the number of validation warnings
300  \param numErrors: used to return the number of validation errors
301 
302  \param silent: If this bool is true, no messages will be logged during the
303  validation.
304  By default, validation problems are reported to the warning
305  and error
306  logs depending on their severity.
307 
308  */
309  bool validate(unsigned int &numWarnings, unsigned int &numErrors,
310  bool silent = false) const;
311 
312  //! returns whether or not the reaction uses implicit
313  //! properties on the product atoms
314  /*!
315 
316  This toggles whether or not unspecified atomic properties in the
317  products are considered to be implicit and should be copied from
318  the actual reactants. This is necessary due to a semantic difference
319  between the "reaction SMARTS" approach and the MDL RXN
320  approach:
321  In "reaction SMARTS", this reaction:
322  [C:1]-[Br:2].[O-:3]>>[C:1]-[O:3].[Br-:2]
323  applied to [CH4+]Br should yield [CH4+]O
324  Something similar drawn in an rxn file, and applied to
325  [CH4+]Br should yield [CH3]O.
326  In rxn there is no charge on the product C because nothing is
327  specified in the rxn file; in "SMARTS" the charge from the
328  actual reactants is not *removed* because no charge is
329  specified in the reaction.
330 
331  */
332  bool getImplicitPropertiesFlag() const { return df_implicitProperties; };
333  //! sets the implicit properties flag. See the documentation for
334  //! getImplicitProertiesFlag() for a discussion of what this means.
335  void setImplicitPropertiesFlag(bool val) { df_implicitProperties = val; };
336 
337  private:
338  bool df_needsInit;
339  bool df_implicitProperties;
340  MOL_SPTR_VECT m_reactantTemplates, m_productTemplates, m_agentTemplates;
341  ChemicalReaction &operator=(const ChemicalReaction &); // disable assignment
342 };
343 
344 //! tests whether or not the molecule has a substructure match
345 //! to any of the reaction's reactants
346 //! the \c which argument is used to return which of the reactants
347 //! the molecule matches. If there's no match, it is equal to the number
348 //! of reactants on return
349 bool isMoleculeReactantOfReaction(const ChemicalReaction &rxn, const ROMol &mol,
350  unsigned int &which);
351 //! \overload
353  const ROMol &mol);
354 
355 //! tests whether or not the molecule has a substructure match
356 //! to any of the reaction's products
357 //! the \c which argument is used to return which of the products
358 //! the molecule matches. If there's no match, it is equal to the number
359 //! of products on return
360 bool isMoleculeProductOfReaction(const ChemicalReaction &rxn, const ROMol &mol,
361  unsigned int &which);
362 //! \overload
363 bool isMoleculeProductOfReaction(const ChemicalReaction &rxn, const ROMol &mol);
364 
365 //! tests whether or not the molecule has a substructure match
366 //! to any of the reaction's agents
367 //! the \c which argument is used to return which of the agents
368 //! the molecule matches. If there's no match, it is equal to the number
369 //! of agents on return
370 bool isMoleculeAgentOfReaction(const ChemicalReaction &rxn, const ROMol &mol,
371  unsigned int &which);
372 //! \overload
373 bool isMoleculeAgentOfReaction(const ChemicalReaction &rxn, const ROMol &mol);
374 
375 //! returns indices of the atoms in each reactant that are changed
376 //! in the reaction
377 /*!
378  \param rxn the reaction we are interested in
379 
380  \param mappedAtomsOnly if set, atoms that are not mapped will not be included
381  in
382  the list of changed atoms (otherwise they are automatically included)
383 
384  How are changed atoms recognized?
385  1) Atoms whose degree changes
386  2) Atoms whose bonding pattern changes
387  3) unmapped atoms (unless the mappedAtomsOnly flag is set)
388  4) Atoms connected to unmapped atoms
389  5) Atoms whose atomic number changes (unless the
390  corresponding product atom is a dummy)
391  6) Atoms with more than one atomic number query (unless the
392  corresponding product atom is a dummy)
393 
394  Note that the atomic number of a query atom depends on how it's constructed.
395  When coming from SMARTS: if the first query is an atomic label/number, that
396  sets the atomic number; otherwise it's zero.
397  For example [O;$(OC)] is atomic number 8 while [$(OC);O] is atomic
398  number 0.
399  When coming from RXN: the atomic number of the atom in the rxn file sets
400  the value.
401  */
403  bool mappedAtomsOnly = false);
404 
405 //! add the recursive queries to the reactants of a reaction
406 /*!
407  This does its work using RDKit::addRecursiveQueries()
408 
409  \param rxn the reaction we are interested in
410  \param queries - the dictionary of named queries to add
411  \param propName - the atom property to use to get query names
412  optional:
413  \param reactantLabels - to store pairs of (atom index, query string)
414  per reactant
415 
416  NOTES:
417  - existing query information, if present, will be supplemented (AND logic)
418  - non-query atoms will be replaced with query atoms using only the query
419  logic
420  - query names can be present as comma separated lists, they will then
421  be combined using OR logic.
422  - throws a KeyErrorException if a particular query name is not present
423  in \c queries
424 
425  */
427  ChemicalReaction &rxn, const std::map<std::string, ROMOL_SPTR> &queries,
428  const std::string &propName,
429  std::vector<std::vector<std::pair<unsigned int, std::string> > > *
430  reactantLabels = NULL);
431 
432 } // end of RDKit namespace
433 
434 namespace RDDepict {
435 //! \brief Generate 2D coordinates (a depiction) for a reaction
436 /*!
437 
438  \param rxn the reaction we are interested in
439 
440  \param spacing the spacing between components of the reaction
441 
442  \param updateProps if set, properties such as conjugation and
443  hybridization will be calculated for the reactant and product
444  templates before generating coordinates. This should result in
445  better depictions, but can lead to errors in some cases.
446 
447  \param canonOrient canonicalize the orientation so that the long
448  axes align with the x-axis etc.
449 
450  \param nFlipsPerSample - the number of rotatable bonds that are
451  flipped at random for each sample
452 
453  \param nSamples - the number of samples
454 
455  \param sampleSeed - seed for the random sampling process
456 
457  \param permuteDeg4Nodes - try permuting the drawing order of bonds around
458  atoms with four neighbors in order to improve the depiction
459 
460  for the other parameters see the documentation for compute2DCoords()
461 
462 */
464  double spacing = 2.0, bool updateProps = true,
465  bool canonOrient = false,
466  unsigned int nFlipsPerSample = 0,
467  unsigned int nSamples = 0, int sampleSeed = 0,
468  bool permuteDeg4Nodes = false);
469 
470 } // end of RDDepict namespace
471 
472 #endif
MOL_SPTR_VECT::iterator beginProductTemplates()
Definition: Reaction.h:260
MOL_SPTR_VECT::const_iterator endAgentTemplates() const
Definition: Reaction.h:249
unsigned int getNumProductTemplates() const
Definition: Reaction.h:276
unsigned int getNumReactantTemplates() const
Definition: Reaction.h:273
bool getImplicitPropertiesFlag() const
Definition: Reaction.h:332
bool isMoleculeReactantOfReaction(const ChemicalReaction &rxn, const ROMol &mol, unsigned int &which)
const MOL_SPTR_VECT & getProducts() const
Definition: Reaction.h:230
const char * message() const
get the error message
Definition: Reaction.h:51
unsigned int getNumAgentTemplates() const
Definition: Reaction.h:279
VECT_INT_VECT getReactingAtoms(const ChemicalReaction &rxn, bool mappedAtomsOnly=false)
ChemicalReaction(const ChemicalReaction &other)
Definition: Reaction.h:121
void setImplicitPropertiesFlag(bool val)
Definition: Reaction.h:335
bool isInitialized() const
Definition: Reaction.h:290
unsigned int addAgentTemplate(ROMOL_SPTR mol)
Adds a new agent template.
Definition: Reaction.h:159
This is a class for storing and applying general chemical reactions.
Definition: Reaction.h:116
pulls in the core RDKit functionality
ROMol is a molecule class that is intended to have a fixed topology.
Definition: ROMol.h:103
std::vector< boost::shared_ptr< ROMol > > MOL_SPTR_VECT
Definition: FragCatParams.h:19
MOL_SPTR_VECT::iterator endProductTemplates()
Definition: Reaction.h:263
std::vector< INT_VECT > VECT_INT_VECT
Definition: types.h:202
MOL_SPTR_VECT::iterator beginReactantTemplates()
Definition: Reaction.h:253
void addRecursiveQueriesToReaction(ChemicalReaction &rxn, const std::map< std::string, ROMOL_SPTR > &queries, const std::string &propName, std::vector< std::vector< std::pair< unsigned int, std::string > > > *reactantLabels=NULL)
add the recursive queries to the reactants of a reaction
bool isMoleculeAgentOfReaction(const ChemicalReaction &rxn, const ROMol &mol, unsigned int &which)
MOL_SPTR_VECT::const_iterator endReactantTemplates() const
Definition: Reaction.h:235
MOL_SPTR_VECT::const_iterator endProductTemplates() const
Definition: Reaction.h:242
boost::shared_ptr< ROMol > ROMOL_SPTR
MOL_SPTR_VECT::const_iterator beginAgentTemplates() const
Definition: Reaction.h:246
Includes a bunch of functionality for handling Atom and Bond queries.
Definition: Atom.h:29
used to indicate an error in the chemical reaction engine
Definition: Reaction.h:44
const MOL_SPTR_VECT & getAgents() const
Definition: Reaction.h:229
unsigned int addProductTemplate(ROMOL_SPTR mol)
Adds a new product template.
Definition: Reaction.h:169
bool isMoleculeProductOfReaction(const ChemicalReaction &rxn, const ROMol &mol, unsigned int &which)
unsigned int addReactantTemplate(ROMOL_SPTR mol)
Adds a new reactant template.
Definition: Reaction.h:148
MOL_SPTR_VECT::const_iterator beginReactantTemplates() const
Definition: Reaction.h:232
MOL_SPTR_VECT::const_iterator beginProductTemplates() const
Definition: Reaction.h:239
void compute2DCoordsForReaction(RDKit::ChemicalReaction &rxn, double spacing=2.0, bool updateProps=true, bool canonOrient=false, unsigned int nFlipsPerSample=0, unsigned int nSamples=0, int sampleSeed=0, bool permuteDeg4Nodes=false)
Generate 2D coordinates (a depiction) for a reaction.
ChemicalReactionException(const std::string msg)
construct with an error message
Definition: Reaction.h:49
const MOL_SPTR_VECT & getReactants() const
Definition: Reaction.h:226
MOL_SPTR_VECT::iterator endReactantTemplates()
Definition: Reaction.h:256
handles pickling (serializing) reactions
MOL_SPTR_VECT::iterator beginAgentTemplates()
Definition: Reaction.h:267
MOL_SPTR_VECT::iterator endAgentTemplates()
Definition: Reaction.h:270
ChemicalReactionException(const char *msg)
construct with an error message
Definition: Reaction.h:47