RDKit
Open-source cheminformatics and machine learning.
Reaction.h
Go to the documentation of this file.
1 //
2 // Copyright (c) 2007-2014, Novartis Institutes for BioMedical Research Inc.
3 // All rights reserved.
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following
13 // disclaimer in the documentation and/or other materials provided
14 // with the distribution.
15 // * Neither the name of Novartis Institutes for BioMedical Research Inc.
16 // nor the names of its contributors may be used to endorse or promote
17 // products derived from this software without specific prior written
18 // permission.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 //
32 
33 #ifndef __RD_REACTION_H_17Aug2006__
34 #define __RD_REACTION_H_17Aug2006__
35 
36 #include <GraphMol/RDKitBase.h>
38 #include <vector>
39 
40 namespace RDKit {
41 class ReactionPickler;
42 
43 //! used to indicate an error in the chemical reaction engine
44 class ChemicalReactionException : public std::exception {
45  public:
46  //! construct with an error message
47  explicit ChemicalReactionException(const char *msg) : _msg(msg){};
48  //! construct with an error message
49  explicit ChemicalReactionException(const std::string msg) : _msg(msg){};
50  //! get the error message
51  const char *message() const { return _msg.c_str(); };
53 
54  private:
55  std::string _msg;
56 };
57 
58 //! This is a class for storing and applying general chemical reactions.
59 /*!
60  basic usage will be something like:
61 
62  \verbatim
63  ChemicalReaction rxn;
64  rxn.addReactantTemplate(r1);
65  rxn.addReactantTemplate(r2);
66  rxn.addProductTemplate(p1);
67  rxn.initReactantMatchers();
68 
69  MOL_SPTR_VECT prods;
70  for(MOL_SPTR_VECT::const_iterator r1It=reactantSet1.begin();
71  r1It!=reactantSet1.end();++r1It;){
72  for(MOL_SPTR_VECT::const_iterator r2It=reactantSet2.begin();
73  r2It!=reactantSet2.end();++r2It;){
74  MOL_SPTR_VECT rVect(2);
75  rVect[0] = *r1It;
76  rVect[1] = *r2It;
77 
78  std::vector<MOL_SPTR_VECT> lprods;
79  lprods = rxn.runReactants(rVect);
80  for(std::vector<MOL_SPTR_VECT>::const_iterator lpIt=lprods.begin();
81  lpIt!=lprods.end();++lpIt){
82  // we know this is a single-product reaction:
83  prods.push_back((*lpIt)[0]);
84  }
85  }
86  }
87  \endverbatim
88 
89  NOTES:
90  - to allow more control over the reaction, it is possible to flag reactant
91  atoms as being protected by setting the common_properties::_protected
92  property on those
93  atoms. Here's an example:
94  \verbatim
95  std::string smi="[O:1]>>[N:1]";
96  ChemicalReaction *rxn = RxnSmartsToChemicalReaction(smi);
97  rxn->initReactantMatchers();
98 
99  MOL_SPTR_VECT reacts;
100  reacts.clear();
101  smi = "OCO";
102  ROMol *mol = SmilesToMol(smi);
103  reacts.push_back(ROMOL_SPTR(mol));
104  std::vector<MOL_SPTR_VECT> prods;
105  prods = rxn->runReactants(reacts);
106  // here prods has two entries, because there are two Os in the
107  // reactant.
108 
109  reacts[0]->getAtomWithIdx(0)->setProp(common_properties::_protected,1);
110  prods = rxn->runReactants(reacts);
111  // here prods only has one entry, the reaction at atom 0
112  // has been blocked by the _protected property
113  \endverbatim
114 
115 */
117  friend class ReactionPickler;
118 
119  public:
120  ChemicalReaction() : df_needsInit(true), df_implicitProperties(false){};
122  df_needsInit = other.df_needsInit;
123  df_implicitProperties = other.df_implicitProperties;
124  m_reactantTemplates = other.m_reactantTemplates;
125  m_productTemplates = other.m_productTemplates;
126  m_agentTemplates = other.m_agentTemplates;
127  }
128  //! construct a reaction from a pickle string
129  ChemicalReaction(const std::string &binStr);
130 
131  //! Adds a new reactant template
132  /*!
133  \return the number of reactants
134 
135  */
136  unsigned int addReactantTemplate(ROMOL_SPTR mol) {
137  this->df_needsInit = true;
138  this->m_reactantTemplates.push_back(mol);
139  return this->m_reactantTemplates.size();
140  }
141 
142  //! Adds a new agent template
143  /*!
144  \return the number of agent
145 
146  */
147  unsigned int addAgentTemplate(ROMOL_SPTR mol) {
148  this->m_agentTemplates.push_back(mol);
149  return this->m_agentTemplates.size();
150  }
151 
152  //! Adds a new product template
153  /*!
154  \return the number of products
155 
156  */
157  unsigned int addProductTemplate(ROMOL_SPTR mol) {
158  this->m_productTemplates.push_back(mol);
159  return this->m_productTemplates.size();
160  }
161 
162  //! Removes the reactant templates from a reaction if atom mapping ratio is
163  // below a given threshold
164  /*! By default the removed reactant templates were attached to the agent
165  templates.
166  An alternative will be to provide a pointer to a molecule vector where
167  these reactants should be saved.
168  */
169  void removeUnmappedReactantTemplates(double thresholdUnmappedAtoms = 0.2,
170  bool moveToAgentTemplates = true,
171  MOL_SPTR_VECT *targetVector = NULL);
172 
173  //! Removes the product templates from a reaction if its atom mapping ratio is
174  // below a given threshold
175  /*! By default the removed products templates were attached to the agent
176  templates.
177  An alternative will be to provide a pointer to a molecule vector where
178  these products should be saved.
179  */
180  void removeUnmappedProductTemplates(double thresholdUnmappedAtoms = 0.2,
181  bool moveToAgentTemplates = true,
182  MOL_SPTR_VECT *targetVector = NULL);
183 
184  /*! Removes the agent templates from a reaction if a pointer to a
185  molecule vector is provided the agents are stored therein.*/
186  void removeAgentTemplates(MOL_SPTR_VECT *targetVector = NULL);
187 
188  //! Runs the reaction on a set of reactants
189  /*!
190 
191  \param reactants: the reactants to be used. The length of this must be equal
192  to
193  this->getNumReactantTemplates()
194 
195  \return a vector of vectors of products. Each subvector will be
196  this->getNumProductTemplates() long.
197 
198  We return a vector of vectors of products because each individual template
199  may
200  map multiple times onto its reactant. This leads to multiple possible result
201  sets.
202  */
203  std::vector<MOL_SPTR_VECT> runReactants(const MOL_SPTR_VECT reactants) const;
204 
205  //! Runs a single reactant against a single reactant template
206  /*!
207  \param reactant The single reactant to use
208 
209  \param reactantTemplateIdx the reactant template to target in the reaction
210  */
211  std::vector<MOL_SPTR_VECT> runReactant(
212  ROMOL_SPTR reactant, unsigned int reactantTemplateIdx) const;
213 
214  const MOL_SPTR_VECT &getReactants() const {
215  return this->m_reactantTemplates;
216  }
217  const MOL_SPTR_VECT &getAgents() const { return this->m_agentTemplates; }
218  const MOL_SPTR_VECT &getProducts() const { return this->m_productTemplates; }
219 
220  MOL_SPTR_VECT::const_iterator beginReactantTemplates() const {
221  return this->m_reactantTemplates.begin();
222  }
223  MOL_SPTR_VECT::const_iterator endReactantTemplates() const {
224  return this->m_reactantTemplates.end();
225  }
226 
227  MOL_SPTR_VECT::const_iterator beginProductTemplates() const {
228  return this->m_productTemplates.begin();
229  }
230  MOL_SPTR_VECT::const_iterator endProductTemplates() const {
231  return this->m_productTemplates.end();
232  }
233 
234  MOL_SPTR_VECT::const_iterator beginAgentTemplates() const {
235  return this->m_agentTemplates.begin();
236  }
237  MOL_SPTR_VECT::const_iterator endAgentTemplates() const {
238  return this->m_agentTemplates.end();
239  }
240 
241  MOL_SPTR_VECT::iterator beginReactantTemplates() {
242  return this->m_reactantTemplates.begin();
243  }
244  MOL_SPTR_VECT::iterator endReactantTemplates() {
245  return this->m_reactantTemplates.end();
246  }
247 
248  MOL_SPTR_VECT::iterator beginProductTemplates() {
249  return this->m_productTemplates.begin();
250  }
251  MOL_SPTR_VECT::iterator endProductTemplates() {
252  return this->m_productTemplates.end();
253  }
254 
255  MOL_SPTR_VECT::iterator beginAgentTemplates() {
256  return this->m_agentTemplates.begin();
257  }
258  MOL_SPTR_VECT::iterator endAgentTemplates() {
259  return this->m_agentTemplates.end();
260  }
261  unsigned int getNumReactantTemplates() const {
262  return this->m_reactantTemplates.size();
263  };
264  unsigned int getNumProductTemplates() const {
265  return this->m_productTemplates.size();
266  };
267  unsigned int getNumAgentTemplates() const {
268  return this->m_agentTemplates.size();
269  };
270 
271  //! initializes our internal reactant-matching datastructures.
272  /*!
273  This must be called after adding reactants and before calling
274  runReactants.
275  */
276  void initReactantMatchers();
277 
278  bool isInitialized() const { return !df_needsInit; };
279 
280  //! validates the reactants and products to make sure the reaction seems
281  //"reasonable"
282  /*!
283  \return true if the reaction validates without errors (warnings do not
284  stop
285  validation)
286 
287  \param numWarnings: used to return the number of validation warnings
288  \param numErrors: used to return the number of validation errors
289 
290  \param silent: If this bool is true, no messages will be logged during the
291  validation.
292  By default, validation problems are reported to the warning
293  and error
294  logs depending on their severity.
295 
296  */
297  bool validate(unsigned int &numWarnings, unsigned int &numErrors,
298  bool silent = false) const;
299 
300  //! returns whether or not the reaction uses implicit
301  //! properties on the product atoms
302  /*!
303 
304  This toggles whether or not unspecified atomic properties in the
305  products are considered to be implicit and should be copied from
306  the actual reactants. This is necessary due to a semantic difference
307  between the "reaction SMARTS" approach and the MDL RXN
308  approach:
309  In "reaction SMARTS", this reaction:
310  [C:1]-[Br:2].[O-:3]>>[C:1]-[O:3].[Br-:2]
311  applied to [CH4+]Br should yield [CH4+]O
312  Something similar drawn in an rxn file, and applied to
313  [CH4+]Br should yield [CH3]O.
314  In rxn there is no charge on the product C because nothing is
315  specified in the rxn file; in "SMARTS" the charge from the
316  actual reactants is not *removed* because no charge is
317  specified in the reaction.
318 
319  */
320  bool getImplicitPropertiesFlag() const { return df_implicitProperties; };
321  //! sets the implicit properties flag. See the documentation for
322  //! getImplicitProertiesFlag() for a discussion of what this means.
323  void setImplicitPropertiesFlag(bool val) { df_implicitProperties = val; };
324 
325  private:
326  bool df_needsInit;
327  bool df_implicitProperties;
328  MOL_SPTR_VECT m_reactantTemplates, m_productTemplates, m_agentTemplates;
329  ChemicalReaction &operator=(const ChemicalReaction &); // disable assignment
330 };
331 
332 //! tests whether or not the molecule has a substructure match
333 //! to any of the reaction's reactants
334 //! the \c which argument is used to return which of the reactants
335 //! the molecule matches. If there's no match, it is equal to the number
336 //! of reactants on return
337 bool isMoleculeReactantOfReaction(const ChemicalReaction &rxn, const ROMol &mol,
338  unsigned int &which);
339 //! \overload
340 bool isMoleculeReactantOfReaction(const ChemicalReaction &rxn,
341  const ROMol &mol);
342 
343 //! tests whether or not the molecule has a substructure match
344 //! to any of the reaction's products
345 //! the \c which argument is used to return which of the products
346 //! the molecule matches. If there's no match, it is equal to the number
347 //! of products on return
348 bool isMoleculeProductOfReaction(const ChemicalReaction &rxn, const ROMol &mol,
349  unsigned int &which);
350 //! \overload
351 bool isMoleculeProductOfReaction(const ChemicalReaction &rxn, const ROMol &mol);
352 
353 //! tests whether or not the molecule has a substructure match
354 //! to any of the reaction's agents
355 //! the \c which argument is used to return which of the agents
356 //! the molecule matches. If there's no match, it is equal to the number
357 //! of agents on return
358 bool isMoleculeAgentOfReaction(const ChemicalReaction &rxn, const ROMol &mol,
359  unsigned int &which);
360 //! \overload
361 bool isMoleculeAgentOfReaction(const ChemicalReaction &rxn, const ROMol &mol);
362 
363 //! returns indices of the atoms in each reactant that are changed
364 //! in the reaction
365 /*!
366  \param rxn the reaction we are interested in
367 
368  \param mappedAtomsOnly if set, atoms that are not mapped will not be included
369  in
370  the list of changed atoms (otherwise they are automatically included)
371 
372  How are changed atoms recognized?
373  1) Atoms whose degree changes
374  2) Atoms whose bonding pattern changes
375  3) unmapped atoms (unless the mappedAtomsOnly flag is set)
376  4) Atoms connected to unmapped atoms
377  5) Atoms whose atomic number changes (unless the
378  corresponding product atom is a dummy)
379  6) Atoms with more than one atomic number query (unless the
380  corresponding product atom is a dummy)
381 
382  Note that the atomic number of a query atom depends on how it's constructed.
383  When coming from SMARTS: if the first query is an atomic label/number that
384  sets the atomic number, otherwise it's zero.
385  For example [O;$(OC)] is atomic number 8 while [$(OC);O] is atomic
386  number 0.
387  When coming from RXN: the atomic number of the atom in the rxn file sets
388  the value.
389  */
390 VECT_INT_VECT getReactingAtoms(const ChemicalReaction &rxn,
391  bool mappedAtomsOnly = false);
392 
393 //! add the recursive queries to the reactants of a reaction
394 /*!
395  This does its work using RDKit::addRecursiveQueries()
396 
397  \param rxn the reaction we are interested in
398  \param queries - the dictionary of named queries to add
399  \param propName - the atom property to use to get query names
400  optional:
401  \param reactantLabels - to store pairs of (atom index, query string)
402  per reactant
403 
404  NOTES:
405  - existing query information, if present, will be supplemented (AND logic)
406  - non-query atoms will be replaced with query atoms using only the query
407  logic
408  - query names can be present as comma separated lists, they will then
409  be combined using OR logic.
410  - throws a KeyErrorException if a particular query name is not present
411  in \c queries
412 
413  */
415  ChemicalReaction &rxn, const std::map<std::string, ROMOL_SPTR> &queries,
416  const std::string &propName,
417  std::vector<std::vector<std::pair<unsigned int, std::string> > > *
418  reactantLabels = NULL);
419 
420 } // end of RDKit namespace
421 
422 namespace RDDepict {
423 //! \brief Generate 2D coordinates (a depiction) for a reaction
424 /*!
425 
426  \param rxn the reaction we are interested in
427 
428  \param spacing the spacing between components of the reaction
429 
430  \param updateProps if set, properties such as conjugation and
431  hybridization will be calculated for the reactant and product
432  templates before generating coordinates. This should result in
433  better depictions, but can lead to errors in some cases.
434 
435  \param canonOrient canonicalize the orientation so that the long
436  axes align with the x-axis etc.
437 
438  \param nFlipsPerSample - the number of rotatable bonds that are
439  flipped at random for each sample
440 
441  \param nSamples - the number of samples
442 
443  \param sampleSeed - seed for the random sampling process
444 
445  \param permuteDeg4Nodes - try permuting the drawing order of bonds around
446  atoms with four neighbors in order to improve the depiction
447 
448  for the other parameters see the documentation for compute2DCoords()
449 
450 */
452  double spacing = 2.0, bool updateProps = true,
453  bool canonOrient = false,
454  unsigned int nFlipsPerSample = 0,
455  unsigned int nSamples = 0, int sampleSeed = 0,
456  bool permuteDeg4Nodes = false);
457 
458 } // end of RDDepict namespace
459 
460 #endif
MOL_SPTR_VECT::iterator beginProductTemplates()
Definition: Reaction.h:248
MOL_SPTR_VECT::const_iterator endAgentTemplates() const
Definition: Reaction.h:237
void initReactantMatchers()
initializes our internal reactant-matching datastructures.
unsigned int getNumProductTemplates() const
Definition: Reaction.h:264
void removeUnmappedReactantTemplates(double thresholdUnmappedAtoms=0.2, bool moveToAgentTemplates=true, MOL_SPTR_VECT *targetVector=NULL)
Removes the reactant templates from a reaction if atom mapping ratio is.
void removeUnmappedProductTemplates(double thresholdUnmappedAtoms=0.2, bool moveToAgentTemplates=true, MOL_SPTR_VECT *targetVector=NULL)
Removes the product templates from a reaction if its atom mapping ratio is.
unsigned int getNumReactantTemplates() const
Definition: Reaction.h:261
bool getImplicitPropertiesFlag() const
Definition: Reaction.h:320
bool isMoleculeReactantOfReaction(const ChemicalReaction &rxn, const ROMol &mol, unsigned int &which)
const MOL_SPTR_VECT & getProducts() const
Definition: Reaction.h:218
const char * message() const
get the error message
Definition: Reaction.h:51
unsigned int getNumAgentTemplates() const
Definition: Reaction.h:267
VECT_INT_VECT getReactingAtoms(const ChemicalReaction &rxn, bool mappedAtomsOnly=false)
ChemicalReaction(const ChemicalReaction &other)
Definition: Reaction.h:121
void setImplicitPropertiesFlag(bool val)
Definition: Reaction.h:323
bool isInitialized() const
Definition: Reaction.h:278
unsigned int addAgentTemplate(ROMOL_SPTR mol)
Adds a new agent template.
Definition: Reaction.h:147
This is a class for storing and applying general chemical reactions.
Definition: Reaction.h:116
pulls in the core RDKit functionality
std::vector< boost::shared_ptr< ROMol > > MOL_SPTR_VECT
Definition: FragCatParams.h:19
MOL_SPTR_VECT::iterator endProductTemplates()
Definition: Reaction.h:251
std::vector< INT_VECT > VECT_INT_VECT
Definition: types.h:160
MOL_SPTR_VECT::iterator beginReactantTemplates()
Definition: Reaction.h:241
void addRecursiveQueriesToReaction(ChemicalReaction &rxn, const std::map< std::string, ROMOL_SPTR > &queries, const std::string &propName, std::vector< std::vector< std::pair< unsigned int, std::string > > > *reactantLabels=NULL)
add the recursive queries to the reactants of a reaction
bool isMoleculeAgentOfReaction(const ChemicalReaction &rxn, const ROMol &mol, unsigned int &which)
MOL_SPTR_VECT::const_iterator endReactantTemplates() const
Definition: Reaction.h:223
MOL_SPTR_VECT::const_iterator endProductTemplates() const
Definition: Reaction.h:230
boost::shared_ptr< ROMol > ROMOL_SPTR
MOL_SPTR_VECT::const_iterator beginAgentTemplates() const
Definition: Reaction.h:234
Includes a bunch of functionality for handling Atom and Bond queries.
Definition: Atom.h:28
used to indicate an error in the chemical reaction engine
Definition: Reaction.h:44
const MOL_SPTR_VECT & getAgents() const
Definition: Reaction.h:217
std::vector< MOL_SPTR_VECT > runReactant(ROMOL_SPTR reactant, unsigned int reactantTemplateIdx) const
Runs a single reactant against a single reactant template.
unsigned int addProductTemplate(ROMOL_SPTR mol)
Adds a new product template.
Definition: Reaction.h:157
bool isMoleculeProductOfReaction(const ChemicalReaction &rxn, const ROMol &mol, unsigned int &which)
unsigned int addReactantTemplate(ROMOL_SPTR mol)
Adds a new reactant template.
Definition: Reaction.h:136
std::vector< MOL_SPTR_VECT > runReactants(const MOL_SPTR_VECT reactants) const
Runs the reaction on a set of reactants.
bool validate(unsigned int &numWarnings, unsigned int &numErrors, bool silent=false) const
validates the reactants and products to make sure the reaction seems
MOL_SPTR_VECT::const_iterator beginReactantTemplates() const
Definition: Reaction.h:220
MOL_SPTR_VECT::const_iterator beginProductTemplates() const
Definition: Reaction.h:227
void compute2DCoordsForReaction(RDKit::ChemicalReaction &rxn, double spacing=2.0, bool updateProps=true, bool canonOrient=false, unsigned int nFlipsPerSample=0, unsigned int nSamples=0, int sampleSeed=0, bool permuteDeg4Nodes=false)
Generate 2D coordinates (a depiction) for a reaction.
ChemicalReactionException(const std::string msg)
construct with an error message
Definition: Reaction.h:49
const MOL_SPTR_VECT & getReactants() const
Definition: Reaction.h:214
MOL_SPTR_VECT::iterator endReactantTemplates()
Definition: Reaction.h:244
handles pickling (serializing) reactions
MOL_SPTR_VECT::iterator beginAgentTemplates()
Definition: Reaction.h:255
void removeAgentTemplates(MOL_SPTR_VECT *targetVector=NULL)
MOL_SPTR_VECT::iterator endAgentTemplates()
Definition: Reaction.h:258
ChemicalReactionException(const char *msg)
construct with an error message
Definition: Reaction.h:47