RDKit
Open-source cheminformatics and machine learning.
MolOps.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2001-2012 Greg Landrum and Rational Discovery LLC
3 // Copyright (c) 2014, Novartis Institutes for BioMedical Research Inc.
4 //
5 // @@ All Rights Reserved @@
6 // This file is part of the RDKit.
7 // The contents are covered by the terms of the BSD license
8 // which is included in the file license.txt, found at the root
9 // of the RDKit source tree.
10 //
11 #ifndef _RD_MOL_OPS_H_
12 #define _RD_MOL_OPS_H_
13 
14 #include <vector>
15 #include <map>
16 #include <list>
18 #include <boost/smart_ptr.hpp>
19 #include <boost/dynamic_bitset.hpp>
21 #include <RDGeneral/types.h>
22 
23 extern const int ci_LOCAL_INF;
24 namespace RDKit {
25 class ROMol;
26 class RWMol;
27 class Atom;
28 class Bond;
29 typedef std::vector<double> INVAR_VECT;
30 typedef INVAR_VECT::iterator INVAR_VECT_I;
31 typedef INVAR_VECT::const_iterator INVAR_VECT_CI;
32 
33 //! \brief Groups a variety of molecular query and transformation operations.
34 namespace MolOps {
35 
36 //! return the number of electrons available on an atom to donate for
37 //! aromaticity
38 /*!
39  The result is determined using the default valency, number of lone pairs,
40  number of bonds and the formal charge. Note that the atom may not donate
41  all of these electrons to a ring for aromaticity (also used in Conjugation
42  and hybridization code).
43 
44  \param at the atom of interest
45 
46  \return the number of electrons
47 */
48 int countAtomElec(const Atom *at);
49 
50 //! sums up all atomic formal charges and returns the result
51 int getFormalCharge(const ROMol &mol);
52 
53 //! returns whether or not the given Atom is involved in a conjugated bond
54 bool atomHasConjugatedBond(const Atom *at);
55 
56 //! find fragments (disconnected components of the molecular graph)
57 /*!
58 
59  \param mol the molecule of interest
60  \param mapping used to return the mapping of Atoms->fragments.
61  On return \c mapping will be <tt>mol->getNumAtoms()</tt> long
62  and will contain the fragment assignment for each Atom
63 
64  \return the number of fragments found.
65 
66 */
67 unsigned int getMolFrags(const ROMol &mol, std::vector<int> &mapping);
68 //! find fragments (disconnected components of the molecular graph)
69 /*!
70 
71  \param mol the molecule of interest
72  \param frags used to return the Atoms in each fragment
73  On return \c frags will be \c numFrags long, and each entry
74  will contain the indices of the Atoms in that fragment.
75 
76  \return the number of fragments found.
77 
78 */
79 unsigned int getMolFrags(const ROMol &mol,
80  std::vector<std::vector<int> > &frags);
81 
82 //! splits a molecule into its component fragments
83 //! (disconnected components of the molecular graph)
84 /*!
85 
86  \param mol the molecule of interest
87  \param sanitizeFrags toggles sanitization of the fragments after
88  they are built
89  \param frags used to return the mapping of Atoms->fragments.
90  if provided, \c frags will be <tt>mol->getNumAtoms()</tt> long
91  on return and will contain the fragment assignment for each Atom
92  \param fragsMolAtomMapping used to return the Atoms in each fragment
93  On return \c fragsMolAtomMapping will be \c numFrags long, and each entry
94  will contain the indices of the Atoms in that fragment.
95  \param copyConformers toggles copying conformers of the fragments after
96  they are built
97  \return a vector of the fragments as smart pointers to ROMols
98 
99 */
100 std::vector<boost::shared_ptr<ROMol> > getMolFrags(
101  const ROMol &mol, bool sanitizeFrags = true, std::vector<int> *frags = 0,
102  std::vector<std::vector<int> > *fragsMolAtomMapping = 0,
103  bool copyConformers = true);
104 
105 //! splits a molecule into pieces based on labels assigned using a query
106 /*!
107 
108  \param mol the molecule of interest
109  \param query the query used to "label" the molecule for fragmentation
110  \param sanitizeFrags toggles sanitization of the fragments after
111  they are built
112  \param whiteList if provided, only labels in the list will be kept
113  \param negateList if true, the white list logic will be inverted: only labels
114  not in the list will be kept
115 
116  \return a map of the fragments and their labels
117 
118 */
119 template <typename T>
120 std::map<T, boost::shared_ptr<ROMol> > getMolFragsWithQuery(
121  const ROMol &mol, T (*query)(const ROMol &, const Atom *),
122  bool sanitizeFrags = true, const std::vector<T> *whiteList = 0,
123  bool negateList = false);
124 
125 #if 0
126  //! finds a molecule's minimum spanning tree (MST)
127  /*!
128  \param mol the molecule of interest
129  \param mst used to return the MST as a vector of bond indices
130  */
131  void findSpanningTree(const ROMol &mol,std::vector<int> &mst);
132 #endif
133 
134 //! calculates Balaban's J index for the molecule
135 /*!
136  \param mol the molecule of interest
137  \param useBO toggles inclusion of the bond order in the calculation
138  (when false, we're not really calculating the J value)
139  \param force forces the calculation (instead of using cached results)
140  \param bondPath when included, only paths using bonds whose indices occur
141  in this vector will be included in the calculation
142  \param cacheIt If this is true, the calculated value will be cached
143  as a property on the molecule
144  \return the J index
145 
146 */
147 double computeBalabanJ(const ROMol &mol, bool useBO = true, bool force = false,
148  const std::vector<int> *bondPath = 0,
149  bool cacheIt = true);
150 //! \overload
151 double computeBalabanJ(double *distMat, int nb, int nAts);
152 
153 //! \name Dealing with hydrogens
154 //@{
155 
156 //! returns a copy of a molecule with hydrogens added in as explicit Atoms
157 /*!
158  \param mol the molecule to add Hs to
159  \param explicitOnly (optional) if this is \c true, only explicit Hs will be
160  added
161  \param addCoords (optional) If this is true, estimates for the atomic
162  coordinates
163  of the added Hs will be used.
164  \param onlyOnAtoms (optional) if provided, this should be a vector of
165  IDs of the atoms that will be considered for H addition.
166 
167  \return the new molecule
168 
169  <b>Notes:</b>
170  - it makes no sense to use the \c addCoords option if the molecule's
171  heavy
172  atoms don't already have coordinates.
173  - the caller is responsible for <tt>delete</tt>ing the pointer this
174  returns.
175  */
176 ROMol *addHs(const ROMol &mol, bool explicitOnly = false,
177  bool addCoords = false, const UINT_VECT *onlyOnAtoms = NULL);
178 //! \overload
179 // modifies the molecule in place
180 void addHs(RWMol &mol, bool explicitOnly = false, bool addCoords = false,
181  const UINT_VECT *onlyOnAtoms = NULL);
182 
183 //! returns a copy of a molecule with hydrogens removed
184 /*!
185  \param mol the molecule to remove Hs from
186  \param implicitOnly (optional) if this is \c true, only implicit Hs will be
187  removed
188  \param updateExplicitCount (optional) If this is \c true, when explicit Hs
189  are removed
190  from the graph, the heavy atom to which they are bound will have its
191  counter of
192  explicit Hs increased.
193  \param sanitize (optional) If this is \c true, the final molecule will be
194  sanitized
195 
196  \return the new molecule
197 
198  <b>Notes:</b>
199  - Hydrogens which aren't connected to a heavy atom will not be
200  removed. This prevents molecules like <tt>"[H][H]"</tt> from having
201  all atoms removed.
202  - Labelled hydrogen (e.g. atoms with atomic number=1, but mass > 1),
203  will not be removed.
204  - two coordinate Hs, like the central H in C[H-]C, will not be removed
205  - Hs connected to dummy atoms will not be removed
206 
207  - the caller is responsible for <tt>delete</tt>ing the pointer this
208  returns.
209 */
210 ROMol *removeHs(const ROMol &mol, bool implicitOnly = false,
211  bool updateExplicitCount = false, bool sanitize = true);
212 //! \overload
213 // modifies the molecule in place
214 void removeHs(RWMol &mol, bool implicitOnly = false,
215  bool updateExplicitCount = false, bool sanitize = true);
216 
217 //! returns a copy of a molecule with hydrogens removed and added as queries
218 //! to the heavy atoms to which they are bound.
219 /*!
220  This is really intended to be used with molecules that contain QueryAtoms
221 
222  \param mol the molecule to remove Hs from
223 
224  \return the new molecule
225 
226  <b>Notes:</b>
227  - Atoms that do not already have hydrogen count queries will have one
228  added, other H-related queries will not be touched. Examples:
229  - C[H] -> [C;!H0]
230  - [C;H1][H] -> [C;H1]
231  - [C;H2][H] -> [C;H2]
232  - Hydrogens which aren't connected to a heavy atom will not be
233  removed. This prevents molecules like <tt>"[H][H]"</tt> from having
234  all atoms removed.
235  - the caller is responsible for <tt>delete</tt>ing the pointer this returns.
236  - By default all hydrogens are removed, however if
237  mergeUnmappedOnly is true, any hydrogen participating
238  in an atom map will be retained
239 
240 */
241 ROMol *mergeQueryHs(const ROMol &mol, bool mergeUnmappedOnly = false);
242 //! \overload
243 // modifies the molecule in place
244 void mergeQueryHs(RWMol &mol, bool mergeUnmappedOnly = false);
245 
246 typedef enum {
253  ADJUST_IGNOREALL = 0xFFFFFFF
255 
257  bool adjustDegree; /**< add degree queries */
258  boost::uint32_t adjustDegreeFlags;
259  bool adjustRingCount; /**< add ring-count queries */
260  boost::uint32_t adjustRingCountFlags;
261 
262  bool makeDummiesQueries; /**< convert dummy atoms without isotope labels to
263  any-atom queries */
265  bool makeBondsGeneric; /**< convert bonds to generic queries (any bonds) */
266  boost::uint32_t makeBondsGenericFlags;
267  bool makeAtomsGeneric; /**< convert atoms to generic queries (any atoms) */
268  boost::uint32_t makeAtomsGenericFlags;
269 
271  : adjustDegree(true),
272  adjustDegreeFlags(ADJUST_IGNOREDUMMIES | ADJUST_IGNORECHAINS),
273  adjustRingCount(false),
274  adjustRingCountFlags(ADJUST_IGNOREDUMMIES | ADJUST_IGNORECHAINS),
275  makeDummiesQueries(true),
276  aromatizeIfPossible(true),
277  makeBondsGeneric(false),
278  makeBondsGenericFlags(ADJUST_IGNORENONE),
279  makeAtomsGeneric(false),
280  makeAtomsGenericFlags(ADJUST_IGNORENONE) {}
281 };
282 //! returns a copy of a molecule with query properties adjusted
283 /*!
284  \param mol the molecule to adjust
285  \param params controls the adjustments made
286 
287  \return the new molecule
288 */
289 ROMol *adjustQueryProperties(const ROMol &mol,
290  const AdjustQueryParameters *params = NULL);
291 //! \overload
292 // modifies the molecule in place
293 void adjustQueryProperties(RWMol &mol,
294  const AdjustQueryParameters *params = NULL);
295 
296 //! returns a copy of a molecule with the atoms renumbered
297 /*!
298 
299  \param mol the molecule to work with
300  \param newOrder the new ordering of the atoms (should be numAtoms long)
301  for example: if newOrder is [3,2,0,1], then atom 3 in the original
302  molecule will be atom 0 in the new one
303 
304  \return the new molecule
305 
306  <b>Notes:</b>
307  - the caller is responsible for <tt>delete</tt>ing the pointer this returns.
308 
309 */
310 ROMol *renumberAtoms(const ROMol &mol,
311  const std::vector<unsigned int> &newOrder);
312 
313 //@}
314 
315 //! \name Sanitization
316 //@{
317 
318 typedef enum {
330  SANITIZE_ALL = 0xFFFFFFF
331 } SanitizeFlags;
332 
333 //! \brief carries out a collection of tasks for cleaning up a molecule and
334 //! ensuring
335 //! that it makes "chemical sense"
336 /*!
337  This function calls the following in sequence
338  -# MolOps::cleanUp()
339  -# mol.updatePropertyCache()
340  -# MolOps::symmetrizeSSSR()
341  -# MolOps::Kekulize()
342  -# MolOps::assignRadicals()
343  -# MolOps::setAromaticity()
344  -# MolOps::setConjugation()
345  -# MolOps::setHybridization()
346  -# MolOps::cleanupChirality()
347  -# MolOps::adjustHs()
348 
349  \param mol : the RWMol to be cleaned
350 
351  \param operationThatFailed : the first (if any) sanitization operation that
352  fails is set here.
353  The values are taken from the \c SanitizeFlags
354  enum.
355  On success, the value is \c
356  SanitizeFlags::SANITIZE_NONE
357 
358  \param sanitizeOps : the bits here are used to set which sanitization
359  operations are carried
360  out. The elements of the \c SanitizeFlags enum define
361  the operations.
362 
363  <b>Notes:</b>
364  - If there is a failure in the sanitization, a \c SanitException
365  will be thrown.
366  - in general the user of this function should cast the molecule following
367  this
368  function to a ROMol, so that new atoms and bonds cannot be added to the
369  molecule and screw up the sanitizing that has been done here
370 */
371 void sanitizeMol(RWMol &mol, unsigned int &operationThatFailed,
372  unsigned int sanitizeOps = SANITIZE_ALL);
373 //! \overload
374 void sanitizeMol(RWMol &mol);
375 
376 //! Possible aromaticity models
377 /*!
378 - \c AROMATICITY_DEFAULT at the moment always uses \c AROMATICITY_RDKIT
379 - \c AROMATICITY_RDKIT is the standard RDKit model (as documented in the RDKit
380 Book)
381 - \c AROMATICITY_SIMPLE only considers 5- and 6-membered simple rings (it
382 does not consider the outer envelope of fused rings)
383 - \c AROMATICITY_CUSTOM uses a caller-provided function
384 */
385 typedef enum {
386  AROMATICITY_DEFAULT = 0x0, ///< future proofing
389  AROMATICITY_CUSTOM = 0xFFFFFFF ///< use a function
391 
392 //! Sets up the aromaticity for a molecule
393 /*!
394 
395  This is what happens here:
396  -# find all the simple rings by calling the findSSSR function
397  -# loop over all the Atoms in each ring and mark them if they are
398  candidates
399  for aromaticity. A ring atom is a candidate if it can spare electrons
400  to the ring and if it's from the first two rows of the periodic table.
401  -# based on the candidate atoms, mark the rings to be either candidates
402  or non-candidates. A ring is a candidate only if all its atoms are
403  candidates
404  -# apply Hueckel rule to each of the candidate rings to check if the ring
405  can be
406  aromatic
407 
408  \param mol the RWMol of interest
409  \param model the aromaticity model to use
410  \param func a custom function for assigning aromaticity (only used when
411  model=\c AROMATICITY_CUSTOM)
412 
413  \return >0 on success, <= 0 otherwise
414 
415  <b>Assumptions:</b>
416  - Kekulization has been done (i.e. \c MolOps::Kekulize() has already
417  been called)
418 
419 */
421  int (*func)(RWMol &) = NULL);
422 
423 //! Designed to be called by the sanitizer to handle special cases before
424 //! anything is done.
425 /*!
426 
427  Currently this:
428  - modifies nitro groups, so that the nitrogen does not have an unreasonable
429  valence of 5, as follows:
430  - the nitrogen gets a positive charge
431  - one of the oxygens gets a negative charge and the double bond to this
432  oxygen is changed to a single bond
433  The net result is that nitro groups can be counted on to be:
434  \c "[N+](=O)[O-]"
435  - modifies halogen-oxygen containing species as follows:
436  \c [Cl,Br,I](=O)(=O)(=O)O -> [X+3]([O-])([O-])([O-])O
437  \c [Cl,Br,I](=O)(=O)O -> [X+3]([O-])([O-])O
438  \c [Cl,Br,I](=O)O -> [X+]([O-])O
439  - converts the substructure [N,C]=P(=O)-* to [N,C]=[P+](-[O-])-*
440 
441  \param mol the molecule of interest
442 
443 */
444 void cleanUp(RWMol &mol);
445 
446 //! Called by the sanitizer to assign radical counts to atoms
447 void assignRadicals(RWMol &mol);
448 
449 //! adjust the number of implicit and explicit Hs for special cases
450 /*!
451 
452  Currently this:
453  - modifies aromatic nitrogens so that, when appropriate, they have an
454  explicit H marked (e.g. so that we get things like \c "c1cc[nH]cc1")
455 
456  \param mol the molecule of interest
457 
458  <b>Assumptions</b>
459  - this is called after the molecule has been sanitized,
460  aromaticity has been perceived, and the implicit valence of
461  everything has been calculated.
462 
463 */
464 void adjustHs(RWMol &mol);
465 
466 //! Kekulizes the molecule
467 /*!
468 
469  \param mol the molecule of interest
470  \param markAtomsBonds if this is set to true, \c isAromatic boolean settings
471  on both the Bonds and Atoms are turned to false
472  following
473  the Kekulization, otherwise they are left alone in
474  their
475  original state.
476  \param maxBackTracks the maximum number of attempts at back-tracking. The
477  algorithm
478  uses a back-tracking procedure to revisit a previous
479  setting of
480  double bond if we hit a wall in the kekulization
481  process
482 
483  <b>Notes:</b>
484  - even if \c markAtomsBonds is \c false the \c BondType for all aromatic
485  bonds will be changed from \c RDKit::Bond::AROMATIC to \c
486  RDKit::Bond::SINGLE
487  or RDKit::Bond::DOUBLE during Kekulization.
488 
489 */
490 void Kekulize(RWMol &mol, bool markAtomsBonds = true,
491  unsigned int maxBackTracks = 100);
492 
493 //! flags the molecule's conjugated bonds
494 void setConjugation(ROMol &mol);
495 
496 //! calculates and sets the hybridization of all of a molecule's Atoms
497 void setHybridization(ROMol &mol);
498 
499 // @}
500 
501 //! \name Ring finding and SSSR
502 //@{
503 
504 //! finds a molecule's Smallest Set of Smallest Rings
505 /*!
506  Currently this implements a modified form of Figueras algorithm
507  (JCICS - Vol. 36, No. 5, 1996, 986-991)
508 
509  \param mol the molecule of interest
510  \param res used to return the vector of rings. Each entry is a vector with
511  atom indices. This information is also stored in the molecule's
512  RingInfo structure, so this argument is optional (see overload)
513 
514  \return number of smallest rings found
515 
516  Base algorithm:
517  - The original algorithm starts by finding representative degree 2
518  nodes.
519  - Representative because if a series of deg 2 nodes are found only
520  one of them is picked.
521  - The smallest ring around each of them is found.
522  - The bonds that connect to this degree 2 node are then chopped off,
523  yielding
524  new deg two nodes
525  - The process is repeated on the new deg 2 nodes.
526  - If no deg 2 nodes are found, a deg 3 node is picked. The smallest ring
527  with it is found. A bond from this is "carefully" (look in the paper)
528  selected and chopped, yielding deg 2 nodes. The process is same as
529  above once this is done.
530 
531  Our Modifications:
532  - If available, more than one smallest ring around a representative deg 2
533  node will be computed and stored
534  - Typically 3 rings are found around a degree 3 node (when no deg 2s are
535  available)
536  and all the bonds to that node are chopped.
537  - The extra rings that were found in this process are removed after all the
538  nodes
539  have been covered.
540 
541  These changes were motivated by several factors:
542  - We believe the original algorithm fails to find the correct SSSR
543  (finds the correct number of them but the wrong ones) on some sample mols
544  - Since SSSR may not be unique, a post-SSSR step to symmetrize may be done.
545  The extra rings this process adds can be quite useful.
546 */
547 int findSSSR(const ROMol &mol, std::vector<std::vector<int> > &res);
548 //! \overload
549 int findSSSR(const ROMol &mol, std::vector<std::vector<int> > *res = 0);
550 
551 //! use a DFS algorithm to identify ring bonds and atoms in a molecule
552 /*!
553  \b NOTE: though the RingInfo structure is populated by this function,
554  the only really reliable calls that can be made are to check if
555  mol.getRingInfo().numAtomRings(idx) or mol.getRingInfo().numBondRings(idx)
556  return values >0
557 */
558 void fastFindRings(const ROMol &mol);
559 
560 //! symmetrize the molecule's Smallest Set of Smallest Rings
561 /*!
562  SSSR rings obtained from "findSSSR" can be non-unique in some cases.
563  For example, cubane has five SSSR rings, not six as one would hope.
564 
565  This function adds additional rings to the SSSR list if necessary
566  to make the list symmetric, e.g. all atoms in cubane will be part of the same
567  number
568  of SSSRs. This function chooses these extra rings from the extra rings
569  computed
570  and discarded during findSSSR. The new rings are chosen such that:
571  - replacing a same sized ring in the SSSR list with an extra ring yields
572  the same union of bond IDs as the original SSSR list
573 
574  \param mol - the molecule of interest
575  \param res used to return the vector of rings. Each entry is a vector with
576  atom indices. This information is also stored in the molecule's
577  RingInfo structure, so this argument is optional (see overload)
578 
579  \return the total number of rings = (new rings + old SSSRs)
580 
581  <b>Notes:</b>
582  - if no SSSR rings are found on the molecule - MolOps::findSSSR() is called
583  first
584 */
585 int symmetrizeSSSR(ROMol &mol, std::vector<std::vector<int> > &res);
586 //! \overload
587 int symmetrizeSSSR(ROMol &mol);
588 
589 //@}
590 
591 //! \name Shortest paths and other matrices
592 //@{
593 
594 //! returns a molecule's adjacency matrix
595 /*!
596  \param mol the molecule of interest
597  \param useBO toggles use of bond orders in the matrix
598  \param emptyVal sets the empty value (for non-adjacent atoms)
599  \param force forces calculation of the matrix, even if already
600  computed
601  \param propNamePrefix used to set the cached property name
602 
603  \return the adjacency matrix.
604 
605  <b>Notes</b>
606  - The result of this is cached in the molecule's local property dictionary,
607  which will handle deallocation. The caller should <b>not</b> \c delete
608  this pointer.
609 
610 */
611 double *getAdjacencyMatrix(const ROMol &mol, bool useBO = false,
612  int emptyVal = 0, bool force = false,
613  const char *propNamePrefix = 0,
614  const boost::dynamic_bitset<> *bondsToUse = 0);
615 
616 //! Computes the molecule's topological distance matrix
617 /*!
618  Uses the Floyd-Warshall all-pairs-shortest-paths algorithm.
619 
620  \param mol the molecule of interest
621  \param useBO toggles use of bond orders in the matrix
622  \param useAtomWts sets the diagonal elements of the result to
623  6.0/(atomic number) so that the matrix can be used to calculate
624  Balaban J values. This does not affect the bond weights.
625  \param force forces calculation of the matrix, even if already
626  computed
627  \param propNamePrefix used to set the cached property name
628 
629  \return the distance matrix.
630 
631  <b>Notes</b>
632  - The result of this is cached in the molecule's local property dictionary,
633  which will handle deallocation. The caller should <b>not</b> \c delete
634  this pointer.
635 
636 
637 */
638 double *getDistanceMat(const ROMol &mol, bool useBO = false,
639  bool useAtomWts = false, bool force = false,
640  const char *propNamePrefix = 0);
641 
642 //! Computes the molecule's topological distance matrix
643 /*!
644  Uses the Floyd-Warshall all-pairs-shortest-paths algorithm.
645 
646  \param mol the molecule of interest
647  \param activeAtoms only elements corresponding to these atom indices
648  will be included in the calculation
649  \param bonds only bonds found in this list will be included in the
650  calculation
651  \param useBO toggles use of bond orders in the matrix
652  \param useAtomWts sets the diagonal elements of the result to
653  6.0/(atomic number) so that the matrix can be used to calculate
654  Balaban J values. This does not affect the bond weights.
655 
656  \return the distance matrix.
657 
658  <b>Notes</b>
659  - The results of this call are not cached, the caller <b>should</b> \c
660  delete
661  this pointer.
662 
663 
664 */
665 double *getDistanceMat(const ROMol &mol, const std::vector<int> &activeAtoms,
666  const std::vector<const Bond *> &bonds,
667  bool useBO = false, bool useAtomWts = false);
668 
669 //! Computes the molecule's 3D distance matrix
670 /*!
671 
672  \param mol the molecule of interest
673  \param confId the conformer to use
674  \param useAtomWts sets the diagonal elements of the result to
675  6.0/(atomic number)
676  \param force forces calculation of the matrix, even if already
677  computed
678  \param propNamePrefix used to set the cached property name
679  (if set to an empty string, the matrix will not be
680  cached)
681 
682  \return the distance matrix.
683 
684  <b>Notes</b>
685  - The result of this is cached in the molecule's local property dictionary,
686  which will handle deallocation. The caller should <b>not</b> \c delete
687  this pointer.
688 
689 */
690 double *get3DDistanceMat(const ROMol &mol, int confId = -1,
691  bool useAtomWts = false, bool force = false,
692  const char *propNamePrefix = 0);
693 //! Find the shortest path between two atoms
694 /*!
695  Uses the Bellman-Ford algorithm
696 
697  \param mol molecule of interest
698  \param aid1 index of the first atom
699  \param aid2 index of the second atom
700 
701  \return an std::list with the indices of the atoms along the shortest
702  path
703 
704  <b>Notes:</b>
705  - the starting and end atoms are included in the path
706  - if no path is found, an empty path is returned
707 
708 */
709 std::list<int> getShortestPath(const ROMol &mol, int aid1, int aid2);
710 
711 //@}
712 
713 #if 0
714  //! \name Canonicalization
715  //@{
716 
717  //! assign a canonical ordering to a molecule's atoms
718  /*!
719  The algorithm used here is a modification of the published Daylight canonical
720  smiles algorithm (i.e. it uses atom invariants and products of primes).
721 
722  \param mol the molecule of interest
723  \param ranks used to return the ranks
724  \param breakTies toggles breaking of ties (see below)
725  \param includeChirality toggles inclusion of chirality in the invariants
726  \param includeIsotopes toggles inclusion of isotopes in the invariants
727  \param rankHistory used to return the rank history (see below)
728 
729  <b>Notes:</b>
730  - Tie breaking should be done when it's important to have a full ordering
731  of the atoms (e.g. when generating canonical traversal trees). If it's
732  acceptable to have ties between symmetry-equivalent atoms (e.g. when
733  generating CIP codes), tie breaking can/should be skipped.
734  - if the \c rankHistory argument is provided, the evolution of the ranks of
735  individual atoms will be tracked. The \c rankHistory pointer should be
736  to a VECT_INT_VECT that has at least \c mol.getNumAtoms() elements.
737  */
738  void rankAtoms(const ROMol &mol,std::vector<int> &ranks,
739  bool breakTies=true,
740  bool includeChirality=true,
741  bool includeIsotopes=true,
742  std::vector<std::vector<int> > *rankHistory=0);
743  //! assign a canonical ordering to a sub-molecule's atoms
744  /*!
745  The algorithm used here is a modification of the published Daylight canonical
746  smiles algorithm (i.e. it uses atom invariants and products of primes).
747 
748  \param mol the molecule of interest
749  \param atomsToUse atoms to be included
750  \param bondsToUse bonds to be included
751  \param atomSymbols symbols to use for the atoms in the output (these are
752  used in place of atomic number and isotope information)
753  \param ranks used to return the ranks
754  \param breakTies toggles breaking of ties (see below)
755  \param rankHistory used to return the rank history (see below)
756 
757  <b>Notes:</b>
758  - Tie breaking should be done when it's important to have a full ordering
759  of the atoms (e.g. when generating canonical traversal trees). If it's
760  acceptable to have ties between symmetry-equivalent atoms (e.g. when
761  generating CIP codes), tie breaking can/should be skipped.
762  - if the \c rankHistory argument is provided, the evolution of the ranks of
763  individual atoms will be tracked. The \c rankHistory pointer should be
764  to a VECT_INT_VECT that has at least \c mol.getNumAtoms() elements.
765  */
766  void rankAtomsInFragment(const ROMol &mol,std::vector<int> &ranks,
767  const boost::dynamic_bitset<> &atomsToUse,
768  const boost::dynamic_bitset<> &bondsToUse,
769  const std::vector<std::string> *atomSymbols=0,
770  const std::vector<std::string> *bondSymbols=0,
771  bool breakTies=true,
772  std::vector<std::vector<int> > *rankHistory=0);
773 
774  // @}
775 #endif
776 //! \name Stereochemistry
777 //@{
778 
779 //! removes bogus chirality markers (those on non-sp3 centers):
780 void cleanupChirality(RWMol &mol);
781 
782 //! \brief Uses a conformer to assign ChiralType to a molecule's atoms
783 /*!
784  \param mol the molecule of interest
785  \param confId the conformer to use
786  \param replaceExistingTags if this flag is true, any existing atomic chiral
787  tags will be replaced
788 
789  If the conformer provided is not a 3D conformer, nothing will be done.
790 */
791 void assignChiralTypesFrom3D(ROMol &mol, int confId = -1,
792  bool replaceExistingTags = true);
793 
794 //! Assign stereochemistry tags to atoms (i.e. R/S) and bonds (i.e. Z/E)
795 /*!
796 
797  \param mol the molecule of interest
798  \param cleanIt toggles removal of stereo flags from double bonds that can
799  not have stereochemistry
800  \param force forces the calculation to be repeated even if it has
801  already been done
802  \param flagPossibleStereoCenters set the _ChiralityPossible property on
803  atoms that are possible stereocenters
804 
805  <b>Notes:</b>
806  - Throughout we assume that we're working with a hydrogen-suppressed
807  graph.
808 
809 */
810 void assignStereochemistry(ROMol &mol, bool cleanIt = false, bool force = false,
811  bool flagPossibleStereoCenters = false);
812 //! Removes all stereochemistry information from atoms (i.e. R/S) and bonds
813 //! (i.e. Z/E)
814 /*!
815 
816  \param mol the molecule of interest
817 */
818 void removeStereochemistry(ROMol &mol);
819 
820 //! \brief finds bonds that could be cis/trans in a molecule and mark them as
821 //! Bond::STEREOANY.
822 /*!
823  \param mol the molecule of interest
824  \param cleanIt toggles removal of stereo flags from double bonds that can
825  not have stereochemistry
826 
827  This function finds any double bonds that can potentially be part of
828  a cis/trans system. No attempt is made here to mark them cis or
829  trans. No attempt is made to detect double bond stereo in ring systems.
830 
831  This function is useful in the following situations:
832  - when parsing a mol file; for the bonds marked here, coordinate
833  information on the neighbors can be used to identify cis or trans states
834  - when writing a mol file; bonds that can be cis/trans but not marked as
835  either need to be specially marked in the mol file
836  - finding double bonds with unspecified stereochemistry so they
837  can be enumerated for downstream 3D tools
838 
839  The CIPranks on the neighboring atoms are checked in this function. The
840  _CIPCode property is set to any on the double bond.
841 */
842 void findPotentialStereoBonds(ROMol &mol, bool cleanIt = false);
843 //@}
844 
845 //! returns the number of atoms which have a particular property set
846 unsigned getNumAtomsWithDistinctProperty(const ROMol &mol, std::string prop);
847 
848 }; // end of namespace MolOps
849 }; // end of namespace RDKit
850 
851 #endif
ROMol * renumberAtoms(const ROMol &mol, const std::vector< unsigned int > &newOrder)
returns a copy of a molecule with the atoms renumbered
std::map< T, boost::shared_ptr< ROMol > > getMolFragsWithQuery(const ROMol &mol, T(*query)(const ROMol &, const Atom *), bool sanitizeFrags=true, const std::vector< T > *whiteList=0, bool negateList=false)
splits a molecule into pieces based on labels assigned using a query
std::list< int > getShortestPath(const ROMol &mol, int aid1, int aid2)
Find the shortest path between two atoms.
void fastFindRings(const ROMol &mol)
use a DFS algorithm to identify ring bonds and atoms in a molecule
unsigned int getMolFrags(const ROMol &mol, std::vector< int > &mapping)
find fragments (disconnected components of the molecular graph)
boost::uint32_t adjustDegreeFlags
Definition: MolOps.h:258
void Kekulize(RWMol &mol, bool markAtomsBonds=true, unsigned int maxBackTracks=100)
Kekulizes the molecule.
ROMol * removeHs(const ROMol &mol, bool implicitOnly=false, bool updateExplicitCount=false, bool sanitize=true)
returns a copy of a molecule with hydrogens removed
double computeBalabanJ(const ROMol &mol, bool useBO=true, bool force=false, const std::vector< int > *bondPath=0, bool cacheIt=true)
calculates Balaban's J index for the molecule
void assignChiralTypesFrom3D(ROMol &mol, int confId=-1, bool replaceExistingTags=true)
Uses a conformer to assign ChiralType to a molecule's atoms.
int findSSSR(const ROMol &mol, std::vector< std::vector< int > > &res)
finds a molecule's Smallest Set of Smallest Rings
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:30
ROMol * addHs(const ROMol &mol, bool explicitOnly=false, bool addCoords=false, const UINT_VECT *onlyOnAtoms=NULL)
returns a copy of a molecule with hydrogens added in as explicit Atoms
const int ci_LOCAL_INF
int symmetrizeSSSR(ROMol &mol, std::vector< std::vector< int > > &res)
symmetrize the molecule's Smallest Set of Smallest Rings
boost::uint32_t makeBondsGenericFlags
Definition: MolOps.h:266
AromaticityModel
Possible aromaticity models.
Definition: MolOps.h:385
boost::uint32_t adjustRingCountFlags
Definition: MolOps.h:260
boost::uint32_t makeAtomsGenericFlags
Definition: MolOps.h:268
void sanitizeMol(RWMol &mol, unsigned int &operationThatFailed, unsigned int sanitizeOps=SANITIZE_ALL)
carries out a collection of tasks for cleaning up a molecule and ensuring that it makes "chemical sense"
ROMol * adjustQueryProperties(const ROMol &mol, const AdjustQueryParameters *params=NULL)
returns a copy of a molecule with query properties adjusted
int countAtomElec(const Atom *at)
return the number of electrons available on an atom to donate for
INVAR_VECT::iterator INVAR_VECT_I
Definition: MolOps.h:30
void cleanupChirality(RWMol &mol)
removes bogus chirality markers (those on non-sp3 centers):
ROMol is a molecule class that is intended to have a fixed topology.
Definition: ROMol.h:106
bool atomHasConjugatedBond(const Atom *at)
returns whether or not the given Atom is involved in a conjugated bond
int getFormalCharge(const ROMol &mol)
sums up all atomic formal charges and returns the result
unsigned getNumAtomsWithDistinctProperty(const ROMol &mol, std::string prop)
returns the number of atoms which have a particular property set
double * getDistanceMat(const ROMol &mol, bool useBO=false, bool useAtomWts=false, bool force=false, const char *propNamePrefix=0)
Computes the molecule's topological distance matrix.
int setAromaticity(RWMol &mol, AromaticityModel model=AROMATICITY_DEFAULT, int(*func)(RWMol &)=NULL)
Sets up the aromaticity for a molecule.
void assignRadicals(RWMol &mol)
Called by the sanitizer to assign radical counts to atoms.
void setConjugation(ROMol &mol)
flags the molecule's conjugated bonds
INVAR_VECT::const_iterator INVAR_VECT_CI
Definition: MolOps.h:31
Includes a bunch of functionality for handling Atom and Bond queries.
Definition: Atom.h:29
ROMol * mergeQueryHs(const ROMol &mol, bool mergeUnmappedOnly=false)
void assignStereochemistry(ROMol &mol, bool cleanIt=false, bool force=false, bool flagPossibleStereoCenters=false)
Assign stereochemistry tags to atoms (i.e. R/S) and bonds (i.e. Z/E)
void removeStereochemistry(ROMol &mol)
Removes all stereochemistry information from atoms (i.e. R/S) and bonds (i.e. Z/E).
class for representing a bond
Definition: Bond.h:47
void cleanUp(RWMol &mol)
Designed to be called by the sanitizer to handle special cases before anything is done.
void setHybridization(ROMol &mol)
calculates and sets the hybridization of all a molecule's atoms
double * getAdjacencyMatrix(const ROMol &mol, bool useBO=false, int emptyVal=0, bool force=false, const char *propNamePrefix=0, const boost::dynamic_bitset<> *bondsToUse=0)
returns a molecule's adjacency matrix
std::vector< UINT > UINT_VECT
Definition: types.h:209
std::vector< double > INVAR_VECT
Definition: MolOps.h:28
double * get3DDistanceMat(const ROMol &mol, int confId=-1, bool useAtomWts=false, bool force=false, const char *propNamePrefix=0)
Computes the molecule's 3D distance matrix.
void adjustHs(RWMol &mol)
adjust the number of implicit and explicit Hs for special cases
AdjustQueryWhichFlags
Definition: MolOps.h:246
void findPotentialStereoBonds(ROMol &mol, bool cleanIt=false)
finds bonds that could be cis/trans in a molecule and marks them as Bond::STEREOANY.
The class for representing atoms.
Definition: Atom.h:68