RDKit
Open-source cheminformatics and machine learning.
MolOps.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2001-2012 Greg Landrum and Rational Discovery LLC
3 // Copyright (c) 2014, Novartis Institutes for BioMedical Research Inc.
4 //
5 // @@ All Rights Reserved @@
6 // This file is part of the RDKit.
7 // The contents are covered by the terms of the BSD license
8 // which is included in the file license.txt, found at the root
9 // of the RDKit source tree.
10 //
11 #ifndef _RD_MOL_OPS_H_
12 #define _RD_MOL_OPS_H_
13 
14 #include <vector>
15 #include <map>
16 #include <list>
18 #include <boost/smart_ptr.hpp>
19 #include <boost/dynamic_bitset.hpp>
21 #include <RDGeneral/types.h>
22 
23 extern const int ci_LOCAL_INF;
24 namespace RDKit {
25 class ROMol;
26 class RWMol;
27 class Atom;
28 class Bond;
29 typedef std::vector<double> INVAR_VECT;
30 typedef INVAR_VECT::iterator INVAR_VECT_I;
31 typedef INVAR_VECT::const_iterator INVAR_VECT_CI;
32 
33 //! \brief Groups a variety of molecular query and transformation operations.
34 namespace MolOps {
35 
36 //! return the number of electrons available on an atom to donate for
37 //! aromaticity
38 /*!
39  The result is determined using the default valency, number of lone pairs,
40  number of bonds and the formal charge. Note that the atom may not donate
41  all of these electrons to a ring for aromaticity (also used in Conjugation
42  and hybridization code).
43 
44  \param at the atom of interest
45 
46  \return the number of electrons
47 */
48 int countAtomElec(const Atom *at);
49 
50 //! sums up all atomic formal charges and returns the result
51 int getFormalCharge(const ROMol &mol);
52 
53 //! returns whether or not the given Atom is involved in a conjugated bond
54 bool atomHasConjugatedBond(const Atom *at);
55 
56 //! find fragments (disconnected components of the molecular graph)
57 /*!
58 
59  \param mol the molecule of interest
60  \param mapping used to return the mapping of Atoms->fragments.
61  On return \c mapping will be <tt>mol.getNumAtoms()</tt> long
62  and will contain the fragment assignment for each Atom
63 
64  \return the number of fragments found.
65 
66 */
67 unsigned int getMolFrags(const ROMol &mol, std::vector<int> &mapping);
68 //! find fragments (disconnected components of the molecular graph)
69 /*!
70 
71  \param mol the molecule of interest
72  \param frags used to return the Atoms in each fragment
73  On return \c frags will be \c numFrags long, and each entry
74  will contain the indices of the Atoms in that fragment.
75 
76  \return the number of fragments found.
77 
78 */
79 unsigned int getMolFrags(const ROMol &mol,
80  std::vector<std::vector<int> > &frags);
81 
82 //! splits a molecule into its component fragments
83 // (disconnected components of the molecular graph)
84 /*!
85 
86  \param mol the molecule of interest
87  \param sanitizeFrags toggles sanitization of the fragments after
88  they are built
89  \param frags used to return the mapping of Atoms->fragments.
90  if provided, \c frags will be <tt>mol.getNumAtoms()</tt> long
91  on return and will contain the fragment assignment for each Atom
92  \param fragsMolAtomMapping used to return the Atoms in each fragment
93  On return \c fragsMolAtomMapping will be \c numFrags long, and each entry
94  will contain the indices of the Atoms in that fragment.
95  \param copyConformers toggles copying conformers of the fragments after
96  they are built
97  \return a vector of the fragments as smart pointers to ROMols
98 
99 */
100 std::vector<boost::shared_ptr<ROMol> > getMolFrags(
101  const ROMol &mol, bool sanitizeFrags = true, std::vector<int> *frags = 0,
102  std::vector<std::vector<int> > *fragsMolAtomMapping = 0,
103  bool copyConformers = true);
104 
105 //! splits a molecule into pieces based on labels assigned using a query
106 /*!
107 
108  \param mol the molecule of interest
109  \param query the query used to "label" the molecule for fragmentation
110  \param sanitizeFrags toggles sanitization of the fragments after
111  they are built
112  \param whiteList if provided, only labels in the list will be kept
113  \param negateList if true, the white list logic will be inverted: only labels
114  not in the list will be kept
115 
116  \return a map of the fragments and their labels
117 
118 */
119 template <typename T>
120 std::map<T, boost::shared_ptr<ROMol> > getMolFragsWithQuery(
121  const ROMol &mol, T (*query)(const ROMol &, const Atom *),
122  bool sanitizeFrags = true, const std::vector<T> *whiteList = 0,
123  bool negateList = false);
124 
125 #if 0
126  //! finds a molecule's minimum spanning tree (MST)
127  /*!
128  \param mol the molecule of interest
129  \param mst used to return the MST as a vector of bond indices
130  */
131  void findSpanningTree(const ROMol &mol,std::vector<int> &mst);
132 #endif
133 
134 //! calculates Balaban's J index for the molecule
135 /*!
136  \param mol the molecule of interest
137  \param useBO toggles inclusion of the bond order in the calculation
138  (when false, we're not really calculating the J value)
139  \param force forces the calculation (instead of using cached results)
140  \param bondPath when included, only paths using bonds whose indices occur
141  in this vector will be included in the calculation
142  \param cacheIt If this is true, the calculated value will be cached
143  as a property on the molecule
144  \return the J index
145 
146 */
147 double computeBalabanJ(const ROMol &mol, bool useBO = true, bool force = false,
148  const std::vector<int> *bondPath = 0,
149  bool cacheIt = true);
150 //! \overload
151 double computeBalabanJ(double *distMat, int nb, int nAts);
152 
153 //! \name Dealing with hydrogens
154 //@{
155 
156 //! returns a copy of a molecule with hydrogens added in as explicit Atoms
157 /*!
158  \param mol the molecule to add Hs to
159  \param explicitOnly (optional) if this is \c true, only explicit Hs will be
160  added
161  \param addCoords (optional) If this is true, estimates for the atomic
162  coordinates
163  of the added Hs will be used.
164  \param onlyOnAtoms (optional) if provided, this should be a vector of
165  IDs of the atoms that will be considered for H addition.
166 
167  \return the new molecule
168 
169  <b>Notes:</b>
170  - it makes no sense to use the \c addCoords option if the molecule's
171  heavy
172  atoms don't already have coordinates.
173  - the caller is responsible for <tt>delete</tt>ing the pointer this
174  returns.
175  */
176 ROMol *addHs(const ROMol &mol, bool explicitOnly = false,
177  bool addCoords = false, const UINT_VECT *onlyOnAtoms = NULL);
178 //! \overload
179 // modifies the molecule in place
180 void addHs(RWMol &mol, bool explicitOnly = false, bool addCoords = false,
181  const UINT_VECT *onlyOnAtoms = NULL);
182 
183 //! returns a copy of a molecule with hydrogens removed
184 /*!
185  \param mol the molecule to remove Hs from
186  \param implicitOnly (optional) if this is \c true, only implicit Hs will be
187  removed
188  \param updateExplicitCount (optional) If this is \c true, when explicit Hs
189  are removed
190  from the graph, the heavy atom to which they are bound will have its
191  counter of
192  explicit Hs increased.
193  \param sanitize (optional) If this is \c true, the final molecule will be
194  sanitized
195 
196  \return the new molecule
197 
198  <b>Notes:</b>
199  - Hydrogens which aren't connected to a heavy atom will not be
200  removed. This prevents molecules like <tt>"[H][H]"</tt> from having
201  all atoms removed.
202  - Labelled hydrogen (e.g. atoms with atomic number=1, but mass > 1),
203  will not be removed.
204  - two coordinate Hs, like the central H in C[H-]C, will not be removed
205  - Hs connected to dummy atoms will not be removed
206 
207  - the caller is responsible for <tt>delete</tt>ing the pointer this
208  returns.
209 */
210 ROMol *removeHs(const ROMol &mol, bool implicitOnly = false,
211  bool updateExplicitCount = false, bool sanitize = true);
212 //! \overload
213 // modifies the molecule in place
214 void removeHs(RWMol &mol, bool implicitOnly = false,
215  bool updateExplicitCount = false, bool sanitize = true);
216 
217 //! returns a copy of a molecule with hydrogens removed and added as queries
218 //! to the heavy atoms to which they are bound.
219 /*!
220  This is really intended to be used with molecules that contain QueryAtoms
221 
222  \param mol the molecule to remove Hs from
223 
224  \return the new molecule
225 
226  <b>Notes:</b>
227  - Atoms that do not already have hydrogen count queries will have one
228  added, other H-related queries will not be touched. Examples:
229  - C[H] -> [C;!H0]
230  - [C;H1][H] -> [C;H1]
231  - [C;H2][H] -> [C;H2]
232  - Hydrogens which aren't connected to a heavy atom will not be
233  removed. This prevents molecules like <tt>"[H][H]"</tt> from having
234  all atoms removed.
235  - the caller is responsible for <tt>delete</tt>ing the pointer this returns.
236  - By default all hydrogens are removed, however if
237  mergeUnmappedOnly is true, any hydrogen participating
238  in an atom map will be retained
239 
240 */
241 ROMol *mergeQueryHs(const ROMol &mol, bool mergeUnmappedOnly = false);
242 //! \overload
243 // modifies the molecule in place
244 void mergeQueryHs(RWMol &mol, bool mergeUnmappedOnly = false);
245 
246 typedef enum {
253  ADJUST_IGNOREALL = 0xFFFFFFF
255 
257  bool adjustDegree; /**< add degree queries */
258  boost::uint32_t adjustDegreeFlags;
259  bool adjustRingCount; /**< add ring-count queries */
260  boost::uint32_t adjustRingCountFlags;
261 
262  bool makeDummiesQueries; /**< convert dummy atoms without isotope labels to
263  any-atom queries */
265  bool makeBondsGeneric; /**< convert bonds to generic queries (any bonds) */
266  boost::uint32_t makeBondsGenericFlags;
267  bool makeAtomsGeneric; /**< convert atoms to generic queries (any atoms) */
268  boost::uint32_t makeAtomsGenericFlags;
269  bool adjustHeavyDegree; /**< adjust the heavy-atom degree instead of overall
270  degree */
271  boost::uint32_t adjustHeavyDegreeFlags;
272 
274  : adjustDegree(true),
275  adjustDegreeFlags(ADJUST_IGNOREDUMMIES | ADJUST_IGNORECHAINS),
276  adjustRingCount(false),
277  adjustRingCountFlags(ADJUST_IGNOREDUMMIES | ADJUST_IGNORECHAINS),
278  makeDummiesQueries(true),
279  aromatizeIfPossible(true),
280  makeBondsGeneric(false),
281  makeBondsGenericFlags(ADJUST_IGNORENONE),
282  makeAtomsGeneric(false),
283  makeAtomsGenericFlags(ADJUST_IGNORENONE),
284  adjustHeavyDegree(false),
285  adjustHeavyDegreeFlags(ADJUST_IGNOREDUMMIES | ADJUST_IGNORECHAINS) {}
286 };
287 //! returns a copy of a molecule with query properties adjusted
288 /*!
289  \param mol the molecule to adjust
290  \param params controls the adjustments made
291 
292  \return the new molecule
293 */
294 ROMol *adjustQueryProperties(const ROMol &mol,
295  const AdjustQueryParameters *params = NULL);
296 //! \overload
297 // modifies the molecule in place
298 void adjustQueryProperties(RWMol &mol,
299  const AdjustQueryParameters *params = NULL);
300 
301 //! returns a copy of a molecule with the atoms renumbered
302 /*!
303 
304  \param mol the molecule to work with
305  \param newOrder the new ordering of the atoms (should be numAtoms long)
306  for example: if newOrder is [3,2,0,1], then atom 3 in the original
307  molecule will be atom 0 in the new one
308 
309  \return the new molecule
310 
311  <b>Notes:</b>
312  - the caller is responsible for <tt>delete</tt>ing the pointer this returns.
313 
314 */
315 ROMol *renumberAtoms(const ROMol &mol,
316  const std::vector<unsigned int> &newOrder);
317 
318 //@}
319 
320 //! \name Sanitization
321 //@{
322 
323 typedef enum {
335  SANITIZE_ALL = 0xFFFFFFF
336 } SanitizeFlags;
337 
338 //! \brief carries out a collection of tasks for cleaning up a molecule and
339 //! ensuring
340 //! that it makes "chemical sense"
341 /*!
342  This function calls the following in sequence
343  -# MolOps::cleanUp()
344  -# mol.updatePropertyCache()
345  -# MolOps::symmetrizeSSSR()
346  -# MolOps::Kekulize()
347  -# MolOps::assignRadicals()
348  -# MolOps::setAromaticity()
349  -# MolOps::setConjugation()
350  -# MolOps::setHybridization()
351  -# MolOps::cleanupChirality()
352  -# MolOps::adjustHs()
353 
354  \param mol : the RWMol to be cleaned
355 
356  \param operationThatFailed : the first (if any) sanitization operation that
357  fails is set here.
358  The values are taken from the \c SanitizeFlags
359  enum.
360  On success, the value is \c
361  SanitizeFlags::SANITIZE_NONE
362 
363  \param sanitizeOps : the bits here are used to set which sanitization
364  operations are carried
365  out. The elements of the \c SanitizeFlags enum define
366  the operations.
367 
368  <b>Notes:</b>
369  - If there is a failure in the sanitization, a \c SanitException
370  will be thrown.
371  - in general the user of this function should cast the molecule following
372  this
373  function to a ROMol, so that new atoms and bonds cannot be added to the
374  molecule and screw up the sanitizing that has been done here
375 */
376 void sanitizeMol(RWMol &mol, unsigned int &operationThatFailed,
377  unsigned int sanitizeOps = SANITIZE_ALL);
378 //! \overload
379 void sanitizeMol(RWMol &mol);
380 
381 //! Possible aromaticity models
382 /*!
383 - \c AROMATICITY_DEFAULT at the moment always uses \c AROMATICITY_RDKIT
384 - \c AROMATICITY_RDKIT is the standard RDKit model (as documented in the RDKit
385 Book)
386 - \c AROMATICITY_SIMPLE only considers 5- and 6-membered simple rings (it
387 does not consider the outer envelope of fused rings)
388 - \c AROMATICITY_CUSTOM uses a caller-provided function
389 */
390 typedef enum {
391  AROMATICITY_DEFAULT = 0x0, ///< future proofing
394  AROMATICITY_CUSTOM = 0xFFFFFFF ///< use a function
396 
397 //! Sets up the aromaticity for a molecule
398 /*!
399 
400  This is what happens here:
401  -# find all the simple rings by calling the findSSSR function
402  -# loop over all the Atoms in each ring and mark them if they are
403  candidates
404  for aromaticity. A ring atom is a candidate if it can spare electrons
405  to the ring and if it's from the first two rows of the periodic table.
406  -# based on the candidate atoms, mark the rings to be either candidates
407  or non-candidates. A ring is a candidate only if all its atoms are
408  candidates
409  -# apply Hueckel rule to each of the candidate rings to check if the ring
410  can be
411  aromatic
412 
413  \param mol the RWMol of interest
414  \param model the aromaticity model to use
415  \param func a custom function for assigning aromaticity (only used when
416  model=\c AROMATICITY_CUSTOM)
417 
418  \return >0 on success, <= 0 otherwise
419 
420  <b>Assumptions:</b>
421  - Kekulization has been done (i.e. \c MolOps::Kekulize() has already
422  been called)
423 
424 */
426  int (*func)(RWMol &) = NULL);
427 
428 //! Designed to be called by the sanitizer to handle special cases before
429 //! anything is done.
430 /*!
431 
432  Currently this:
433  - modifies nitro groups, so that the nitrogen does not have an unreasonable
434  valence of 5, as follows:
435  - the nitrogen gets a positive charge
436  - one of the oxygens gets a negative charge and the double bond to this
437  oxygen is changed to a single bond
438  The net result is that nitro groups can be counted on to be:
439  \c "[N+](=O)[O-]"
440  - modifies halogen-oxygen containing species as follows:
441  \c [Cl,Br,I](=O)(=O)(=O)O -> [X+3]([O-])([O-])([O-])O
442  \c [Cl,Br,I](=O)(=O)O -> [X+3]([O-])([O-])O
443  \c [Cl,Br,I](=O)O -> [X+]([O-])O
444  - converts the substructure [N,C]=P(=O)-* to [N,C]=[P+](-[O-])-*
445 
446  \param mol the molecule of interest
447 
448 */
449 void cleanUp(RWMol &mol);
450 
451 //! Called by the sanitizer to assign radical counts to atoms
452 void assignRadicals(RWMol &mol);
453 
454 //! adjust the number of implicit and explicit Hs for special cases
455 /*!
456 
457  Currently this:
458  - modifies aromatic nitrogens so that, when appropriate, they have an
459  explicit H marked (e.g. so that we get things like \c "c1cc[nH]cc1")
460 
461  \param mol the molecule of interest
462 
463  <b>Assumptions</b>
464  - this is called after the molecule has been sanitized,
465  aromaticity has been perceived, and the implicit valence of
466  everything has been calculated.
467 
468 */
469 void adjustHs(RWMol &mol);
470 
471 //! Kekulizes the molecule
472 /*!
473 
474  \param mol the molecule of interest
475  \param markAtomsBonds if this is set to true, \c isAromatic boolean settings
476  on both the Bonds and Atoms are turned to false
477  following
478  the Kekulization, otherwise they are left alone in
479  their
480  original state.
481  \param maxBackTracks the maximum number of attempts at back-tracking. The
482  algorithm
483  uses a back-tracking procedure to revisit a previous
484  setting of
485  double bond if we hit a wall in the kekulization
486  process
487 
488  <b>Notes:</b>
489  - even if \c markAtomsBonds is \c false the \c BondType for all aromatic
490  bonds will be changed from \c RDKit::Bond::AROMATIC to \c
491  RDKit::Bond::SINGLE
492  or RDKit::Bond::DOUBLE during Kekulization.
493 
494 */
495 void Kekulize(RWMol &mol, bool markAtomsBonds = true,
496  unsigned int maxBackTracks = 100);
497 
498 //! flags the molecule's conjugated bonds
499 void setConjugation(ROMol &mol);
500 
501 //! calculates and sets the hybridization of all a molecule's Atoms
502 void setHybridization(ROMol &mol);
503 
504 //@}
505 
506 //! \name Ring finding and SSSR
507 //@{
508 
509 //! finds a molecule's Smallest Set of Smallest Rings
510 /*!
511  Currently this implements a modified form of Figueras algorithm
512  (JCICS - Vol. 36, No. 5, 1996, 986-991)
513 
514  \param mol the molecule of interest
515  \param res used to return the vector of rings. Each entry is a vector with
516  atom indices. This information is also stored in the molecule's
517  RingInfo structure, so this argument is optional (see overload)
518 
519  \return number of smallest rings found
520 
521  Base algorithm:
522  - The original algorithm starts by finding representative degree 2
523  nodes.
524  - Representative because if a series of deg 2 nodes are found only
525  one of them is picked.
526  - The smallest ring around each of them is found.
527  - The bonds that connect to this degree 2 node are then chopped off,
528  yielding
529  new deg two nodes
530  - The process is repeated on the new deg 2 nodes.
531  - If no deg 2 nodes are found, a deg 3 node is picked. The smallest ring
532  with it is found. A bond from this is "carefully" (look in the paper)
533  selected and chopped, yielding deg 2 nodes. The process is same as
534  above once this is done.
535 
536  Our Modifications:
537  - If available, more than one smallest ring around a representative deg 2
538  node will be computed and stored
539  - Typically 3 rings are found around a degree 3 node (when no deg 2s are
540  available)
541  and all the bonds to that node are chopped.
542  - The extra rings that were found in this process are removed after all the
543  nodes
544  have been covered.
545 
546  These changes were motivated by several factors:
547  - We believe the original algorithm fails to find the correct SSSR
548  (finds the correct number of them but the wrong ones) on some sample mols
549  - Since SSSR may not be unique, a post-SSSR step to symmetrize may be done.
550  The extra rings this process adds can be quite useful.
551 */
552 int findSSSR(const ROMol &mol, std::vector<std::vector<int> > &res);
553 //! \overload
554 int findSSSR(const ROMol &mol, std::vector<std::vector<int> > *res = 0);
555 
556 //! use a DFS algorithm to identify ring bonds and atoms in a molecule
557 /*!
558  \b NOTE: though the RingInfo structure is populated by this function,
559  the only really reliable calls that can be made are to check if
560  mol.getRingInfo().numAtomRings(idx) or mol.getRingInfo().numBondRings(idx)
561  return values >0
562 */
563 void fastFindRings(const ROMol &mol);
564 
565 //! symmetrize the molecule's Smallest Set of Smallest Rings
566 /*!
567  SSSR rings obtained from "findSSSR" can be non-unique in some cases.
568  For example, cubane has five SSSR rings, not six as one would hope.
569 
570  This function adds additional rings to the SSSR list if necessary
571  to make the list symmetric, e.g. all atoms in cubane will be part of the same
572  number
573  of SSSRs. This function chooses these extra rings from the extra rings
574  computed
575  and discarded during findSSSR. The new rings are chosen such that:
576  - replacing a same sized ring in the SSSR list with an extra ring yields
577  the same union of bond IDs as the original SSSR list
578 
579  \param mol - the molecule of interest
580  \param res used to return the vector of rings. Each entry is a vector with
581  atom indices. This information is also stored in the molecule's
582  RingInfo structure, so this argument is optional (see overload)
583 
584  \return the total number of rings = (new rings + old SSSRs)
585 
586  <b>Notes:</b>
587  - if no SSSR rings are found on the molecule - MolOps::findSSSR() is called
588  first
589 */
590 int symmetrizeSSSR(ROMol &mol, std::vector<std::vector<int> > &res);
591 //! \overload
592 int symmetrizeSSSR(ROMol &mol);
593 
594 //@}
595 
596 //! \name Shortest paths and other matrices
597 //@{
598 
599 //! returns a molecule's adjacency matrix
600 /*!
601  \param mol the molecule of interest
602  \param useBO toggles use of bond orders in the matrix
603  \param emptyVal sets the empty value (for non-adjacent atoms)
604  \param force forces calculation of the matrix, even if already
605  computed
606  \param propNamePrefix used to set the cached property name
607 
608  \return the adjacency matrix.
609 
610  <b>Notes</b>
611  - The result of this is cached in the molecule's local property dictionary,
612  which will handle deallocation. The caller should <b>not</b> \c delete
613  this pointer.
614 
615 */
616 double *getAdjacencyMatrix(const ROMol &mol, bool useBO = false,
617  int emptyVal = 0, bool force = false,
618  const char *propNamePrefix = 0,
619  const boost::dynamic_bitset<> *bondsToUse = 0);
620 
621 //! Computes the molecule's topological distance matrix
622 /*!
623  Uses the Floyd-Warshall all-pairs-shortest-paths algorithm.
624 
625  \param mol the molecule of interest
626  \param useBO toggles use of bond orders in the matrix
627  \param useAtomWts sets the diagonal elements of the result to
628  6.0/(atomic number) so that the matrix can be used to calculate
629  Balaban J values. This does not affect the bond weights.
630  \param force forces calculation of the matrix, even if already
631  computed
632  \param propNamePrefix used to set the cached property name
633 
634  \return the distance matrix.
635 
636  <b>Notes</b>
637  - The result of this is cached in the molecule's local property dictionary,
638  which will handle deallocation. The caller should <b>not</b> \c delete
639  this pointer.
640 
641 
642 */
643 double *getDistanceMat(const ROMol &mol, bool useBO = false,
644  bool useAtomWts = false, bool force = false,
645  const char *propNamePrefix = 0);
646 
647 //! Computes the molecule's topological distance matrix
648 /*!
649  Uses the Floyd-Warshall all-pairs-shortest-paths algorithm.
650 
651  \param mol the molecule of interest
652  \param activeAtoms only elements corresponding to these atom indices
653  will be included in the calculation
654  \param bonds only bonds found in this list will be included in the
655  calculation
656  \param useBO toggles use of bond orders in the matrix
657  \param useAtomWts sets the diagonal elements of the result to
658  6.0/(atomic number) so that the matrix can be used to calculate
659  Balaban J values. This does not affect the bond weights.
660 
661  \return the distance matrix.
662 
663  <b>Notes</b>
664  - The results of this call are not cached, the caller <b>should</b> \c
665  delete
666  this pointer.
667 
668 
669 */
670 double *getDistanceMat(const ROMol &mol, const std::vector<int> &activeAtoms,
671  const std::vector<const Bond *> &bonds,
672  bool useBO = false, bool useAtomWts = false);
673 
674 //! Computes the molecule's 3D distance matrix
675 /*!
676 
677  \param mol the molecule of interest
678  \param confId the conformer to use
679  \param useAtomWts sets the diagonal elements of the result to
680  6.0/(atomic number)
681  \param force forces calculation of the matrix, even if already
682  computed
683  \param propNamePrefix used to set the cached property name
684  (if set to an empty string, the matrix will not be
685  cached)
686 
687  \return the distance matrix.
688 
689  <b>Notes</b>
690  - The result of this is cached in the molecule's local property dictionary,
691  which will handle deallocation. The caller should <b>not</b> \c delete
692  this pointer.
693 
694 */
695 double *get3DDistanceMat(const ROMol &mol, int confId = -1,
696  bool useAtomWts = false, bool force = false,
697  const char *propNamePrefix = 0);
698 //! Find the shortest path between two atoms
699 /*!
700  Uses the Bellman-Ford algorithm
701 
702  \param mol molecule of interest
703  \param aid1 index of the first atom
704  \param aid2 index of the second atom
705 
706  \return an std::list with the indices of the atoms along the shortest
707  path
708 
709  <b>Notes:</b>
710  - the starting and end atoms are included in the path
711  - if no path is found, an empty path is returned
712 
713 */
714 std::list<int> getShortestPath(const ROMol &mol, int aid1, int aid2);
715 
716 //@}
717 
718 #if 0
719  //! \name Canonicalization
720  //@{
721 
722  //! assign a canonical ordering to a molecule's atoms
723  /*!
724  The algorithm used here is a modification of the published Daylight canonical
725  smiles algorithm (i.e. it uses atom invariants and products of primes).
726 
727  \param mol the molecule of interest
728  \param ranks used to return the ranks
729  \param breakTies toggles breaking of ties (see below)
730  \param includeChirality toggles inclusion of chirality in the invariants
731  \param includeIsotopes toggles inclusion of isotopes in the invariants
732  \param rankHistory used to return the rank history (see below)
733 
734  <b>Notes:</b>
735  - Tie breaking should be done when it's important to have a full ordering
736  of the atoms (e.g. when generating canonical traversal trees). If it's
737  acceptable to have ties between symmetry-equivalent atoms (e.g. when
738  generating CIP codes), tie breaking can/should be skipped.
739  - if the \c rankHistory argument is provided, the evolution of the ranks of
740  individual atoms will be tracked. The \c rankHistory pointer should be
741  to a VECT_INT_VECT that has at least \c mol.getNumAtoms() elements.
742  */
743  void rankAtoms(const ROMol &mol,std::vector<int> &ranks,
744  bool breakTies=true,
745  bool includeChirality=true,
746  bool includeIsotopes=true,
747  std::vector<std::vector<int> > *rankHistory=0);
748  //! assign a canonical ordering to a sub-molecule's atoms
749  /*!
750  The algorithm used here is a modification of the published Daylight canonical
751  smiles algorithm (i.e. it uses atom invariants and products of primes).
752 
753  \param mol the molecule of interest
754  \param atomsToUse atoms to be included
755  \param bondsToUse bonds to be included
756  \param atomSymbols symbols to use for the atoms in the output (these are
757  used in place of atomic number and isotope information)
758  \param ranks used to return the ranks
759  \param breakTies toggles breaking of ties (see below)
760  \param rankHistory used to return the rank history (see below)
761 
762  <b>Notes:</b>
763  - Tie breaking should be done when it's important to have a full ordering
764  of the atoms (e.g. when generating canonical traversal trees). If it's
765  acceptable to have ties between symmetry-equivalent atoms (e.g. when
766  generating CIP codes), tie breaking can/should be skipped.
767  - if the \c rankHistory argument is provided, the evolution of the ranks of
768  individual atoms will be tracked. The \c rankHistory pointer should be
769  to a VECT_INT_VECT that has at least \c mol.getNumAtoms() elements.
770  */
771  void rankAtomsInFragment(const ROMol &mol,std::vector<int> &ranks,
772  const boost::dynamic_bitset<> &atomsToUse,
773  const boost::dynamic_bitset<> &bondsToUse,
774  const std::vector<std::string> *atomSymbols=0,
775  const std::vector<std::string> *bondSymbols=0,
776  bool breakTies=true,
777  std::vector<std::vector<int> > *rankHistory=0);
778 
779  // @}
780 #endif
781 //! \name Stereochemistry
782 //@{
783 
784 //! removes bogus chirality markers (those on non-sp3 centers):
785 void cleanupChirality(RWMol &mol);
786 
787 //! \brief Uses a conformer to assign ChiralType to a molecule's atoms
788 /*!
789  \param mol the molecule of interest
790  \param confId the conformer to use
791  \param replaceExistingTags if this flag is true, any existing atomic chiral
792  tags will be replaced
793 
794  If the conformer provided is not a 3D conformer, nothing will be done.
795 */
796 void assignChiralTypesFrom3D(ROMol &mol, int confId = -1,
797  bool replaceExistingTags = true);
798 
799 //! \brief Uses a conformer to assign ChiralTypes to a molecule's atoms and
800 //! stereo flags to its bonds
801 /*!
802 
803  \param mol the molecule of interest
804  \param confId the conformer to use
805  \param replaceExistingTags if this flag is true, any existing info about
806  stereochemistry will be replaced
807 
808 */
809 void assignStereochemistryFrom3D(ROMol &mol, int confId = -1,
810  bool replaceExistingTags = true);
811 
812 //! \brief Uses a conformer to assign directionality to the single bonds
813 //! around double bonds
814 /*!
815 
816  \param mol the molecule of interest
817  \param confId the conformer to use
818 */
819 void detectBondStereochemistry(ROMol &mol, int confId = -1);
820 
821 //! Assign stereochemistry tags to atoms (i.e. R/S) and bonds (i.e. Z/E)
822 /*!
823 
824  \param mol the molecule of interest
825  \param cleanIt toggles removal of stereo flags from double bonds that can
826  not have stereochemistry
827  \param force forces the calculation to be repeated even if it has
828  already been done
829  \param flagPossibleStereoCenters set the _ChiralityPossible property on
830  atoms that are possible stereocenters
831 
832  <b>Notes:</b>
833  - Throughout we assume that we're working with a hydrogen-suppressed
834  graph.
835 
836 */
837 void assignStereochemistry(ROMol &mol, bool cleanIt = false, bool force = false,
838  bool flagPossibleStereoCenters = false);
839 //! Removes all stereochemistry information from atoms (i.e. R/S) and bonds
840 //! (i.e. Z/E)
841 /*!
842 
843  \param mol the molecule of interest
844 */
845 void removeStereochemistry(ROMol &mol);
846 
847 //! \brief finds bonds that could be cis/trans in a molecule and mark them as
848 //! Bond::STEREOANY.
849 /*!
850  \param mol the molecule of interest
851  \param cleanIt toggles removal of stereo flags from double bonds that can
852  not have stereochemistry
853 
854  This function finds any double bonds that can potentially be part of
855  a cis/trans system. No attempt is made here to mark them cis or
856  trans. No attempt is made to detect double bond stereo in ring systems.
857 
858  This function is useful in the following situations:
859  - when parsing a mol file; for the bonds marked here, coordinate
860  information on the neighbors can be used to identify cis or trans states
861  - when writing a mol file; bonds that can be cis/trans but not marked as
862  either need to be specially marked in the mol file
863  - finding double bonds with unspecified stereochemistry so they
864  can be enumerated for downstream 3D tools
865 
866  The CIPranks on the neighboring atoms are checked in this function. The
867  _CIPCode property is set to any on the double bond.
868 */
869 void findPotentialStereoBonds(ROMol &mol, bool cleanIt = false);
870 //@}
871 
872 //! returns the number of atoms which have a particular property set
873 unsigned getNumAtomsWithDistinctProperty(const ROMol &mol, std::string prop);
874 
875 }; // end of namespace MolOps
876 }; // end of namespace RDKit
877 
878 #endif
ROMol * renumberAtoms(const ROMol &mol, const std::vector< unsigned int > &newOrder)
returns a copy of a molecule with the atoms renumbered
std::map< T, boost::shared_ptr< ROMol > > getMolFragsWithQuery(const ROMol &mol, T(*query)(const ROMol &, const Atom *), bool sanitizeFrags=true, const std::vector< T > *whiteList=0, bool negateList=false)
splits a molecule into pieces based on labels assigned using a query
std::list< int > getShortestPath(const ROMol &mol, int aid1, int aid2)
Find the shortest path between two atoms.
void fastFindRings(const ROMol &mol)
use a DFS algorithm to identify ring bonds and atoms in a molecule
unsigned int getMolFrags(const ROMol &mol, std::vector< int > &mapping)
find fragments (disconnected components of the molecular graph)
boost::uint32_t adjustDegreeFlags
Definition: MolOps.h:258
void Kekulize(RWMol &mol, bool markAtomsBonds=true, unsigned int maxBackTracks=100)
Kekulizes the molecule.
ROMol * removeHs(const ROMol &mol, bool implicitOnly=false, bool updateExplicitCount=false, bool sanitize=true)
returns a copy of a molecule with hydrogens removed
double computeBalabanJ(const ROMol &mol, bool useBO=true, bool force=false, const std::vector< int > *bondPath=0, bool cacheIt=true)
calculates Balaban's J index for the molecule
void assignStereochemistryFrom3D(ROMol &mol, int confId=-1, bool replaceExistingTags=true)
Uses a conformer to assign ChiralTypes to a molecule's atoms and stereo flags to its bonds...
void assignChiralTypesFrom3D(ROMol &mol, int confId=-1, bool replaceExistingTags=true)
Uses a conformer to assign ChiralType to a molecule's atoms.
int findSSSR(const ROMol &mol, std::vector< std::vector< int > > &res)
finds a molecule's Smallest Set of Smallest Rings
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:30
ROMol * addHs(const ROMol &mol, bool explicitOnly=false, bool addCoords=false, const UINT_VECT *onlyOnAtoms=NULL)
returns a copy of a molecule with hydrogens added in as explicit Atoms
const int ci_LOCAL_INF
int symmetrizeSSSR(ROMol &mol, std::vector< std::vector< int > > &res)
symmetrize the molecule's Smallest Set of Smallest Rings
boost::uint32_t makeBondsGenericFlags
Definition: MolOps.h:266
AromaticityModel
Possible aromaticity models.
Definition: MolOps.h:390
boost::uint32_t adjustRingCountFlags
Definition: MolOps.h:260
boost::uint32_t makeAtomsGenericFlags
Definition: MolOps.h:268
void sanitizeMol(RWMol &mol, unsigned int &operationThatFailed, unsigned int sanitizeOps=SANITIZE_ALL)
carries out a collection of tasks for cleaning up a molecule and
ROMol * adjustQueryProperties(const ROMol &mol, const AdjustQueryParameters *params=NULL)
returns a copy of a molecule with query properties adjusted
int countAtomElec(const Atom *at)
return the number of electrons available on an atom to donate for
INVAR_VECT::iterator INVAR_VECT_I
Definition: MolOps.h:30
void cleanupChirality(RWMol &mol)
removes bogus chirality markers (those on non-sp3 centers):
ROMol is a molecule class that is intended to have a fixed topology.
Definition: ROMol.h:103
bool atomHasConjugatedBond(const Atom *at)
returns whether or not the given Atom is involved in a conjugated bond
int getFormalCharge(const ROMol &mol)
sums up all atomic formal charges and returns the result
boost::uint32_t adjustHeavyDegreeFlags
Definition: MolOps.h:271
unsigned getNumAtomsWithDistinctProperty(const ROMol &mol, std::string prop)
returns the number of atoms which have a particular property set
double * getDistanceMat(const ROMol &mol, bool useBO=false, bool useAtomWts=false, bool force=false, const char *propNamePrefix=0)
Computes the molecule's topological distance matrix.
int setAromaticity(RWMol &mol, AromaticityModel model=AROMATICITY_DEFAULT, int(*func)(RWMol &)=NULL)
Sets up the aromaticity for a molecule.
void assignRadicals(RWMol &mol)
Called by the sanitizer to assign radical counts to atoms.
void setConjugation(ROMol &mol)
flags the molecule's conjugated bonds
INVAR_VECT::const_iterator INVAR_VECT_CI
Definition: MolOps.h:31
Std stuff.
Definition: Atom.h:29
ROMol * mergeQueryHs(const ROMol &mol, bool mergeUnmappedOnly=false)
void assignStereochemistry(ROMol &mol, bool cleanIt=false, bool force=false, bool flagPossibleStereoCenters=false)
Assign stereochemistry tags to atoms (i.e. R/S) and bonds (i.e. Z/E)
void removeStereochemistry(ROMol &mol)
Removes all stereochemistry information from atoms (i.e. R/S) and bonds.
class for representing a bond
Definition: Bond.h:47
void cleanUp(RWMol &mol)
Designed to be called by the sanitizer to handle special cases before.
void setHybridization(ROMol &mol)
calculates and sets the hybridization of all a molecule's atoms
double * getAdjacencyMatrix(const ROMol &mol, bool useBO=false, int emptyVal=0, bool force=false, const char *propNamePrefix=0, const boost::dynamic_bitset<> *bondsToUse=0)
returns a molecule's adjacency matrix
std::vector< UINT > UINT_VECT
Definition: types.h:210
std::vector< double > INVAR_VECT
Definition: MolOps.h:28
double * get3DDistanceMat(const ROMol &mol, int confId=-1, bool useAtomWts=false, bool force=false, const char *propNamePrefix=0)
Computes the molecule's 3D distance matrix.
void adjustHs(RWMol &mol)
adjust the number of implicit and explicit Hs for special cases
void detectBondStereochemistry(ROMol &mol, int confId=-1)
Uses a conformer to assign directionality to the single bonds around double bonds.
AdjustQueryWhichFlags
Definition: MolOps.h:246
void findPotentialStereoBonds(ROMol &mol, bool cleanIt=false)
finds bonds that could be cis/trans in a molecule and marks them as Bond::STEREOANY.
The class for representing atoms.
Definition: Atom.h:68