RDKit
Open-source cheminformatics and machine learning.
MolOps.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2001-2012 Greg Landrum and Rational Discovery LLC
3 // Copyright (c) 2014, Novartis Institutes for BioMedical Research Inc.
4 //
5 // @@ All Rights Reserved @@
6 // This file is part of the RDKit.
7 // The contents are covered by the terms of the BSD license
8 // which is included in the file license.txt, found at the root
9 // of the RDKit source tree.
10 //
11 #ifndef _RD_MOL_OPS_H_
12 #define _RD_MOL_OPS_H_
13 
14 #include <vector>
15 #include <map>
16 #include <list>
17 #include <boost/smart_ptr.hpp>
18 #include <boost/dynamic_bitset.hpp>
19 #include <RDGeneral/types.h>
20 
21 extern const int ci_LOCAL_INF;
22 namespace RDKit {
23 class ROMol;
24 class RWMol;
25 class Atom;
26 class Bond;
27 typedef std::vector<double> INVAR_VECT;
28 typedef INVAR_VECT::iterator INVAR_VECT_I;
29 typedef INVAR_VECT::const_iterator INVAR_VECT_CI;
30 
31 //! \brief Groups a variety of molecular query and transformation operations.
32 namespace MolOps {
33 
34 //! return the number of electrons available on an atom to donate for
35 //! aromaticity
36 /*!
37  The result is determined using the default valency, number of lone pairs,
38  number of bonds and the formal charge. Note that the atom may not donate
39  all of these electrons to a ring for aromaticity (also used in Conjugation
40  and hybridization code).
41 
42  \param at the atom of interest
43 
44  \return the number of electrons
45 */
46 int countAtomElec(const Atom *at);
47 
48 //! sums up all atomic formal charges and returns the result
49 int getFormalCharge(const ROMol &mol);
50 
51 //! returns whether or not the given Atom is involved in a conjugated bond
52 bool atomHasConjugatedBond(const Atom *at);
53 
54 //! find fragments (disconnected components of the molecular graph)
55 /*!
56 
57  \param mol the molecule of interest
58  \param mapping used to return the mapping of Atoms->fragments.
59  On return \c mapping will be <tt>mol.getNumAtoms()</tt> long
60  and will contain the fragment assignment for each Atom
61 
62  \return the number of fragments found.
63 
64 */
65 unsigned int getMolFrags(const ROMol &mol, std::vector<int> &mapping);
66 //! find fragments (disconnected components of the molecular graph)
67 /*!
68 
69  \param mol the molecule of interest
70  \param frags used to return the Atoms in each fragment
71  On return \c frags will be \c numFrags long, and each entry
72  will contain the indices of the Atoms in that fragment.
73 
74  \return the number of fragments found.
75 
76 */
77 unsigned int getMolFrags(const ROMol &mol,
78  std::vector<std::vector<int> > &frags);
79 
80 //! splits a molecule into its component fragments
81 //! (disconnected components of the molecular graph)
82 /*!
83 
84  \param mol the molecule of interest
85  \param sanitizeFrags toggles sanitization of the fragments after
86  they are built
87  \param frags used to return the mapping of Atoms->fragments.
88  if provided, \c frags will be <tt>mol.getNumAtoms()</tt> long
89  on return and will contain the fragment assignment for each Atom
90  \param fragsMolAtomMapping used to return the Atoms in each fragment
91  On return \c fragsMolAtomMapping will be \c numFrags long, and each entry
92  will contain the indices of the Atoms in that fragment.
93  \param copyConformers toggles copying conformers of the fragments after
94  they are built
95  \return a vector of the fragments as smart pointers to ROMols
96 
97 */
98 std::vector<boost::shared_ptr<ROMol> > getMolFrags(
99  const ROMol &mol, bool sanitizeFrags = true, std::vector<int> *frags = 0,
100  std::vector<std::vector<int> > *fragsMolAtomMapping = 0,
101  bool copyConformers = true);
102 
103 //! splits a molecule into pieces based on labels assigned using a query
104 /*!
105 
106  \param mol the molecule of interest
107  \param query the query used to "label" the molecule for fragmentation
108  \param sanitizeFrags toggles sanitization of the fragments after
109  they are built
110  \param whiteList if provided, only labels in the list will be kept
111  \param negateList if true, the white list logic will be inverted: only labels
112  not in the list will be kept
113 
114  \return a map of the fragments and their labels
115 
116 */
117 template <typename T>
118 std::map<T, boost::shared_ptr<ROMol> > getMolFragsWithQuery(
119  const ROMol &mol, T (*query)(const ROMol &, const Atom *),
120  bool sanitizeFrags = true, const std::vector<T> *whiteList = 0,
121  bool negateList = false);
122 
123 #if 0
124  //! finds a molecule's minimum spanning tree (MST)
125  /*!
126  \param mol the molecule of interest
127  \param mst used to return the MST as a vector of bond indices
128  */
129  void findSpanningTree(const ROMol &mol,std::vector<int> &mst);
130 #endif
131 
132 //! calculates Balaban's J index for the molecule
133 /*!
134  \param mol the molecule of interest
135  \param useBO toggles inclusion of the bond order in the calculation
136  (when false, we're not really calculating the J value)
137  \param force forces the calculation (instead of using cached results)
138  \param bondPath when included, only paths using bonds whose indices occur
139  in this vector will be included in the calculation
140  \param cacheIt If this is true, the calculated value will be cached
141  as a property on the molecule
142  \return the J index
143 
144 */
145 double computeBalabanJ(const ROMol &mol, bool useBO = true, bool force = false,
146  const std::vector<int> *bondPath = 0,
147  bool cacheIt = true);
148 //! \overload
149 double computeBalabanJ(double *distMat, int nb, int nAts);
150 
151 //! \name Dealing with hydrogens
152 //@{
153 
154 //! returns a copy of a molecule with hydrogens added in as explicit Atoms
155 /*!
156  \param mol the molecule to add Hs to
157  \param explicitOnly (optional) if this is \c true, only explicit Hs will be
158  added
159  \param addCoords (optional) If this is true, estimates for the atomic
160  coordinates
161  of the added Hs will be used.
162  \param onlyOnAtoms (optional) if provided, this should be a vector of
163  IDs of the atoms that will be considered for H addition.
164 
165  \return the new molecule
166 
167  <b>Notes:</b>
168  - it makes no sense to use the \c addCoords option if the molecule's
169  heavy
170  atoms don't already have coordinates.
171  - the caller is responsible for <tt>delete</tt>ing the pointer this
172  returns.
173  */
174 ROMol *addHs(const ROMol &mol, bool explicitOnly = false,
175  bool addCoords = false, const UINT_VECT *onlyOnAtoms = NULL);
176 //! \overload
177 // modifies the molecule in place
178 void addHs(RWMol &mol, bool explicitOnly = false, bool addCoords = false,
179  const UINT_VECT *onlyOnAtoms = NULL);
180 
181 //! returns a copy of a molecule with hydrogens removed
182 /*!
183  \param mol the molecule to remove Hs from
184  \param implicitOnly (optional) if this is \c true, only implicit Hs will be
185  removed
186  \param updateExplicitCount (optional) If this is \c true, when explicit Hs
187  are removed
188  from the graph, the heavy atom to which they are bound will have its
189  counter of
190  explicit Hs increased.
191  \param sanitize (optional) If this is \c true, the final molecule will be
192  sanitized
193 
194  \return the new molecule
195 
196  <b>Notes:</b>
197  - Hydrogens which aren't connected to a heavy atom will not be
198  removed. This prevents molecules like <tt>"[H][H]"</tt> from having
199  all atoms removed.
200  - Labelled hydrogen (e.g. atoms with atomic number=1, but mass > 1),
201  will not be removed.
202  - two coordinate Hs, like the central H in C[H-]C, will not be removed
203  - Hs connected to dummy atoms will not be removed
204 
205  - the caller is responsible for <tt>delete</tt>ing the pointer this
206  returns.
207 */
208 ROMol *removeHs(const ROMol &mol, bool implicitOnly = false,
209  bool updateExplicitCount = false, bool sanitize = true);
210 //! \overload
211 // modifies the molecule in place
212 void removeHs(RWMol &mol, bool implicitOnly = false,
213  bool updateExplicitCount = false, bool sanitize = true);
214 
215 //! returns a copy of a molecule with hydrogens removed and added as queries
216 //! to the heavy atoms to which they are bound.
217 /*!
218  This is really intended to be used with molecules that contain QueryAtoms
219 
220  \param mol the molecule to remove Hs from
221 
222  \return the new molecule
223 
224  <b>Notes:</b>
225  - Atoms that do not already have hydrogen count queries will have one
226  added, other H-related queries will not be touched. Examples:
227  - C[H] -> [C;!H0]
228  - [C;H1][H] -> [C;H1]
229  - [C;H2][H] -> [C;H2]
230  - Hydrogens which aren't connected to a heavy atom will not be
231  removed. This prevents molecules like <tt>"[H][H]"</tt> from having
232  all atoms removed.
233  - the caller is responsible for <tt>delete</tt>ing the pointer this returns.
234  - By default all hydrogens are removed, however if
235  mergeUnmappedOnly is true, any hydrogen participating
236  in an atom map will be retained
237 
238 */
239 ROMol *mergeQueryHs(const ROMol &mol, bool mergeUnmappedOnly = false);
240 //! \overload
241 // modifies the molecule in place
242 void mergeQueryHs(RWMol &mol, bool mergeUnmappedOnly = false);
243 
244 typedef enum {
251  ADJUST_IGNOREALL = 0xFFFFFFF
253 
255  bool adjustDegree; /**< add degree queries */
256  boost::uint32_t adjustDegreeFlags;
257  bool adjustRingCount; /**< add ring-count queries */
258  boost::uint32_t adjustRingCountFlags;
259 
260  bool makeDummiesQueries; /**< convert dummy atoms without isotope labels to
261  any-atom queries */
263  bool makeBondsGeneric; /**< convert bonds to generic queries (any bonds) */
264  boost::uint32_t makeBondsGenericFlags;
265  bool makeAtomsGeneric; /**< convert atoms to generic queries (any atoms) */
266  boost::uint32_t makeAtomsGenericFlags;
267 
269  : adjustDegree(true),
270  adjustDegreeFlags(ADJUST_IGNOREDUMMIES | ADJUST_IGNORECHAINS),
271  adjustRingCount(false),
272  adjustRingCountFlags(ADJUST_IGNOREDUMMIES | ADJUST_IGNORECHAINS),
273  makeDummiesQueries(true),
274  aromatizeIfPossible(true),
275  makeBondsGeneric(false),
276  makeBondsGenericFlags(ADJUST_IGNORENONE),
277  makeAtomsGeneric(false),
278  makeAtomsGenericFlags(ADJUST_IGNORENONE) {}
279 };
280 //! returns a copy of a molecule with query properties adjusted
281 /*!
282  \param mol the molecule to adjust
283  \param params controls the adjustments made
284 
285  \return the new molecule
286 */
287 ROMol *adjustQueryProperties(const ROMol &mol,
288  const AdjustQueryParameters *params = NULL);
289 //! \overload
290 // modifies the molecule in place
291 void adjustQueryProperties(RWMol &mol,
292  const AdjustQueryParameters *params = NULL);
293 
294 //! returns a copy of a molecule with the atoms renumbered
295 /*!
296 
297  \param mol the molecule to work with
298  \param newOrder the new ordering of the atoms (should be numAtoms long)
299  for example: if newOrder is [3,2,0,1], then atom 3 in the original
300  molecule will be atom 0 in the new one
301 
302  \return the new molecule
303 
304  <b>Notes:</b>
305  - the caller is responsible for <tt>delete</tt>ing the pointer this returns.
306 
307 */
308 ROMol *renumberAtoms(const ROMol &mol,
309  const std::vector<unsigned int> &newOrder);
310 
311 //@}
312 
313 //! \name Sanitization
314 //@{
315 
316 typedef enum {
328  SANITIZE_ALL = 0xFFFFFFF
329 } SanitizeFlags;
330 
331 //! \brief carries out a collection of tasks for cleaning up a molecule and
332 //! ensuring
333 //! that it makes "chemical sense"
334 /*!
335  This function calls the following in sequence
336  -# MolOps::cleanUp()
337  -# mol.updatePropertyCache()
338  -# MolOps::symmetrizeSSSR()
339  -# MolOps::Kekulize()
340  -# MolOps::assignRadicals()
341  -# MolOps::setAromaticity()
342  -# MolOps::setConjugation()
343  -# MolOps::setHybridization()
344  -# MolOps::cleanupChirality()
345  -# MolOps::adjustHs()
346 
347  \param mol : the RWMol to be cleaned
348 
349  \param operationThatFailed : the first (if any) sanitization operation that
350  fails is set here.
351  The values are taken from the \c SanitizeFlags
352  enum.
353  On success, the value is \c
354  SanitizeFlags::SANITIZE_NONE
355 
356  \param sanitizeOps : the bits here are used to set which sanitization
357  operations are carried
358  out. The elements of the \c SanitizeFlags enum define
359  the operations.
360 
361  <b>Notes:</b>
362  - If there is a failure in the sanitization, a \c SanitException
363  will be thrown.
364  - in general the user of this function should cast the molecule following
365  this
366  function to a ROMol, so that new atoms and bonds cannot be added to the
367  molecule and screw up the sanitizing that has been done here
368 */
369 void sanitizeMol(RWMol &mol, unsigned int &operationThatFailed,
370  unsigned int sanitizeOps = SANITIZE_ALL);
371 //! \overload
372 void sanitizeMol(RWMol &mol);
373 
374 //! Possible aromaticity models
375 /*!
376 - \c AROMATICITY_DEFAULT at the moment always uses \c AROMATICITY_RDKIT
377 - \c AROMATICITY_RDKIT is the standard RDKit model (as documented in the RDKit
378 Book)
379 - \c AROMATICITY_SIMPLE only considers 5- and 6-membered simple rings (it
380 does not consider the outer envelope of fused rings)
381 - \c AROMATICITY_CUSTOM uses a caller-provided function
382 */
383 typedef enum {
384  AROMATICITY_DEFAULT = 0x0, ///< future proofing
387  AROMATICITY_CUSTOM = 0xFFFFFFF ///< use a function
389 
390 //! Sets up the aromaticity for a molecule
391 /*!
392 
393  This is what happens here:
394  -# find all the simple rings by calling the findSSSR function
395  -# loop over all the Atoms in each ring and mark them if they are
396  candidates
397  for aromaticity. A ring atom is a candidate if it can spare electrons
398  to the ring and if it's from the first two rows of the periodic table.
399  -# based on the candidate atoms, mark the rings to be either candidates
400  or non-candidates. A ring is a candidate only if all its atoms are
401  candidates
402  -# apply Hueckel rule to each of the candidate rings to check if the ring
403  can be
404  aromatic
405 
406  \param mol the RWMol of interest
407  \param model the aromaticity model to use
408  \param func a custom function for assigning aromaticity (only used when
409  model=\c AROMATICITY_CUSTOM)
410 
411  \return >0 on success, <= 0 otherwise
412 
413  <b>Assumptions:</b>
414  - Kekulization has been done (i.e. \c MolOps::Kekulize() has already
415  been called)
416 
417 */
419  int (*func)(RWMol &) = NULL);
420 
421 //! Designed to be called by the sanitizer to handle special cases before
422 //! anything is done.
423 /*!
424 
425  Currently this:
426  - modifies nitro groups, so that the nitrogen does not have an unreasonable
427  valence of 5, as follows:
428  - the nitrogen gets a positive charge
429  - one of the oxygens gets a negative charge and the double bond to this
430  oxygen is changed to a single bond
431  The net result is that nitro groups can be counted on to be:
432  \c "[N+](=O)[O-]"
433  - modifies halogen-oxygen containing species as follows:
434  \c [Cl,Br,I](=O)(=O)(=O)O -> [X+3]([O-])([O-])([O-])O
435  \c [Cl,Br,I](=O)(=O)O -> [X+2]([O-])([O-])O
436  \c [Cl,Br,I](=O)O -> [X+]([O-])O
437  - converts the substructure [N,C]=P(=O)-* to [N,C]=[P+](-[O-])-*
438 
439  \param mol the molecule of interest
440 
441 */
442 void cleanUp(RWMol &mol);
443 
444 //! Called by the sanitizer to assign radical counts to atoms
445 void assignRadicals(RWMol &mol);
446 
447 //! adjust the number of implicit and explicit Hs for special cases
448 /*!
449 
450  Currently this:
451  - modifies aromatic nitrogens so that, when appropriate, they have an
452  explicit H marked (e.g. so that we get things like \c "c1cc[nH]cc1")
453 
454  \param mol the molecule of interest
455 
456  <b>Assumptions</b>
457  - this is called after the molecule has been sanitized,
458  aromaticity has been perceived, and the implicit valence of
459  everything has been calculated.
460 
461 */
462 void adjustHs(RWMol &mol);
463 
464 //! Kekulizes the molecule
465 /*!
466 
467  \param mol the molecule of interest
468  \param markAtomsBonds if this is set to true, \c isAromatic boolean settings
469  on both the Bonds and Atoms are turned to false
470  following
471  the Kekulization, otherwise they are left alone in
472  their
473  original state.
474  \param maxBackTracks the maximum number of attempts at back-tracking. The
475  algorithm
476  uses a back-tracking procedure to revisit a previous
477  setting of
478  a double bond if we hit a wall in the kekulization
479  process
480 
481  <b>Notes:</b>
482  - even if \c markAtomsBonds is \c false the \c BondType for all aromatic
483  bonds will be changed from \c RDKit::Bond::AROMATIC to \c
484  RDKit::Bond::SINGLE
485  or RDKit::Bond::DOUBLE during Kekulization.
486 
487 */
488 void Kekulize(RWMol &mol, bool markAtomsBonds = true,
489  unsigned int maxBackTracks = 100);
490 
491 //! flags the molecule's conjugated bonds
492 void setConjugation(ROMol &mol);
493 
494 //! calculates and sets the hybridization of all of a molecule's Atoms
495 void setHybridization(ROMol &mol);
496 
497 // @}
498 
499 //! \name Ring finding and SSSR
500 //@{
501 
502 //! finds a molecule's Smallest Set of Smallest Rings
503 /*!
504  Currently this implements a modified form of Figueras algorithm
505  (JCICS - Vol. 36, No. 5, 1996, 986-991)
506 
507  \param mol the molecule of interest
508  \param res used to return the vector of rings. Each entry is a vector with
509  atom indices. This information is also stored in the molecule's
510  RingInfo structure, so this argument is optional (see overload)
511 
512  \return number of smallest rings found
513 
514  Base algorithm:
515  - The original algorithm starts by finding representative degree 2
516  nodes.
517  - Representative because if a series of deg 2 nodes are found only
518  one of them is picked.
519  - The smallest ring around each of them is found.
520  - The bonds that connect to this degree 2 node are then chopped off,
521  yielding
522  new deg two nodes
523  - The process is repeated on the new deg 2 nodes.
524  - If no deg 2 nodes are found, a deg 3 node is picked. The smallest ring
525  with it is found. A bond from this is "carefully" (look in the paper)
526  selected and chopped, yielding deg 2 nodes. The process is same as
527  above once this is done.
528 
529  Our Modifications:
530  - If available, more than one smallest ring around a representative deg 2
531  node will be computed and stored
532  - Typically 3 rings are found around a degree 3 node (when no deg 2s are
533  available)
534  and all the bonds to that node are chopped.
535  - The extra rings that were found in this process are removed after all the
536  nodes
537  have been covered.
538 
539  These changes were motivated by several factors:
540  - We believe the original algorithm fails to find the correct SSSR
541  (finds the correct number of them but the wrong ones) on some sample mols
542  - Since SSSR may not be unique, a post-SSSR step to symmetrize may be done.
543  The extra rings this process adds can be quite useful.
544 */
545 int findSSSR(const ROMol &mol, std::vector<std::vector<int> > &res);
546 //! \overload
547 int findSSSR(const ROMol &mol, std::vector<std::vector<int> > *res = 0);
548 
549 //! use a DFS algorithm to identify ring bonds and atoms in a molecule
550 /*!
551  \b NOTE: though the RingInfo structure is populated by this function,
552  the only really reliable calls that can be made are to check if
553  mol.getRingInfo().numAtomRings(idx) or mol.getRingInfo().numBondRings(idx)
554  return values >0
555 */
556 void fastFindRings(const ROMol &mol);
557 
558 //! symmetrize the molecule's Smallest Set of Smallest Rings
559 /*!
560  SSSR rings obtained from "findSSSR" can be non-unique in some cases.
561  For example, cubane has five SSSR rings, not six as one would hope.
562 
563  This function adds additional rings to the SSSR list if necessary
564  to make the list symmetric, e.g. all atoms in cubane will be part of the same
565  number
566  of SSSRs. This function chooses these extra rings from the extra rings
567  computed
568  and discarded during findSSSR. The new rings are chosen such that:
569  - replacing a same sized ring in the SSSR list with an extra ring yields
570  the same union of bond IDs as the original SSSR list
571 
572  \param mol - the molecule of interest
573  \param res used to return the vector of rings. Each entry is a vector with
574  atom indices. This information is also stored in the molecule's
575  RingInfo structure, so this argument is optional (see overload)
576 
577  \return the total number of rings = (new rings + old SSSRs)
578 
579  <b>Notes:</b>
580  - if no SSSR rings are found on the molecule - MolOps::findSSSR() is called
581  first
582 */
583 int symmetrizeSSSR(ROMol &mol, std::vector<std::vector<int> > &res);
584 //! \overload
585 int symmetrizeSSSR(ROMol &mol);
586 
587 //@}
588 
589 //! \name Shortest paths and other matrices
590 //@{
591 
592 //! returns a molecule's adjacency matrix
593 /*!
594  \param mol the molecule of interest
595  \param useBO toggles use of bond orders in the matrix
596  \param emptyVal sets the empty value (for non-adjacent atoms)
597  \param force forces calculation of the matrix, even if already
598  computed
599  \param propNamePrefix used to set the cached property name
600 
601  \return the adjacency matrix.
602 
603  <b>Notes</b>
604  - The result of this is cached in the molecule's local property dictionary,
605  which will handle deallocation. The caller should <b>not</b> \c delete
606  this pointer.
607 
608 */
609 double *getAdjacencyMatrix(const ROMol &mol, bool useBO = false,
610  int emptyVal = 0, bool force = false,
611  const char *propNamePrefix = 0,
612  const boost::dynamic_bitset<> *bondsToUse = 0);
613 
614 //! Computes the molecule's topological distance matrix
615 /*!
616  Uses the Floyd-Warshall all-pairs-shortest-paths algorithm.
617 
618  \param mol the molecule of interest
619  \param useBO toggles use of bond orders in the matrix
620  \param useAtomWts sets the diagonal elements of the result to
621  6.0/(atomic number) so that the matrix can be used to calculate
622  Balaban J values. This does not affect the bond weights.
623  \param force forces calculation of the matrix, even if already
624  computed
625  \param propNamePrefix used to set the cached property name
626 
627  \return the distance matrix.
628 
629  <b>Notes</b>
630  - The result of this is cached in the molecule's local property dictionary,
631  which will handle deallocation. The caller should <b>not</b> \c delete
632  this pointer.
633 
634 
635 */
636 double *getDistanceMat(const ROMol &mol, bool useBO = false,
637  bool useAtomWts = false, bool force = false,
638  const char *propNamePrefix = 0);
639 
640 //! Computes the molecule's topological distance matrix
641 /*!
642  Uses the Floyd-Warshall all-pairs-shortest-paths algorithm.
643 
644  \param mol the molecule of interest
645  \param activeAtoms only elements corresponding to these atom indices
646  will be included in the calculation
647  \param bonds only bonds found in this list will be included in the
648  calculation
649  \param useBO toggles use of bond orders in the matrix
650  \param useAtomWts sets the diagonal elements of the result to
651  6.0/(atomic number) so that the matrix can be used to calculate
652  Balaban J values. This does not affect the bond weights.
653 
654  \return the distance matrix.
655 
656  <b>Notes</b>
657  - The results of this call are not cached, the caller <b>should</b> \c
658  delete
659  this pointer.
660 
661 
662 */
663 double *getDistanceMat(const ROMol &mol, const std::vector<int> &activeAtoms,
664  const std::vector<const Bond *> &bonds,
665  bool useBO = false, bool useAtomWts = false);
666 
667 //! Computes the molecule's 3D distance matrix
668 /*!
669 
670  \param mol the molecule of interest
671  \param confId the conformer to use
672  \param useAtomWts sets the diagonal elements of the result to
673  6.0/(atomic number)
674  \param force forces calculation of the matrix, even if already
675  computed
676  \param propNamePrefix used to set the cached property name
677  (if set to an empty string, the matrix will not be
678  cached)
679 
680  \return the distance matrix.
681 
682  <b>Notes</b>
683  - The result of this is cached in the molecule's local property dictionary,
684  which will handle deallocation. The caller should <b>not</b> \c delete
685  this pointer.
686 
687 */
688 double *get3DDistanceMat(const ROMol &mol, int confId = -1,
689  bool useAtomWts = false, bool force = false,
690  const char *propNamePrefix = 0);
691 //! Find the shortest path between two atoms
692 /*!
693  Uses the Bellman-Ford algorithm
694 
695  \param mol molecule of interest
696  \param aid1 index of the first atom
697  \param aid2 index of the second atom
698 
699  \return an std::list with the indices of the atoms along the shortest
700  path
701 
702  <b>Notes:</b>
703  - the starting and end atoms are included in the path
704  - if no path is found, an empty path is returned
705 
706 */
707 std::list<int> getShortestPath(const ROMol &mol, int aid1, int aid2);
708 
709 //@}
710 
711 #if 0
712  //! \name Canonicalization
713  //@{
714 
715  //! assign a canonical ordering to a molecule's atoms
716  /*!
717  The algorithm used here is a modification of the published Daylight canonical
718  smiles algorithm (i.e. it uses atom invariants and products of primes).
719 
720  \param mol the molecule of interest
721  \param ranks used to return the ranks
722  \param breakTies toggles breaking of ties (see below)
723  \param includeChirality toggles inclusion of chirality in the invariants
724  \param includeIsotopes toggles inclusion of isotopes in the invariants
725  \param rankHistory used to return the rank history (see below)
726 
727  <b>Notes:</b>
728  - Tie breaking should be done when it's important to have a full ordering
729  of the atoms (e.g. when generating canonical traversal trees). If it's
730  acceptable to have ties between symmetry-equivalent atoms (e.g. when
731  generating CIP codes), tie breaking can/should be skipped.
732  - if the \c rankHistory argument is provided, the evolution of the ranks of
733  individual atoms will be tracked. The \c rankHistory pointer should be
734  to a VECT_INT_VECT that has at least \c mol.getNumAtoms() elements.
735  */
736  void rankAtoms(const ROMol &mol,std::vector<int> &ranks,
737  bool breakTies=true,
738  bool includeChirality=true,
739  bool includeIsotopes=true,
740  std::vector<std::vector<int> > *rankHistory=0);
741  //! assign a canonical ordering to a sub-molecule's atoms
742  /*!
743  The algorithm used here is a modification of the published Daylight canonical
744  smiles algorithm (i.e. it uses atom invariants and products of primes).
745 
746  \param mol the molecule of interest
747  \param atomsToUse atoms to be included
748  \param bondsToUse bonds to be included
749  \param atomSymbols symbols to use for the atoms in the output (these are
750  used in place of atomic number and isotope information)
751  \param ranks used to return the ranks
752  \param breakTies toggles breaking of ties (see below)
753  \param rankHistory used to return the rank history (see below)
754 
755  <b>Notes:</b>
756  - Tie breaking should be done when it's important to have a full ordering
757  of the atoms (e.g. when generating canonical traversal trees). If it's
758  acceptable to have ties between symmetry-equivalent atoms (e.g. when
759  generating CIP codes), tie breaking can/should be skipped.
760  - if the \c rankHistory argument is provided, the evolution of the ranks of
761  individual atoms will be tracked. The \c rankHistory pointer should be
762  to a VECT_INT_VECT that has at least \c mol.getNumAtoms() elements.
763  */
764  void rankAtomsInFragment(const ROMol &mol,std::vector<int> &ranks,
765  const boost::dynamic_bitset<> &atomsToUse,
766  const boost::dynamic_bitset<> &bondsToUse,
767  const std::vector<std::string> *atomSymbols=0,
768  const std::vector<std::string> *bondSymbols=0,
769  bool breakTies=true,
770  std::vector<std::vector<int> > *rankHistory=0);
771 
772  // @}
773 #endif
774 //! \name Stereochemistry
775 //@{
776 
777 //! removes bogus chirality markers (those on non-sp3 centers):
778 void cleanupChirality(RWMol &mol);
779 
780 //! \brief Uses a conformer to assign ChiralType to a molecule's atoms
781 /*!
782  \param mol the molecule of interest
783  \param confId the conformer to use
784  \param replaceExistingTags if this flag is true, any existing atomic chiral
785  tags will be replaced
786 
787  If the conformer provided is not a 3D conformer, nothing will be done.
788 */
789 void assignChiralTypesFrom3D(ROMol &mol, int confId = -1,
790  bool replaceExistingTags = true);
791 
792 //! Assign stereochemistry tags to atoms (i.e. R/S) and bonds (i.e. Z/E)
793 /*!
794 
795  \param mol the molecule of interest
796  \param cleanIt toggles removal of stereo flags from double bonds that can
797  not have stereochemistry
798  \param force forces the calculation to be repeated even if it has
799  already been done
800  \param flagPossibleStereoCenters set the _ChiralityPossible property on
801  atoms that are possible stereocenters
802 
803  <b>Notes:</b>
804  - Throughout we assume that we're working with a hydrogen-suppressed
805  graph.
806 
807 */
808 void assignStereochemistry(ROMol &mol, bool cleanIt = false, bool force = false,
809  bool flagPossibleStereoCenters = false);
810 //! Removes all stereochemistry information from atoms (i.e. R/S) and bonds
811 //! (i.e. Z/E)
812 /*!
813 
814  \param mol the molecule of interest
815 */
816 void removeStereochemistry(ROMol &mol);
817 
818 //! \brief finds bonds that could be cis/trans in a molecule and mark them as
819 //! Bond::STEREONONE
820 /*!
821  \param mol the molecule of interest
822  \param cleanIt toggles removal of stereo flags from double bonds that can
823  not have stereochemistry
824 
825  This function is useful in two situations
826  - when parsing a mol file; for the bonds marked here, coordinate
827  information
828  on the neighbors can be used to identify cis or trans states
829  - when writing a mol file; bonds that can be cis/trans but not marked as
830  either
831  need to be specially marked in the mol file
832 */
833 void findPotentialStereoBonds(ROMol &mol, bool cleanIt = false);
834 //@}
835 
836 //! returns the number of atoms which have a particular property set
837 unsigned getNumAtomsWithDistinctProperty(const ROMol &mol, std::string prop);
838 
839 }; // end of namespace MolOps
840 }; // end of namespace RDKit
841 
842 #endif
ROMol * renumberAtoms(const ROMol &mol, const std::vector< unsigned int > &newOrder)
returns a copy of a molecule with the atoms renumbered
std::map< T, boost::shared_ptr< ROMol > > getMolFragsWithQuery(const ROMol &mol, T(*query)(const ROMol &, const Atom *), bool sanitizeFrags=true, const std::vector< T > *whiteList=0, bool negateList=false)
splits a molecule into pieces based on labels assigned using a query
std::list< int > getShortestPath(const ROMol &mol, int aid1, int aid2)
Find the shortest path between two atoms.
void fastFindRings(const ROMol &mol)
use a DFS algorithm to identify ring bonds and atoms in a molecule
unsigned int getMolFrags(const ROMol &mol, std::vector< int > &mapping)
find fragments (disconnected components of the molecular graph)
boost::uint32_t adjustDegreeFlags
Definition: MolOps.h:256
void Kekulize(RWMol &mol, bool markAtomsBonds=true, unsigned int maxBackTracks=100)
Kekulizes the molecule.
ROMol * removeHs(const ROMol &mol, bool implicitOnly=false, bool updateExplicitCount=false, bool sanitize=true)
returns a copy of a molecule with hydrogens removed
double computeBalabanJ(const ROMol &mol, bool useBO=true, bool force=false, const std::vector< int > *bondPath=0, bool cacheIt=true)
calculates Balaban's J index for the molecule
void assignChiralTypesFrom3D(ROMol &mol, int confId=-1, bool replaceExistingTags=true)
Uses a conformer to assign ChiralType to a molecule's atoms.
int findSSSR(const ROMol &mol, std::vector< std::vector< int > > &res)
finds a molecule's Smallest Set of Smallest Rings
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:30
ROMol * addHs(const ROMol &mol, bool explicitOnly=false, bool addCoords=false, const UINT_VECT *onlyOnAtoms=NULL)
returns a copy of a molecule with hydrogens added in as explicit Atoms
const int ci_LOCAL_INF
int symmetrizeSSSR(ROMol &mol, std::vector< std::vector< int > > &res)
symmetrize the molecule's Smallest Set of Smallest Rings
boost::uint32_t makeBondsGenericFlags
Definition: MolOps.h:264
AromaticityModel
Possible aromaticity models.
Definition: MolOps.h:383
boost::uint32_t adjustRingCountFlags
Definition: MolOps.h:258
boost::uint32_t makeAtomsGenericFlags
Definition: MolOps.h:266
void sanitizeMol(RWMol &mol, unsigned int &operationThatFailed, unsigned int sanitizeOps=SANITIZE_ALL)
carries out a collection of tasks for cleaning up a molecule and
ROMol * adjustQueryProperties(const ROMol &mol, const AdjustQueryParameters *params=NULL)
returns a copy of a molecule with query properties adjusted
int countAtomElec(const Atom *at)
return the number of electrons available on an atom to donate for aromaticity
INVAR_VECT::iterator INVAR_VECT_I
Definition: MolOps.h:28
void cleanupChirality(RWMol &mol)
removes bogus chirality markers (those on non-sp3 centers):
ROMol is a molecule class that is intended to have a fixed topology.
Definition: ROMol.h:103
bool atomHasConjugatedBond(const Atom *at)
returns whether or not the given Atom is involved in a conjugated bond
int getFormalCharge(const ROMol &mol)
sums up all atomic formal charges and returns the result
unsigned getNumAtomsWithDistinctProperty(const ROMol &mol, std::string prop)
returns the number of atoms which have a particular property set
double * getDistanceMat(const ROMol &mol, bool useBO=false, bool useAtomWts=false, bool force=false, const char *propNamePrefix=0)
Computes the molecule's topological distance matrix.
int setAromaticity(RWMol &mol, AromaticityModel model=AROMATICITY_DEFAULT, int(*func)(RWMol &)=NULL)
Sets up the aromaticity for a molecule.
void assignRadicals(RWMol &mol)
Called by the sanitizer to assign radical counts to atoms.
void setConjugation(ROMol &mol)
flags the molecule's conjugated bonds
INVAR_VECT::const_iterator INVAR_VECT_CI
Definition: MolOps.h:29
Includes a bunch of functionality for handling Atom and Bond queries.
Definition: Atom.h:29
ROMol * mergeQueryHs(const ROMol &mol, bool mergeUnmappedOnly=false)
void assignStereochemistry(ROMol &mol, bool cleanIt=false, bool force=false, bool flagPossibleStereoCenters=false)
Assign stereochemistry tags to atoms (i.e. R/S) and bonds (i.e. Z/E)
void removeStereochemistry(ROMol &mol)
Removes all stereochemistry information from atoms (i.e. R/S) and bonds.
class for representing a bond
Definition: Bond.h:47
void cleanUp(RWMol &mol)
Designed to be called by the sanitizer to handle special cases before.
void setHybridization(ROMol &mol)
calculates and sets the hybridization of all a molecule's Atoms
double * getAdjacencyMatrix(const ROMol &mol, bool useBO=false, int emptyVal=0, bool force=false, const char *propNamePrefix=0, const boost::dynamic_bitset<> *bondsToUse=0)
returns a molecule's adjacency matrix
std::vector< UINT > UINT_VECT
Definition: types.h:207
std::vector< double > INVAR_VECT
Definition: MolOps.h:27
double * get3DDistanceMat(const ROMol &mol, int confId=-1, bool useAtomWts=false, bool force=false, const char *propNamePrefix=0)
Computes the molecule's 3D distance matrix.
void adjustHs(RWMol &mol)
adjust the number of implicit and explicit Hs for special cases
AdjustQueryWhichFlags
Definition: MolOps.h:244
void findPotentialStereoBonds(ROMol &mol, bool cleanIt=false)
finds bonds that could be cis/trans in a molecule and mark them as Bond::STEREONONE ...
The class for representing atoms.
Definition: Atom.h:68