RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
MolOps.h
Go to the documentation of this file.
1//
2// Copyright (C) 2001-2024 Greg Landrum and other RDKit contributors
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#include <RDGeneral/export.h>
11#ifndef RD_MOL_OPS_H
12#define RD_MOL_OPS_H
13
14#include <vector>
15#include <map>
16#include <list>
18#include <boost/smart_ptr.hpp>
19#include <boost/dynamic_bitset.hpp>
21#include <RDGeneral/types.h>
23#include "SanitException.h"
25
26RDKIT_GRAPHMOL_EXPORT extern const int ci_LOCAL_INF;
27namespace RDKit {
28class ROMol;
29class RWMol;
30class Atom;
31class Bond;
32class Conformer;
33typedef std::vector<double> INVAR_VECT;
34typedef INVAR_VECT::iterator INVAR_VECT_I;
35typedef INVAR_VECT::const_iterator INVAR_VECT_CI;
36
37//! \brief Groups a variety of molecular query and transformation operations.
38namespace MolOps {
39
40//! return the number of electrons available on an atom to donate for
41/// aromaticity
42/*!
43 The result is determined using the default valency, number of lone pairs,
44 number of bonds and the formal charge. Note that the atom may not donate
45 all of these electrons to a ring for aromaticity (also used in Conjugation
46 and hybridization code).
47
48 \param at the atom of interest
49
50 \return the number of electrons
51*/
53
54//! sums up all atomic formal charges and returns the result
56
57//! returns whether or not the given Atom is involved in a conjugated bond
59
60//! finds the fragments (disconnected components) of the molecular graph
61/*!
62 This overload reports the result as a per-atom fragment assignment.
63 \param mol the molecule of interest
64 \param mapping used to return the mapping of Atoms->fragments.
65 On return \c mapping will be <tt>mol.getNumAtoms()</tt> long
66 and will contain the fragment assignment for each Atom
67
68 \return the number of fragments found.
69
70*/
71RDKIT_GRAPHMOL_EXPORT unsigned int getMolFrags(const ROMol &mol,
72 std::vector<int> &mapping);
73//! find fragments (disconnected components of the molecular graph)
74/*!
75
76 \param mol the molecule of interest
77 \param frags used to return the Atoms in each fragment
78 On return \c frags will be \c numFrags long, and each entry
79 will contain the indices of the Atoms in that fragment.
80
81 \return the number of fragments found.
82
83*/
85 const ROMol &mol, std::vector<std::vector<int>> &frags);
86
87//! splits a molecule into its component fragments
88/// (disconnected components of the molecular graph)
89/*!
90
91 \param mol the molecule of interest
92 \param molFrags used to return the disconnected fragments as molecules.
93 Any contents on input will be cleared.
94 \param sanitizeFrags toggles sanitization of the fragments after
95 they are built
96 \param frags used to return the mapping of Atoms->fragments.
97 if provided, \c frags will be <tt>mol.getNumAtoms()</tt> long
98 on return and will contain the fragment assignment for each Atom.
99 \param fragsMolAtomMapping used to return the Atoms in each fragment
100 If provided, on return \c fragsMolAtomMapping will be \c numFrags long, and each entry
101 will contain the indices of the Atoms in that fragment.
102 \param copyConformers toggles copying conformers of the fragments after
103 they are built
104 \return the number of fragments found.
105
106*/
108 const ROMol &mol, std::vector<std::unique_ptr<ROMol>> &molFrags,
109 bool sanitizeFrags = true, std::vector<int> *frags = nullptr,
110 std::vector<std::vector<int>> *fragsMolAtomMapping = nullptr,
111 bool copyConformers = true);
112
113//! splits a molecule into its component fragments
114/// (disconnected components of the molecular graph)
115/*!
116
117 \param mol the molecule of interest
118 \param sanitizeFrags toggles sanitization of the fragments after
119 they are built
120 \param frags (optional) used to return the mapping of Atoms->fragments.
121 if provided, \c frags will be <tt>mol.getNumAtoms()</tt> long
122 on return and will contain the fragment assignment for each Atom
123 \param fragsMolAtomMapping (optional) used to return the Atoms in each fragment.
124 If provided, on return \c fragsMolAtomMapping will be \c numFrags long, and each entry
125 will contain the indices of the Atoms in that fragment.
126 \param copyConformers toggles copying conformers of the fragments after
127 they are built
128 \return a vector of the fragments, each held in a \c boost::shared_ptr<ROMol>
129
130*/
131RDKIT_GRAPHMOL_EXPORT std::vector<boost::shared_ptr<ROMol>> getMolFrags(
132 const ROMol &mol, bool sanitizeFrags = true,
133 std::vector<int> *frags = nullptr,
134 std::vector<std::vector<int>> *fragsMolAtomMapping = nullptr,
135 bool copyConformers = true);
136
137//! splits a molecule into pieces based on labels assigned using a query
138/*!
139
140 \param mol the molecule of interest
141 \param query the query used to "label" the molecule for fragmentation: a function taking the molecule and an atom and returning a label of type \c T
142 \param sanitizeFrags toggles sanitization of the fragments after
143 they are built
144 \param whiteList (optional) if provided, only labels in the list will be kept
145 \param negateList if true, the white list logic will be inverted: only labels
146 not in the list will be kept
147
148 \return a map from each label to the corresponding fragment (held in a \c boost::shared_ptr<ROMol>)
149
150*/
151
152template <typename T>
153RDKIT_GRAPHMOL_EXPORT std::map<T, boost::shared_ptr<ROMol>>
154getMolFragsWithQuery(const ROMol &mol, T (*query)(const ROMol &, const Atom *),
155 bool sanitizeFrags = true,
156 const std::vector<T> *whiteList = nullptr,
157 bool negateList = false);
158//! splits a molecule into pieces based on labels assigned using a query,
159//! putting them into a map of std::unique_ptr<ROMol>.
160/*!
161
162 \param mol the molecule of interest
163 \param query the query used to "label" the molecule for fragmentation
164 \param molFrags used to return the disconnected fragments as molecules.
165 Any contents on input will be cleared.
166 \param sanitizeFrags toggles sanitization of the fragments after
167 they are built
168 \param whiteList if provided, only labels in the list will be kept
169 \param negateList if true, the white list logic will be inverted: only labels
170 not in the list will be kept
171
172 \return the number of fragments
173
174*/
175template <typename T>
177 const ROMol &mol, T (*query)(const ROMol &, const Atom *),
178 std::map<T, std::unique_ptr<ROMol>> &molFrags, bool sanitizeFrags = true,
179 const std::vector<T> *whiteList = nullptr, bool negateList = false);
180
181#if 0
182 //! finds a molecule's minimum spanning tree (MST)
183 /*!
184 \param mol the molecule of interest
185 \param mst used to return the MST as a vector of bond indices
186 */
187 RDKIT_GRAPHMOL_EXPORT void findSpanningTree(const ROMol &mol,std::vector<int> &mst);
188#endif
189
190//! \name Dealing with hydrogens
191//! @{
192
194 bool explicitOnly = false; /**< only add explicit Hs */
195 bool addCoords = false; /**< add coordinates for the Hs */
196 bool addResidueInfo = false; /**< add residue info to the Hs */
197 bool skipQueries =
198 false; /**< do not add Hs to query atoms or atoms with query bonds */
199};
200//! adds Hs to a molecule as explicit Atoms
201/*!
202 \param mol the molecule to add Hs to
203 \param params parameters controlling which Hs are added.
204 \param onlyOnAtoms (optional) if provided, this should be a vector of
205 IDs of the atoms that will be considered for H addition.
206
207 <b>Notes:</b>
208 - it makes no sense to use the \c addCoords option if the molecule's
209 heavy atoms don't already have coordinates.
210 - the molecule is modified
211 */
213 const UINT_VECT *onlyOnAtoms = nullptr);
214
215//! returns a copy of a molecule with hydrogens added in as explicit Atoms
216/*!
217 \param mol the molecule to add Hs to
218 \param explicitOnly (optional) if this is \c true, only explicit Hs will be
219 added
220 \param addCoords (optional) If this is true, estimates for the atomic
221 coordinates
222 of the added Hs will be used.
223 \param onlyOnAtoms (optional) if provided, this should be a vector of
224 IDs of the atoms that will be considered for H addition.
225 \param addResidueInfo (optional) if this is true, add residue info to
226 hydrogen atoms (useful for PDB files).
227
228 \return the new molecule
229
230 <b>Notes:</b>
231 - it makes no sense to use the \c addCoords option if the molecule's
232 heavy
233 atoms don't already have coordinates.
234 - the caller is responsible for <tt>delete</tt>ing the pointer this
235 returns.
236 */
237inline ROMol *addHs(const ROMol &mol, bool explicitOnly = false,
238 bool addCoords = false,
239 const UINT_VECT *onlyOnAtoms = nullptr,
240 bool addResidueInfo = false) {
241 AddHsParameters ps{explicitOnly, addCoords, addResidueInfo};
242 std::unique_ptr<RWMol> res{new RWMol(mol)};  // NOTE(review): the listing skips line 243 here; presumably the in-place addHs call on *res -- confirm against the header
244 return static_cast<ROMol *>(res.release());  // ownership of the copy passes to the caller
245}
246//! \overload
247/// modifies the molecule in place: packs the flags into an AddHsParameters and forwards to addHs(mol, ps, onlyOnAtoms)
248inline void addHs(RWMol &mol, bool explicitOnly = false, bool addCoords = false,
249 const UINT_VECT *onlyOnAtoms = nullptr,
250 bool addResidueInfo = false) {
251 AddHsParameters ps{explicitOnly, addCoords, addResidueInfo};
252 addHs(mol, ps, onlyOnAtoms);
253}
254
255//! Sets Cartesian coordinates for a terminal atom.
256//! Useful for growing an atom off a molecule with sensible
257//! coordinates based on the geometry of the neighbor.
258/*!
259 NOTE: this sets appropriate coordinates in all of the molecule's
260 conformers.
261
262 \param mol the molecule the atoms belong to
263 \param idx index of the terminal atom whose coordinates are set
264 \param otherIdx index of the bonded neighbor atom
265*/
266
268 unsigned int otherIdx);
269
270//! returns a copy of a molecule with hydrogens removed
271/*!
272 \param mol the molecule to remove Hs from
273 \param implicitOnly if this is \c true, only implicit Hs will be
274 removed
275 \param updateExplicitCount (optional) If this is \c true, when explicit
276 Hs are removed from the graph, the heavy atom to which they are bound will
277 have its counter of explicit Hs increased.
278 \param sanitize (optional) If this is \c true, the final molecule will be
279 sanitized
280
281 \return the new molecule
282
283 <b>Notes:</b>
284 - Hydrogens which aren't connected to a heavy atom will not be
285 removed. This prevents molecules like <tt>"[H][H]"</tt> from having
286 all atoms removed.
287 - Labelled hydrogen (e.g. atoms with atomic number=1, but mass > 1),
288 will not be removed.
289 - two coordinate Hs, like the central H in C[H-]C, will not be removed
290 - Hs connected to dummy atoms will not be removed
291 - Hs that are part of the definition of double bond Stereochemistry
292 will not be removed
293 - Hs that are not connected to anything else will not be removed
294 - Hs that have a query defined (i.e. hasQuery() returns true) will not
295 be removed
296
297 - the caller is responsible for <tt>delete</tt>ing the pointer this
298 returns.
299*/
300[[deprecated("Please use the version with RemoveHsParameters")]]
302 bool updateExplicitCount = false,
303 bool sanitize = true);
304//! \overload
305/// modifies the molecule in place
306[[deprecated("Please use the version with RemoveHsParameters")]]
308 bool updateExplicitCount = false,
309 bool sanitize = true);
311 bool removeDegreeZero = false; /**< hydrogens that have no bonds */
312 bool removeHigherDegrees = false; /**< hydrogens with two (or more) bonds */
313 bool removeOnlyHNeighbors =
314 false; /**< hydrogens with bonds only to other hydrogens */
315 bool removeIsotopes = false; /**< hydrogens with non-default isotopes */
316 bool removeAndTrackIsotopes = false; /**< removes hydrogens with non-default
317 isotopes and keeps track of the heavy atom the isotopes were attached to in
318 the private _isotopicHs atom property, so they are re-added by AddHs() as
319 the original isotopes if possible*/
320 bool removeDummyNeighbors =
321 false; /**< hydrogens with at least one dummy-atom neighbor */
322 bool removeDefiningBondStereo =
323 false; /**< hydrogens defining bond stereochemistry */
324 bool removeWithWedgedBond = true; /**< hydrogens with wedged bonds to them */
325 bool removeWithQuery = false; /**< hydrogens with queries defined */
326 bool removeMapped = true; /**< mapped hydrogens */
327 bool removeInSGroups = true; /**< part of a SubstanceGroup.
328 An H atom will only be removed if it doesn't cause any SGroup to become empty,
329 and if it doesn't play a special role in the SGroup (XBOND, attach point
330 or a CState) */
331 bool showWarnings = true; /**< display warnings for Hs that are not removed */
332 bool removeNonimplicit = true; /**< DEPRECATED equivalent of !implicitOnly */
333 bool updateExplicitCount =
334 false; /**< DEPRECATED equivalent of updateExplicitCount */
335 bool removeHydrides = true; /**< Removing Hydrides */
336 bool removeNontetrahedralNeighbors =
337 false; /**< remove Hs which are bonded to atoms with specified
338 non-tetrahedral stereochemistry */
339};
340
341//! \overload
342/// modifies the molecule in place
345 bool sanitize = true);
346//! \overload
347/// The caller owns the pointer this returns
349 const ROMol &mol, const RemoveHsParameters &ps = RemoveHsParameters(),
350 bool sanitize = true);
351
352//! removes all Hs from a molecule (this overload works on \c mol in place and returns nothing)
353RDKIT_GRAPHMOL_EXPORT void removeAllHs(RWMol &mol, bool sanitize = true);
354//! \overload
355/// The caller owns the pointer this returns
357 bool sanitize = true);
358
359//! returns a copy of a molecule with hydrogens removed and added as queries
360//! to the heavy atoms to which they are bound.
361/*!
362 This is really intended to be used with molecules that contain QueryAtoms
363
364 \param mol the molecule to remove Hs from
365
366 \return the new molecule
367
368 <b>Notes:</b>
369 - Atoms that do not already have hydrogen count queries will have one
370 added, other H-related queries will not be touched. Examples:
371 - C[H] -> [C;!H0]
372 - [C;H1][H] -> [C;H1]
373 - [C;H2][H] -> [C;H2]
374 - Hydrogens which aren't connected to a heavy atom will not be
375 removed. This prevents molecules like <tt>"[H][H]"</tt> from having
376 all atoms removed.
377 - the caller is responsible for <tt>delete</tt>ing the pointer this
378 returns.
379 - By default all hydrogens are removed, however if
380 mergeUnmappedOnly is true, any hydrogen participating
381 in an atom map will be retained
382
383*/
385 bool mergeUnmappedOnly = false,
386 bool mergeIsotopes = false);
387//! \overload
388/// modifies the molecule in place
390 bool mergeUnmappedOnly = false,
391 bool mergeIsotopes = false);
392
393//! returns a pair of booleans (hasQueryHs, hasUnmergeableQueryHs)
394/*!
395 This is really intended to be used with molecules that contain QueryAtoms
396 such as when checking smarts patterns for explicit hydrogens
397
398
399 \param mol the molecule to check for query Hs
400 \return a std::pair: pair.first is true if the molecule has query
401 hydrogens; pair.second is true if the query Hs cannot be removed by
402 mergeQueryHs
403*/
404RDKIT_GRAPHMOL_EXPORT std::pair<bool, bool> hasQueryHs(const ROMol &mol);
405
415
416//! Parameters controlling the behavior of MolOps::adjustQueryProperties
417/*!
418
419 Note that some of the options here are either directly contradictory or make
420 no sense when combined with each other. We generally assume that client code
421 is doing something sensible and don't attempt to detect possible conflicts
422 or problems.
423
424*/
426 bool adjustDegree = true; /**< add degree queries */
427 std::uint32_t adjustDegreeFlags = ADJUST_IGNOREDUMMIES | ADJUST_IGNORECHAINS;
428
429 bool adjustRingCount = false; /**< add ring-count queries */
430 std::uint32_t adjustRingCountFlags =
432
433 bool makeDummiesQueries = true; /**< convert dummy atoms without isotope
434 labels to any-atom queries */
435
436 bool aromatizeIfPossible = true; /**< perceive and set aromaticity */
437
438 bool makeBondsGeneric =
439 false; /**< convert bonds to generic queries (any bonds) */
440 std::uint32_t makeBondsGenericFlags = ADJUST_IGNORENONE;
441
442 bool makeAtomsGeneric =
443 false; /**< convert atoms to generic queries (any atoms) */
444 std::uint32_t makeAtomsGenericFlags = ADJUST_IGNORENONE;
445
446 bool adjustHeavyDegree = false; /**< adjust the heavy-atom degree instead of
447 overall degree */
448 std::uint32_t adjustHeavyDegreeFlags =
450
451 bool adjustRingChain = false; /**< add ring-chain queries */
452 std::uint32_t adjustRingChainFlags = ADJUST_IGNORENONE;
453
454 bool useStereoCareForBonds =
455 false; /**< remove stereochemistry info from double bonds that do not
456 have the stereoCare property set */
457
458 bool adjustConjugatedFiveRings =
459 false; /**< sets bond queries in conjugated five-rings to
460 SINGLE|DOUBLE|AROMATIC */
461
462 bool setMDLFiveRingAromaticity =
463 false; /**< uses the 5-ring aromaticity behavior of the (former) MDL
464 software as documented in the Chemical Representation Guide */
465
466 bool adjustSingleBondsToDegreeOneNeighbors =
467 false; /**< sets single bonds between aromatic or conjugated atoms and
468 degree one neighbors to SINGLE|AROMATIC */
469
470 bool adjustSingleBondsBetweenAromaticAtoms =
471 false; /**< sets non-ring single bonds between two aromatic or
472 conjugated atoms to SINGLE|AROMATIC */
473
474 //! \brief returns an AdjustQueryParameters object with all adjustments
475 //! disabled
478 res.adjustDegree = false;
479 res.makeDummiesQueries = false;
480 res.aromatizeIfPossible = false;
481 return res;
482 }
484};
485
486//! updates an AdjustQueryParameters object from a JSON string
488 MolOps::AdjustQueryParameters &p, const std::string &json);
489
490//! returns a copy of a molecule with query properties adjusted
491/*!
492 \param mol the molecule to adjust
493 \param params controls the adjustments made
494
495 \return the new molecule, the caller owns the memory
496*/
498 const ROMol &mol, const AdjustQueryParameters *params = nullptr);
499//! \overload
500/// modifies the molecule in place
502 RWMol &mol, const AdjustQueryParameters *params = nullptr);
503
504//! returns a copy of a molecule with the atoms renumbered
505/*!
506
507 \param mol the molecule to work with
508 \param newOrder the new ordering of the atoms (should be numAtoms long)
509 for example: if newOrder is [3,2,0,1], then atom 3 in the original
510 molecule will be atom 0 in the new one
511
512 \return the new molecule
513
514 <b>Notes:</b>
515 - the caller is responsible for <tt>delete</tt>ing the pointer this
516 returns.
517
518*/
520 const ROMol &mol, const std::vector<unsigned int> &newOrder);
521
522//! @}
523
524//! \name Sanitization
525//! @{
526
527// clang-format off
529 SANITIZE_NONE = 0x0,
530 SANITIZE_CLEANUP = 0x1,
532 SANITIZE_SYMMRINGS = 0x4,
533 SANITIZE_KEKULIZE = 0x8,
539 SANITIZE_ADJUSTHS = 0x200,
542 SANITIZE_ALL = 0xFFFFFFF
543);
544// clang-format on
545
546//! \brief carries out a collection of tasks for cleaning up a molecule and
547//! ensuring that it makes "chemical sense"
548/*!
549 This function calls the following in sequence
550 -# MolOps::cleanUp()
551 -# mol.updatePropertyCache()
552 -# MolOps::symmetrizeSSSR()
553 -# MolOps::Kekulize()
554 -# MolOps::assignRadicals()
555 -# MolOps::setAromaticity()
556 -# MolOps::setConjugation()
557 -# MolOps::setHybridization()
558 -# MolOps::cleanupChirality()
559 -# MolOps::adjustHs()
560 -# mol.updatePropertyCache()
561
562 \param mol : the RWMol to be cleaned
563
564 \param operationThatFailed : the first (if any) sanitization operation that
565 fails is set here.
566 The values are taken from the \c SanitizeFlags
567 enum. On success, the value is \c
568 SanitizeFlags::SANITIZE_NONE
569
570 \param sanitizeOps : the bits here are used to set which sanitization
571 operations are carried out. The elements of the \c
572 SanitizeFlags enum define the operations.
573
574 <b>Notes:</b>
575 - If there is a failure in the sanitization, a \c MolSanitizeException
576 will be thrown.
577 - in general the user of this function should cast the molecule following
578 this function to a ROMol, so that new atoms and bonds cannot be added to
579 the molecule and screw up the sanitizing that has been done here
580*/
582 RWMol &mol, unsigned int &operationThatFailed,
583 unsigned int sanitizeOps = SanitizeFlags::SANITIZE_ALL);
584//! \overload
586
587//! \brief Identifies chemistry problems (things that don't make chemical
588//! sense) in a molecule
589/*!
590 This function uses the operations in sanitizeMol but does not change
591 the input structure and returns a list of the problems encountered instead
592 of stopping at the first failure.
593
594 The problems this looks for come from the sanitization operations:
595 -# mol.updatePropertyCache() : Unreasonable valences
596 -# MolOps::Kekulize() : Unkekulizable ring systems, aromatic atoms not
597 in rings, aromatic bonds to non-aromatic atoms.
598
599 \param mol : the ROMol to be checked (it is not modified)
600
601 \param sanitizeOps : the bits here are used to set which sanitization
602 operations are carried out. The elements of the \c
603 SanitizeFlags enum define the operations.
604
605 \return a vector of (owning pointers to) \c MolSanitizeException values that
606 indicate what problems were encountered
607
608*/
610std::vector<std::unique_ptr<MolSanitizeException>> detectChemistryProblems(
611 const ROMol &mol, unsigned int sanitizeOps = SanitizeFlags::SANITIZE_ALL);
612
613//! Possible aromaticity models
614/*!
615- \c AROMATICITY_DEFAULT at the moment always uses \c AROMATICITY_RDKIT
616- \c AROMATICITY_RDKIT is the standard RDKit model (as documented in the RDKit
617Book)
618- \c AROMATICITY_SIMPLE only considers 5- and 6-membered simple rings (it
619does not consider the outer envelope of fused rings)
620- \c AROMATICITY_MDL
621- \c AROMATICITY_MMFF94 the aromaticity model used by the MMFF94 force field
622- \c AROMATICITY_CUSTOM uses a caller-provided function
623*/
624typedef enum {
625 AROMATICITY_DEFAULT = 0x0, ///< future proofing
630 AROMATICITY_CUSTOM = 0xFFFFFFF ///< use a function
632
633//! sets the aromaticity model for a molecule to MMFF94
635
636//! Sets up the aromaticity for a molecule
637/*!
638
639 This is what happens here:
640 -# find all the simple rings by calling the findSSSR function
641 -# loop over all the Atoms in each ring and mark them if they are
642 candidates
643 for aromaticity. A ring atom is a candidate if it can spare electrons
644 to the ring and if it's from the first two rows of the periodic table.
645 -# based on the candidate atoms, mark the rings to be either candidates
646 or non-candidates. A ring is a candidate only if all its atoms are
647 candidates
648 -# apply Hueckel rule to each of the candidate rings to check if the ring
649 can be
650 aromatic
651
652 \param mol the RWMol of interest
653 \param model the aromaticity model to use
654 \param func a custom function for assigning aromaticity (only used when
655 model=\c AROMATICITY_CUSTOM)
656
657 \return >0 on success, <= 0 otherwise
658
659 <b>Assumptions:</b>
660 - Kekulization has been done (i.e. \c MolOps::Kekulize() has already
661 been called)
662
663*/
666 int (*func)(RWMol &) = nullptr);
667
668//! Designed to be called by the sanitizer to handle special cases before
669/// anything is done.
670/*!
671
672 Currently this:
673 - modifies nitro groups, so that the nitrogen does not have an
674 unreasonable valence of 5, as follows:
675 - the nitrogen gets a positive charge
676 - one of the oxygens gets a negative charge and the double bond to
677 this oxygen is changed to a single bond. The net result is that nitro groups
678 can be counted on to be: \c "[N+](=O)[O-]"
679 - modifies halogen-oxygen containing species as follows:
680 \c [Cl,Br,I](=O)(=O)(=O)O -> [X+3]([O-])([O-])([O-])O
681 \c [Cl,Br,I](=O)(=O)O -> [X+3]([O-])([O-])O
682 \c [Cl,Br,I](=O)O -> [X+]([O-])O
683 - converts the substructure [N,C]=P(=O)-* to [N,C]=[P+](-[O-])-*
684
685 \param mol the molecule of interest
686
687*/
689
690//! Designed to be called by the sanitizer to handle special cases for
691//! organometallic species before valence is perceived
692/*!
693
694 \b Note that this function is experimental and may either change in
695 behavior or be replaced with something else in future releases.
696
697 Currently this:
698 - replaces single bonds between "hypervalent" organic atoms and metals
699 with dative bonds (this is following an IUPAC recommendation:
700 https://iupac.qmul.ac.uk/tetrapyrrole/TP8.html)
701
702 \param mol the molecule of interest
703
704*/
706
707//! Called by the sanitizer to assign radical counts to atoms
709
710//! adjust the number of implicit and explicit Hs for special cases
711/*!
712
713 Currently this:
714 - modifies aromatic nitrogens so that, when appropriate, they have an
715 explicit H marked (e.g. so that we get things like \c "c1cc[nH]cc1")
716
717 \param mol the molecule of interest
718
719 <b>Assumptions</b>
720 - this is called after the molecule has been sanitized,
721 aromaticity has been perceived, and the implicit valence of
722 everything has been calculated.
723
724*/
726
727//! Kekulizes the molecule
728/*!
729
730 \param mol the molecule of interest
731
732 \param markAtomsBonds if this is set to true, \c isAromatic boolean
733 settings on both the Bonds and Atoms are turned to false following the
734 Kekulization, otherwise they are left alone in their original state.
735
736 \param maxBackTracks the maximum number of attempts at back-tracking. The
737 algorithm uses a back-tracking procedure to revisit a previous setting of
738 double bond if we hit a wall in the kekulization process
739
740 <b>Notes:</b>
741 - this does not modify query bonds which have bond type queries (like
742 those which come from SMARTS) or rings containing them.
743 - even if \c markAtomsBonds is \c false the \c BondType for all modified
744 aromatic bonds will be changed from \c RDKit::Bond::AROMATIC to \c
745 RDKit::Bond::SINGLE or RDKit::Bond::DOUBLE during Kekulization.
746
747*/
749 unsigned int maxBackTracks = 100);
750//! Kekulizes the molecule if possible. If the kekulization fails the molecule
751//! will not be modified
752/*!
753
754 \param mol the molecule of interest
755
756 \param markAtomsBonds if this is set to true, \c isAromatic boolean
757 settings on both the Bonds and Atoms are turned to false following the
758 Kekulization, otherwise they are left alone in their original state.
759
760 \param maxBackTracks the maximum number of attempts at back-tracking. The
761 algorithm uses a back-tracking procedure to revisit a previous setting of
762 double bond if we hit a wall in the kekulization process
763
764 \returns whether or not the kekulization succeeded
765
766 <b>Notes:</b>
767 - even if \c markAtomsBonds is \c false the \c BondType for all aromatic
768 bonds will be changed from \c RDKit::Bond::AROMATIC to \c
769 RDKit::Bond::SINGLE or RDKit::Bond::DOUBLE during Kekulization.
770
771*/
773 bool markAtomsBonds = true,
774 unsigned int maxBackTracks = 100);
775
776//! flags the molecule's conjugated bonds
778
779//! calculates and sets the hybridization of all of a molecule's Atoms
781
782//! @}
783
784//! \name Ring finding and SSSR
785//! @{
786
787//! finds a molecule's Smallest Set of Smallest Rings
788/*!
789 Currently this implements a modified form of Figueras algorithm
790 (JCICS - Vol. 36, No. 5, 1996, 986-991)
791
792 \param mol the molecule of interest
793 \param res used to return the vector of rings. Each entry is a vector with
794 atom indices. This information is also stored in the molecule's
795 RingInfo structure, so this argument is optional (see overload)
796 \param includeDativeBonds - determines whether or not dative bonds are used
797 in the ring finding.
798
799 \return number of smallest rings found
800
801 Base algorithm:
802 - The original algorithm starts by finding representative degree 2
803 nodes.
804 - Representative because if a series of deg 2 nodes are found only
805 one of them is picked.
806 - The smallest ring around each of them is found.
807 - The bonds that connect to this degree 2 node are then chopped off,
808 yielding
809 new deg two nodes
810 - The process is repeated on the new deg 2 nodes.
811 - If no deg 2 nodes are found, a deg 3 node is picked. The smallest ring
812 with it is found. A bond from this is "carefully" (look in the paper)
813 selected and chopped, yielding deg 2 nodes. The process is same as
814 above once this is done.
815
816 Our Modifications:
817 - If available, more than one smallest ring around a representative deg 2
818 node will be computed and stored
819 - Typically 3 rings are found around a degree 3 node (when no deg 2s are
820 available)
821 and all the bonds to that node are chopped.
822 - The extra rings that were found in this process are removed after all
823 the nodes have been covered.
824
825 These changes were motivated by several factors:
826 - We believe the original algorithm fails to find the correct SSSR
827 (finds the correct number of them but the wrong ones) on some sample
828 mols
829 - Since SSSR may not be unique, a post-SSSR step to symmetrize may be
830 done. The extra rings this process adds can be quite useful.
831*/
833 std::vector<std::vector<int>> &res,
834 bool includeDativeBonds = false);
835//! \overload
837 std::vector<std::vector<int>> *res = nullptr,
838 bool includeDativeBonds = false);
839
840//! use a DFS algorithm to identify ring bonds and atoms in a molecule
841/*!
842 \b NOTE: though the RingInfo structure is populated by this function,
843 the only really reliable calls that can be made are to check if
844 mol.getRingInfo().numAtomRings(idx) or mol.getRingInfo().numBondRings(idx)
845 return values >0
846*/
848
850
851//! symmetrize the molecule's Smallest Set of Smallest Rings
852/*!
853 SSSR rings obtained from "findSSSR" can be non-unique in some cases.
854 For example, cubane has five SSSR rings, not six as one would hope.
855
856 This function adds additional rings to the SSSR list if necessary
857 to make the list symmetric, e.g. all atoms in cubane will be part of the
858 same number of SSSRs. This function chooses these extra rings from the extra
859 rings computed and discarded during findSSSR. The new rings are chosen such
860 that:
861 - replacing a same sized ring in the SSSR list with an extra ring yields
862 the same union of bond IDs as the original SSSR list
863
864 \param mol - the molecule of interest
865 \param res used to return the vector of rings. Each entry is a vector with
866 atom indices. This information is also stored in the molecule's
867 RingInfo structure, so this argument is optional (see overload)
868 \param includeDativeBonds - determines whether or not dative bonds are used
869 in the ring finding.
870
871 \return the total number of rings = (new rings + old SSSRs)
872
873 <b>Notes:</b>
874 - if no SSSR rings are found on the molecule - MolOps::findSSSR() is called
875 first
876*/
878 std::vector<std::vector<int>> &res,
879 bool includeDativeBonds = false);
880//! \overload
882 bool includeDativeBonds = false);
883
884//! @}
885
886//! \name Shortest paths and other matrices
887//! @{
888
889//! returns a molecule's adjacency matrix
890/*!
891 \param mol the molecule of interest
892 \param useBO toggles use of bond orders in the matrix
893 \param emptyVal sets the empty value (for non-adjacent atoms)
894 \param force forces calculation of the matrix, even if already
895 computed
896 \param propNamePrefix used to set the cached property name
897 \param bondsToUse used to limit which bonds are considered
898
899 \return the adjacency matrix.
900
901 <b>Notes</b>
902 - The result of this is cached in the molecule's local property
903 dictionary, which will handle deallocation. The caller should <b>not</b> \c
904 delete this pointer.
905
906*/
908 const ROMol &mol, bool useBO = false, int emptyVal = 0, bool force = false,
909 const char *propNamePrefix = nullptr,
910 const boost::dynamic_bitset<> *bondsToUse = nullptr);
911
912//! Computes the molecule's topological distance matrix
913/*!
914 Uses the Floyd-Warshall all-pairs-shortest-paths algorithm.
915
916 \param mol the molecule of interest
917 \param useBO toggles use of bond orders in the matrix
918 \param useAtomWts sets the diagonal elements of the result to
919 6.0/(atomic number) so that the matrix can be used to calculate
920 Balaban J values. This does not affect the bond weights.
921 \param force forces calculation of the matrix, even if already
922 computed
923 \param propNamePrefix used to set the cached property name
924
925 \return the distance matrix.
926
927 <b>Notes</b>
928 - The result of this is cached in the molecule's local property
929 dictionary, which will handle deallocation. The caller should <b>not</b> \c
930 delete this pointer.
931
932
933*/
935 const ROMol &mol, bool useBO = false, bool useAtomWts = false,
936 bool force = false, const char *propNamePrefix = nullptr);
937
938//! Computes the molecule's topological distance matrix
939/*!
940 Uses the Floyd-Warshall all-pairs-shortest-paths algorithm.
941
942 \param mol the molecule of interest
943 \param activeAtoms only elements corresponding to these atom indices
944 will be included in the calculation
945 \param bonds only bonds found in this list will be included in the
946 calculation
947 \param useBO toggles use of bond orders in the matrix
948 \param useAtomWts sets the diagonal elements of the result to
949 6.0/(atomic number) so that the matrix can be used to calculate
950 Balaban J values. This does not affect the bond weights.
951
952 \return the distance matrix.
953
954 <b>Notes</b>
955 - The results of this call are not cached, the caller <b>should</b> \c
956 delete
957 this pointer.
958
959
960*/
962 const ROMol &mol, const std::vector<int> &activeAtoms,
963 const std::vector<const Bond *> &bonds, bool useBO = false,
964 bool useAtomWts = false);
965
966//! Computes the molecule's 3D distance matrix
967/*!
968
969 \param mol the molecule of interest
970 \param confId the conformer to use
971 \param useAtomWts sets the diagonal elements of the result to
972 6.0/(atomic number)
973 \param force forces calculation of the matrix, even if already
974 computed
975 \param propNamePrefix used to set the cached property name
976 (if set to an empty string, the matrix will not be
977 cached)
978
979 \return the distance matrix.
980
981 <b>Notes</b>
982 - If propNamePrefix is not empty the result of this is cached in the
983 molecule's local property dictionary, which will handle deallocation.
984 In other cases the caller is responsible for freeing the memory.
985
986*/
988 const ROMol &mol, int confId = -1, bool useAtomWts = false,
989 bool force = false, const char *propNamePrefix = nullptr);
990
991//! Find the shortest path between two atoms
992/*!
993 Uses the Bellman-Ford algorithm
994
995 \param mol molecule of interest
996 \param aid1 index of the first atom
997 \param aid2 index of the second atom
998
999 \return a std::list with the indices of the atoms along the shortest
1000 path
1001
1002 <b>Notes:</b>
1003 - the starting and end atoms are included in the path
1004 - if no path is found, an empty path is returned
1005
1006*/
1007RDKIT_GRAPHMOL_EXPORT std::list<int> getShortestPath(const ROMol &mol, int aid1,
1008 int aid2);
1009
1010//! @}
1011
1012//! \name Stereochemistry
1013//! @{
1014
1015// class to hold hybridizations
1016
1018 public:
1020 throw FileParseException("not to be called without a mol parameter");
1021 };
1024 throw FileParseException("not to be called without a mol parameter");
1025 };
1026
1027 ~Hybridizations() = default;
1028
1030 return static_cast<Atom::HybridizationType>(d_hybridizations[idx]);
1031 }
1032 // Atom::HybridizationType &operator[](unsigned int idx) {
1033 // return static_cast<Atom::HybridizationType>(d_hybridizations[idx]);
1034 // d_hybridizations[d_hybridizations[idx]];
1035 // }
1036
1037 // // void clear() { d_hybridizations.clear(); }
1038 // // void resize(unsigned int sz) { d_hybridizations.resize(sz); }
1039 unsigned int size() const { return d_hybridizations.size(); }
1040
1041 private:
1042 std::vector<int> d_hybridizations;
1043};
1044
1045//! removes bogus chirality markers (e.g. tetrahedral flags on non-sp3
1046//! centers):
1048
1049//! removes bogus atropisomeric markers (e.g. those without sp2 begin and end
1050//! atoms):
1053//! \overload
1055
1056//! \brief Uses a conformer to assign ChiralTypes to a molecule's atoms
1057/*!
1058 \param mol the molecule of interest
1059 \param confId the conformer to use
1060 \param replaceExistingTags if this flag is true, any existing atomic chiral
1061 tags will be replaced
1062
1063 If the conformer provided is not a 3D conformer, nothing will be done.
1064
1065
1066 NOTE that this does not check to see if atoms are chiral centers (i.e. all
1067 substituents are different), it merely sets the chiral type flags based on
1068 the coordinates and atom ordering. Use \c assignStereochemistryFrom3D() if
1069 you want chiral flags only on actual stereocenters.
1070*/
1072 ROMol &mol, int confId = -1, bool replaceExistingTags = true);
1073
1074//! \brief Uses a conformer to assign ChiralTypes to a molecule's atoms and
1075//! stereo flags to its bonds
1076/*!
1077
1078 \param mol the molecule of interest
1079 \param confId the conformer to use
1080 \param replaceExistingTags if this flag is true, any existing info about
1081 stereochemistry will be replaced
1082
1083 If the conformer provided is not a 3D conformer, nothing will be done.
1084*/
1086 ROMol &mol, int confId = -1, bool replaceExistingTags = true);
1087
1088//! \brief Use bond directions to assign ChiralTypes to a molecule's atoms
1089/*!
1090
1091 \param mol the molecule of interest
1092 \param confId the conformer to use
1093 \param replaceExistingTags if this flag is true, any existing info about
1094 stereochemistry will be replaced
1095*/
1097 ROMol &mol, int confId = -1, bool replaceExistingTags = true);
1098
1099//! \deprecated: this function will be removed in a future release. Use
1100//! setDoubleBondNeighborDirections() instead
1102 int confId = -1);
1103//! Sets bond directions based on double bond stereochemistry
1105 ROMol &mol, const Conformer *conf = nullptr);
1106//! removes directions from single bonds. The property _UnknownStereo will be
1107//! set on wiggly bonds
1109 bool onlyWedgeFlags = false);
1110
1111//! removes directions from all bonds. The property _UnknownStereo will be set
1112//! on wiggly bonds
1114//! removes directions from all bonds. The property _UnknownStereo will be set
1115//! on wiggly bonds
1117 bool onlyWedgeFlags = false);
1118
1119//! Assign CIS/TRANS bond stereochemistry tags based on neighboring
1120//! directions
1122
1123//! Assign stereochemistry tags to atoms and bonds.
1124/*!
1125 If useLegacyStereoPerception is true, it also does the CIP stereochemistry
1126 assignment for the molecule's atoms (R/S) and double bonds (Z/E).
1127 This assignment is based on legacy code which is fast, but is
1128 known to incorrectly assign CIP labels in some cases.
1129 Instead, to assign CIP labels based on an accurate, though slower,
1130 implementation of the CIP rules, call CIPLabeler::assignCIPLabels().
1131 Chiral atoms will have a property '_CIPCode' indicating their chiral code.
1132
1133 \param mol the molecule to use
1134 \param cleanIt if true, any existing values of the property `_CIPCode`
1135 will be cleared, atoms with a chiral specifier that aren't
1136 actually chiral (e.g. atoms with duplicate
1137 substituents or only 2 substituents, etc.) will have
1138 their chiral code set to CHI_UNSPECIFIED. Bonds with
1139 STEREOCIS/STEREOTRANS specified that have duplicate
1140 substituents based upon the CIP atom ranks will be
1141 marked STEREONONE.
1142 \param force causes the calculation to be repeated even if it has
1143 already been done
1144 \param flagPossibleStereoCenters set the _ChiralityPossible property on
1145 atoms that are possible stereocenters
1146
1147 <b>Notes:</b>
1148 - Throughout we assume that we're working with a hydrogen-suppressed
1149 graph.
1150
1151*/
1153 ROMol &mol, bool cleanIt = false, bool force = false,
1154 bool flagPossibleStereoCenters = false);
1155//! Removes all stereochemistry information from atoms (i.e. R/S) and bonds
1156/// (i.e. Z/E)
1157/*!
1158
1159 \param mol the molecule of interest
1160*/
1162
1163//! \brief finds bonds that could be cis/trans in a molecule and mark them as
1164//! Bond::STEREOANY.
1165/*!
1166 \param mol the molecule of interest
1167 \param cleanIt toggles removal of stereo flags from double bonds that can
1168 not have stereochemistry
1169
1170 This function finds any double bonds that can potentially be part of
1171 a cis/trans system. No attempt is made here to mark them cis or
1172 trans. No attempt is made to detect double bond stereo in ring systems.
1173
1174 This function is useful in the following situations:
1175 - when parsing a mol file; for the bonds marked here, coordinate
1176 information on the neighbors can be used to identify cis or trans
1177 states
1178 - when writing a mol file; bonds that can be cis/trans but not marked as
1179 either need to be specially marked in the mol file
1180 - finding double bonds with unspecified stereochemistry so they
1181 can be enumerated for downstream 3D tools
1182
1183 The CIPranks on the neighboring atoms are checked in this function. The
1184 _CIPCode property if set to any on the double bond.
1185*/
1187 bool cleanIt = false);
1188//! \brief Uses the molParity atom property to assign ChiralType to a
1189//! molecule's atoms
1190/*!
1191 \param mol the molecule of interest
1192 \param replaceExistingTags if this flag is true, any existing atomic chiral
1193 tags will be replaced
1194*/
1196 ROMol &mol, bool replaceExistingTags = true);
1197
1198//! @}
1199
1200//! returns the number of atoms which have a particular property set
1202 const ROMol &mol, std::string prop);
1203
1204//! returns whether or not a molecule needs to have Hs added to it.
1206
1207//! \brief Replaces haptic bond with explicit dative bonds.
1208/*!
1209 *
1210 * @param mol the molecule of interest
1211 *
1212 * One way of showing haptic bonds (such as cyclopentadiene to iron in
1213 * ferrocene) is to use a dummy atom with a dative bond to the iron atom with
1214 * the bond labelled with the atoms involved in the organic end of the bond.
1215 * Another way is to have explicit dative bonds from the atoms of the haptic
1216 * group to the metal atom. This function converts the former representation
1217 * to the latter.
1218 */
1220
1221//! \overload modifies molecule in place.
1223
1224//! \brief Replaces explicit dative bonds with haptic.
1225/*!
1226 *
1227 * @param mol the molecule of interest
1228 *
1229 * Does the reverse of hapticBondsToDative. If there are multiple contiguous
1230 * atoms attached by dative bonds to an atom (probably a metal atom), the
1231 * dative bonds will be replaced by a dummy atom in their centre attached to
1232 * the (metal) atom by a dative bond, which is labelled with ENDPTS of the
1233 * atoms that had the original dative bonds.
1234 */
1236
1237//! \overload modifies molecule in place.
1239
1240/*!
1241 Calculates a molecule's average molecular weight
1242
1243 \param mol the molecule of interest
1244 \param onlyHeavy (optional) if this is true (the default is false),
1245 only heavy atoms will be included in the MW calculation
1246
1247 \return the AMW
1248*/
1250 bool onlyHeavy = false);
1251/*!
1252 Calculates a molecule's exact molecular weight
1253
1254 \param mol the molecule of interest
1255 \param onlyHeavy (optional) if this is true (the default is false),
1256 only heavy atoms will be included in the MW calculation
1257
1258 \return the exact MW
1259*/
1261 bool onlyHeavy = false);
1262
1263/*!
1264 Calculates a molecule's formula
1265
1266 \param mol the molecule of interest
1267 \param separateIsotopes if true, isotopes will show up separately in the
1268 formula. So C[13CH2]O will give the formula: C[13C]H6O
1269 \param abbreviateHIsotopes if true, 2H and 3H will be represented as
1270 D and T instead of [2H] and [3H]. This only applies if \c
1271 separateIsotopes is true
1272
1273 \return the formula as a string
1274*/
1276 const ROMol &mol, bool separateIsotopes = false,
1277 bool abbreviateHIsotopes = true);
1278
1279namespace details {
1280//! not recommended for use in other code
// NOTE(review): \c bondsToUse is passed by value while \c atomsToUse is a
// const reference; presumably the implementation edits its private copy of
// the bond mask - confirm against the definition before changing either.
1281RDKIT_GRAPHMOL_EXPORT void KekulizeFragment(
1282 RWMol &mol, const boost::dynamic_bitset<> &atomsToUse,
1283 boost::dynamic_bitset<> bondsToUse, bool markAtomsBonds = true,
1284 unsigned int maxBackTracks = 100);
1285
1286// If the bond is dative, and it has a common_properties::MolFileBondEndPts
1287// prop, returns a vector of the indices of the atoms mentioned in the prop.
// NOTE(review): the result when either condition fails is not stated here
// (presumably an empty vector) - confirm against the definition.
1288RDKIT_GRAPHMOL_EXPORT std::vector<int> hapticBondEndpoints(const Bond *bond);
1289
1290} // namespace details
1291
1292//! attachment points encoded as attachPt properties are added to the graph as
1293/// dummy atoms
1294/*!
1295 *
1296 * @param mol the molecule of interest
1297 * @param addAsQueries if true, the dummy atoms will be added as null queries
1298 * (i.e. they will match any atom in a substructure search)
1299 * @param addCoords if true and the molecule has one or more conformers,
1300 * positions for the attachment points will be added to the conformer(s).
1301 *
1302 */
1304 bool addAsQueries = true,
1305 bool addCoords = true);
1306//! dummy atoms in the graph are removed and replaced with attachment point
1307//! annotations on the attached atoms
1308/*!
1309 *
1310 * @param mol the molecule of interest
1311 * @param markedOnly if true, only dummy atoms with the _fromAttachPoint
1312 * property will be collapsed
1313 *
1314 * In order for a dummy atom to be considered for collapsing it must have:
1315 * - degree 1 with a single or unspecified bond
1316 * - the bond to it can not be wedged
1317 * - either no query or be an AtomNullQuery
1318 *
1319 */
1321 bool markedOnly = true);
1322
1323namespace details {
1324//! adds an explicit attachment point, as a dummy atom, to the specified
1325/// atom
1326/*!
1327 *
1328 * @param mol the molecule of interest
1329 * @param atomIdx the index of the atom to which the attachment point should
1330 * be added
1331 * @param val the attachment point value. Should be 1 or 2
1332 * @param addAsQuery if true, the dummy atom will be added as a null query
1333 * (i.e. it will match any atom in a substructure search)
1334 * @param addCoords if true and the molecule has one or more conformers,
1335 * positions for the attachment points will be added to the conformer(s).
1336 *
 * NOTE(review): the meaning of the returned unsigned int is not documented
 * here - presumably the index of the newly added dummy atom; confirm against
 * the definition.
 *
1337 */
1338RDKIT_GRAPHMOL_EXPORT unsigned int addExplicitAttachmentPoint(
1339 RWMol &mol, unsigned int atomIdx, unsigned int val, bool addAsQuery = true,
1340 bool addCoords = true);
1341
1342//! returns whether or not an atom is an attachment point
1343/*!
1344 *
1345 * @param atom the atom to check
1346 * @param markedOnly if true, only dummy atoms with the _fromAttachPoint
1347 * property will be considered attachment points
1348 *
1349 * In order for a dummy atom to be considered an attachment point it must have:
1350 * - degree 1 with a single or unspecified bond
1351 * - the bond to it can not be wedged
1352 * - either no query or be an AtomNullQuery
1353 *
1354 */
1355RDKIT_GRAPHMOL_EXPORT bool isAttachmentPoint(const Atom *atom,
1356 bool markedOnly = true);
1357
1358} // namespace details
1359
1360} // namespace MolOps
1361} // namespace RDKit
1362
1363#endif
#define BETTER_ENUM(Enum, Underlying,...)
Definition BetterEnums.h:17
RDKIT_GRAPHMOL_EXPORT const int ci_LOCAL_INF
The class for representing atoms.
Definition Atom.h:75
HybridizationType
store hybridization
Definition Atom.h:88
class for representing a bond
Definition Bond.h:47
The class for representing 2D or 3D conformation of a molecule.
Definition Conformer.h:46
used by various file parsing classes to indicate a parse error
unsigned int size() const
Definition MolOps.h:1039
Atom::HybridizationType operator[](int idx)
Definition MolOps.h:1029
Hybridizations(const Hybridizations &)
Definition MolOps.h:1023
Hybridizations(const ROMol &mol)
RWMol is a molecule class that is intended to be edited.
Definition RWMol.h:32
#define RDKIT_GRAPHMOL_EXPORT
Definition export.h:233
RDKIT_GRAPHMOL_EXPORT void cleanUp(RWMol &mol)
RDKIT_GRAPHMOL_EXPORT void assignStereochemistry(ROMol &mol, bool cleanIt=false, bool force=false, bool flagPossibleStereoCenters=false)
Assign stereochemistry tags to atoms and bonds.
RDKIT_GRAPHMOL_EXPORT bool KekulizeIfPossible(RWMol &mol, bool markAtomsBonds=true, unsigned int maxBackTracks=100)
RDKIT_GRAPHMOL_EXPORT int findSSSR(const ROMol &mol, std::vector< std::vector< int > > &res, bool includeDativeBonds=false)
finds a molecule's Smallest Set of Smallest Rings
RDKIT_GRAPHMOL_EXPORT ROMol * renumberAtoms(const ROMol &mol, const std::vector< unsigned int > &newOrder)
returns a copy of a molecule with the atoms renumbered
RDKIT_GRAPHMOL_EXPORT std::string getMolFormula(const ROMol &mol, bool separateIsotopes=false, bool abbreviateHIsotopes=true)
RDKIT_GRAPHMOL_EXPORT void cleanupAtropisomers(RWMol &mol, Hybridizations &hybridizations)
RDKIT_GRAPHMOL_EXPORT void assignChiralTypesFromBondDirs(ROMol &mol, int confId=-1, bool replaceExistingTags=true)
Use bond directions to assign ChiralTypes to a molecule's atoms.
RDKIT_GRAPHMOL_EXPORT int setAromaticity(RWMol &mol, AromaticityModel model=AROMATICITY_DEFAULT, int(*func)(RWMol &)=nullptr)
Sets up the aromaticity for a molecule.
RDKIT_GRAPHMOL_EXPORT void sanitizeMol(RWMol &mol, unsigned int &operationThatFailed, unsigned int sanitizeOps=SanitizeFlags::SANITIZE_ALL)
carries out a collection of tasks for cleaning up a molecule and ensuring that it makes "chemical sen...
RDKIT_GRAPHMOL_EXPORT void findRingFamilies(const ROMol &mol)
RDKIT_GRAPHMOL_EXPORT double getExactMolWt(const ROMol &mol, bool onlyHeavy=false)
RDKIT_GRAPHMOL_EXPORT bool needsHs(const ROMol &mol)
returns whether or not a molecule needs to have Hs added to it.
RDKIT_GRAPHMOL_EXPORT void fastFindRings(const ROMol &mol)
use a DFS algorithm to identify ring bonds and atoms in a molecule
RDKIT_GRAPHMOL_EXPORT std::pair< bool, bool > hasQueryHs(const ROMol &mol)
returns a pair of booleans (hasQueryHs, hasUnmergeableQueryHs)
RDKIT_GRAPHMOL_EXPORT std::map< T, boost::shared_ptr< ROMol > > getMolFragsWithQuery(const ROMol &mol, T(*query)(const ROMol &, const Atom *), bool sanitizeFrags=true, const std::vector< T > *whiteList=nullptr, bool negateList=false)
splits a molecule into pieces based on labels assigned using a query
RDKIT_GRAPHMOL_EXPORT int getFormalCharge(const ROMol &mol)
sums up all atomic formal charges and returns the result
AromaticityModel
Possible aromaticity models.
Definition MolOps.h:624
@ AROMATICITY_RDKIT
Definition MolOps.h:626
@ AROMATICITY_MDL
Definition MolOps.h:628
@ AROMATICITY_CUSTOM
use a function
Definition MolOps.h:630
@ AROMATICITY_DEFAULT
future proofing
Definition MolOps.h:625
@ AROMATICITY_MMFF94
Definition MolOps.h:629
@ AROMATICITY_SIMPLE
Definition MolOps.h:627
RDKIT_GRAPHMOL_EXPORT void cleanUpOrganometallics(RWMol &mol)
RDKIT_GRAPHMOL_EXPORT double * getDistanceMat(const ROMol &mol, bool useBO=false, bool useAtomWts=false, bool force=false, const char *propNamePrefix=nullptr)
Computes the molecule's topological distance matrix.
RDKIT_GRAPHMOL_EXPORT ROMol * hapticBondsToDative(const ROMol &mol)
Replaces haptic bond with explicit dative bonds.
RDKIT_GRAPHMOL_EXPORT void setTerminalAtomCoords(ROMol &mol, unsigned int idx, unsigned int otherIdx)
RDKIT_GRAPHMOL_EXPORT void removeStereochemistry(ROMol &mol)
RDKIT_GRAPHMOL_EXPORT void clearSingleBondDirFlags(ROMol &mol, bool onlyWedgeFlags=false)
RDKIT_GRAPHMOL_EXPORT ROMol * adjustQueryProperties(const ROMol &mol, const AdjustQueryParameters *params=nullptr)
returns a copy of a molecule with query properties adjusted
RDKIT_GRAPHMOL_EXPORT void assignChiralTypesFromMolParity(ROMol &mol, bool replaceExistingTags=true)
Uses the molParity atom property to assign ChiralType to a molecule's atoms.
RDKIT_GRAPHMOL_EXPORT ROMol * mergeQueryHs(const ROMol &mol, bool mergeUnmappedOnly=false, bool mergeIsotopes=false)
RDKIT_GRAPHMOL_EXPORT void expandAttachmentPoints(RWMol &mol, bool addAsQueries=true, bool addCoords=true)
RDKIT_GRAPHMOL_EXPORT unsigned int getMolFrags(const ROMol &mol, std::vector< int > &mapping)
find fragments (disconnected components of the molecular graph)
RDKIT_GRAPHMOL_EXPORT void adjustHs(RWMol &mol)
adjust the number of implicit and explicit Hs for special cases
RDKIT_GRAPHMOL_EXPORT ROMol * dativeBondsToHaptic(const ROMol &mol)
Replaces explicit dative bonds with haptic.
RDKIT_GRAPHMOL_EXPORT void assignStereochemistryFrom3D(ROMol &mol, int confId=-1, bool replaceExistingTags=true)
Uses a conformer to assign ChiralTypes to a molecule's atoms and stereo flags to its bonds.
RDKIT_GRAPHMOL_EXPORT int countAtomElec(const Atom *at)
RDKIT_GRAPHMOL_EXPORT void detectBondStereochemistry(ROMol &mol, int confId=-1)
RDKIT_GRAPHMOL_EXPORT void setMMFFAromaticity(RWMol &mol)
sets the aromaticity model for a molecule to MMFF94
RDKIT_GRAPHMOL_EXPORT ROMol * removeHs(const ROMol &mol, bool implicitOnly, bool updateExplicitCount=false, bool sanitize=true)
returns a copy of a molecule with hydrogens removed
RDKIT_GRAPHMOL_EXPORT void parseAdjustQueryParametersFromJSON(MolOps::AdjustQueryParameters &p, const std::string &json)
updates an AdjustQueryParameters object from a JSON string
RDKIT_GRAPHMOL_EXPORT void removeAllHs(RWMol &mol, bool sanitize=true)
removes all Hs from a molecule
RDKIT_GRAPHMOL_EXPORT void clearAllBondDirFlags(ROMol &mol)
RDKIT_GRAPHMOL_EXPORT void setBondStereoFromDirections(ROMol &mol)
RDKIT_GRAPHMOL_EXPORT double * get3DDistanceMat(const ROMol &mol, int confId=-1, bool useAtomWts=false, bool force=false, const char *propNamePrefix=nullptr)
Computes the molecule's 3D distance matrix.
RDKIT_GRAPHMOL_EXPORT bool atomHasConjugatedBond(const Atom *at)
returns whether or not the given Atom is involved in a conjugated bond
RDKIT_GRAPHMOL_EXPORT int symmetrizeSSSR(ROMol &mol, std::vector< std::vector< int > > &res, bool includeDativeBonds=false)
symmetrize the molecule's Smallest Set of Smallest Rings
RDKIT_GRAPHMOL_EXPORT std::vector< std::unique_ptr< MolSanitizeException > > detectChemistryProblems(const ROMol &mol, unsigned int sanitizeOps=SanitizeFlags::SANITIZE_ALL)
Identifies chemistry problems (things that don't make chemical sense) in a molecule.
RDKIT_GRAPHMOL_EXPORT void clearDirFlags(ROMol &mol, bool onlyWedgeFlags=false)
RDKIT_GRAPHMOL_EXPORT void cleanupChirality(RWMol &mol)
RDKIT_GRAPHMOL_EXPORT double * getAdjacencyMatrix(const ROMol &mol, bool useBO=false, int emptyVal=0, bool force=false, const char *propNamePrefix=nullptr, const boost::dynamic_bitset<> *bondsToUse=nullptr)
returns a molecule's adjacency matrix
RDKIT_GRAPHMOL_EXPORT void Kekulize(RWMol &mol, bool markAtomsBonds=true, unsigned int maxBackTracks=100)
Kekulizes the molecule.
RDKIT_GRAPHMOL_EXPORT void assignRadicals(RWMol &mol)
Called by the sanitizer to assign radical counts to atoms.
RDKIT_GRAPHMOL_EXPORT void findPotentialStereoBonds(ROMol &mol, bool cleanIt=false)
finds bonds that could be cis/trans in a molecule and mark them as Bond::STEREOANY.
RDKIT_GRAPHMOL_EXPORT void addHs(RWMol &mol, const AddHsParameters &params, const UINT_VECT *onlyOnAtoms=nullptr)
adds Hs to a molecule as explicit Atoms
RDKIT_GRAPHMOL_EXPORT void setHybridization(ROMol &mol)
calculates and sets the hybridization of all a molecule's Atoms
RDKIT_GRAPHMOL_EXPORT void collapseAttachmentPoints(RWMol &mol, bool markedOnly=true)
RDKIT_GRAPHMOL_EXPORT unsigned getNumAtomsWithDistinctProperty(const ROMol &mol, std::string prop)
returns the number of atoms which have a particular property set
RDKIT_GRAPHMOL_EXPORT void assignChiralTypesFrom3D(ROMol &mol, int confId=-1, bool replaceExistingTags=true)
Uses a conformer to assign ChiralTypes to a molecule's atoms.
RDKIT_GRAPHMOL_EXPORT std::list< int > getShortestPath(const ROMol &mol, int aid1, int aid2)
Find the shortest path between two atoms.
RDKIT_GRAPHMOL_EXPORT double getAvgMolWt(const ROMol &mol, bool onlyHeavy=false)
RDKIT_GRAPHMOL_EXPORT void setConjugation(ROMol &mol)
flags the molecule's conjugated bonds
RDKIT_GRAPHMOL_EXPORT void setDoubleBondNeighborDirections(ROMol &mol, const Conformer *conf=nullptr)
Sets bond directions based on double bond stereochemistry.
AdjustQueryWhichFlags
Definition MolOps.h:406
@ ADJUST_IGNORERINGS
Definition MolOps.h:409
@ ADJUST_IGNORENONE
Definition MolOps.h:407
@ ADJUST_IGNOREMAPPED
Definition MolOps.h:412
@ ADJUST_IGNORENONDUMMIES
Definition MolOps.h:411
@ ADJUST_IGNOREDUMMIES
Definition MolOps.h:410
@ ADJUST_IGNORECHAINS
Definition MolOps.h:408
@ ADJUST_IGNOREALL
Definition MolOps.h:413
Std stuff.
std::vector< double > INVAR_VECT
Definition MolOps.h:33
bool rdvalue_is(const RDValue_cast_t)
INVAR_VECT::iterator INVAR_VECT_I
Definition MolOps.h:34
INVAR_VECT::const_iterator INVAR_VECT_CI
Definition MolOps.h:35
std::vector< UINT > UINT_VECT
Definition types.h:311
Parameters controlling the behavior of MolOps::adjustQueryProperties.
Definition MolOps.h:425
static AdjustQueryParameters noAdjustments()
returns an AdjustQueryParameters object with all adjustments disabled
Definition MolOps.h:476