RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
MolOps.h
Go to the documentation of this file.
1//
2// Copyright (C) 2001-2024 Greg Landrum and other RDKit contributors
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#include <RDGeneral/export.h>
11#ifndef RD_MOL_OPS_H
12#define RD_MOL_OPS_H
13
14#include <vector>
15#include <map>
16#include <list>
18#include <boost/smart_ptr.hpp>
19#include <boost/dynamic_bitset.hpp>
21#include <RDGeneral/types.h>
22#include "SanitException.h"
24
25RDKIT_GRAPHMOL_EXPORT extern const int ci_LOCAL_INF;  ///< only declared here (extern); NOTE(review): the name suggests an "infinity" sentinel value - confirm at the definition
26namespace RDKit {
27class ROMol;
28class RWMol;
29class Atom;
30class Bond;
31class Conformer;
32typedef std::vector<double> INVAR_VECT;  ///< a vector of doubles; NOTE(review): presumably holds invariant values - confirm against users of this type
33typedef INVAR_VECT::iterator INVAR_VECT_I;  ///< mutable iterator over an INVAR_VECT
34typedef INVAR_VECT::const_iterator INVAR_VECT_CI;  ///< read-only iterator over an INVAR_VECT
35
36//! \brief Groups a variety of molecular query and transformation operations.
37namespace MolOps {
38
39//! return the number of electrons available on an atom to donate for
40/// aromaticity
41/*!
42 The result is determined using the default valency, number of lone pairs,
43 number of bonds and the formal charge. Note that the atom may not donate
44 all of these electrons to a ring for aromaticity (also used in Conjugation
45 and hybridization code).
46
47 \param at the atom of interest
48
49 \return the number of electrons
50*/
52
53//! sums up all atomic formal charges and returns the result
55
56//! returns whether or not the given Atom is involved in a conjugated bond
58
59//! finds the fragments (disconnected components of the molecular graph)
60/*!
61
62 \param mol the molecule of interest
63 \param mapping used to return the mapping of Atoms->fragments.
64 On return \c mapping will be <tt>mol.getNumAtoms()</tt> long
65 and will contain the fragment assignment for each Atom (one entry per Atom)
66
67 \return the number of fragments found.
68
69*/
70RDKIT_GRAPHMOL_EXPORT unsigned int getMolFrags(const ROMol &mol,
71 std::vector<int> &mapping);
72//! find fragments (disconnected components of the molecular graph)
73/*!
74
75 \param mol the molecule of interest
76 \param frags used to return the Atoms in each fragment
77 On return \c frags will be \c numFrags long, and each entry
78 will contain the indices of the Atoms in that fragment.
79
80 \return the number of fragments found.
81
82*/
84 const ROMol &mol, std::vector<std::vector<int>> &frags);
85
86//! splits a molecule into its component fragments
87/// (disconnected components of the molecular graph)
88/*!
89
90 \param mol the molecule of interest
91 \param molFrags used to return the disconnected fragments as molecules.
92 Any contents on input will be cleared.
93 \param sanitizeFrags toggles sanitization of the fragments after
94 they are built
95 \param frags used to return the mapping of Atoms->fragments.
96 if provided, \c frags will be <tt>mol->getNumAtoms()</tt> long
97 on return and will contain the fragment assignment for each Atom.
98 \param fragsMolAtomMapping used to return the Atoms in each fragment
99 On return \c fragsMolAtomMapping will be \c numFrags long, and each entry
100 will contain the indices of the Atoms in that fragment.
101 \param copyConformers toggles copying conformers of the fragments after
102 they are built
103 \return the number of fragments found.
104
105*/
107 const ROMol &mol, std::vector<std::unique_ptr<ROMol>> &molFrags,
108 bool sanitizeFrags = true, std::vector<int> *frags = nullptr,
109 std::vector<std::vector<int>> *fragsMolAtomMapping = nullptr,
110 bool copyConformers = true);
111
112//! splits a molecule into its component fragments
113/// (disconnected components of the molecular graph)
114/*!
115
116 \param mol the molecule of interest
117 \param sanitizeFrags toggles sanitization of the fragments after
118 they are built
119 \param frags (optional) used to return the mapping of Atoms->fragments.
120 If provided (non-null), \c frags will be <tt>mol.getNumAtoms()</tt> long
121 on return and will contain the fragment assignment for each Atom
122 \param fragsMolAtomMapping (optional) used to return the Atoms in each fragment.
123 If provided (non-null), \c fragsMolAtomMapping will be \c numFrags long on return, and each entry
124 will contain the indices of the Atoms in that fragment.
125 \param copyConformers toggles copying conformers of the fragments after
126 they are built
127 \return a vector of the fragments as \c boost::shared_ptr<ROMol> smart pointers
128
129*/
130RDKIT_GRAPHMOL_EXPORT std::vector<boost::shared_ptr<ROMol>> getMolFrags(
131 const ROMol &mol, bool sanitizeFrags = true,
132 std::vector<int> *frags = nullptr,
133 std::vector<std::vector<int>> *fragsMolAtomMapping = nullptr,
134 bool copyConformers = true);
135
136//! splits a molecule into pieces based on labels assigned using a query
137/*!
138
 \tparam T the label type: it is what \c query returns and it is the key type
 of the returned map
139 \param mol the molecule of interest
140 \param query the query used to "label" the molecule for fragmentation;
 a function pointer called with the molecule and an Atom that returns a label of type \c T
141 \param sanitizeFrags toggles sanitization of the fragments after
142 they are built
143 \param whiteList if provided (non-null), only labels in the list will be kept
144 \param negateList if true, the white list logic will be inverted: only labels
145 not in the list will be kept
146
147 \return a map from each label to the fragment (as a \c boost::shared_ptr<ROMol>) carrying that label
148
149*/
150
151template <typename T>
152RDKIT_GRAPHMOL_EXPORT std::map<T, boost::shared_ptr<ROMol>>
153getMolFragsWithQuery(const ROMol &mol, T (*query)(const ROMol &, const Atom *),
154 bool sanitizeFrags = true,
155 const std::vector<T> *whiteList = nullptr,
156 bool negateList = false);
157//! splits a molecule into pieces based on labels assigned using a query,
158//! putting them into a map of std::unique_ptr<ROMol>.
159/*!
160
161 \param mol the molecule of interest
162 \param query the query used to "label" the molecule for fragmentation
163 \param molFrags used to return the disconnected fragments as molecules.
164 Any contents on input will be cleared.
165 \param sanitizeFrags toggles sanitization of the fragments after
166 they are built
167 \param whiteList if provided, only labels in the list will be kept
168 \param negateList if true, the white list logic will be inverted: only labels
169 not in the list will be kept
170
171 \return the number of fragments
172
173*/
174template <typename T>
176 const ROMol &mol, T (*query)(const ROMol &, const Atom *),
177 std::map<T, std::unique_ptr<ROMol>> &molFrags, bool sanitizeFrags = true,
178 const std::vector<T> *whiteList = nullptr, bool negateList = false);
179
180#if 0
181 //! finds a molecule's minimum spanning tree (MST)
182 /*!
183 \param mol the molecule of interest
184 \param mst used to return the MST as a vector of bond indices
185 */
186 RDKIT_GRAPHMOL_EXPORT void findSpanningTree(const ROMol &mol,std::vector<int> &mst);
187#endif
188
189//! \name Dealing with hydrogens
190//! @{
191
192//! returns a copy of a molecule with hydrogens added in as explicit Atoms
193/*!
194 \param mol the molecule to add Hs to
195 \param explicitOnly (optional) if this is \c true, only explicit Hs will be
196 added
197 \param addCoords (optional) If this is true, estimates for the atomic
198 coordinates
199 of the added Hs will be used.
200 \param onlyOnAtoms (optional) if provided, this should be a vector of
201 IDs of the atoms that will be considered for H addition.
202 \param addResidueInfo (optional) if this is true, add residue info to
203 hydrogen atoms (useful for PDB files).
204
205 \return the new molecule
206
207 <b>Notes:</b>
208 - it makes no sense to use the \c addCoords option if the molecule's
209 heavy
210 atoms don't already have coordinates.
211 - the caller is responsible for <tt>delete</tt>ing the pointer this
212 returns.
213 */
215 bool addCoords = false,
216 const UINT_VECT *onlyOnAtoms = nullptr,
217 bool addResidueInfo = false);
218//! \overload
219/// modifies the molecule in place
221 bool addCoords = false,
222 const UINT_VECT *onlyOnAtoms = nullptr,
223 bool addResidueInfo = false);
224
225//! Sets Cartesian coordinates for a terminal atom.
226//! Useful for growing an atom off a molecule with sensible
227//! coordinates based on the geometry of the neighbor.
228/*!
229 NOTE: this sets appropriate coordinates in all of the molecule's conformers.
230 \param mol the molecule the atoms belong to
231 \param idx index of the terminal atom whose coordinates are set
232 \param otherIdx index of the bonded neighbor atom
233*/
234
236 unsigned int otherIdx);
237
238//! returns a copy of a molecule with hydrogens removed
239/*!
240 \param mol the molecule to remove Hs from
241 \param implicitOnly (optional) if this is \c true, only implicit Hs will be
242 removed
243 \param updateExplicitCount (optional) If this is \c true, when explicit Hs
244 are removed
245 from the graph, the heavy atom to which they are bound will have its
246 counter of
247 explicit Hs increased.
248 \param sanitize (optional) If this is \c true, the final molecule will be
249 sanitized
250
251 \return the new molecule
252
253 <b>Notes:</b>
254 - Hydrogens which aren't connected to a heavy atom will not be
255 removed. This prevents molecules like <tt>"[H][H]"</tt> from having
256 all atoms removed.
257 - Labelled hydrogen (e.g. atoms with atomic number=1, but mass > 1),
258 will not be removed.
259 - two coordinate Hs, like the central H in C[H-]C, will not be removed
260 - Hs connected to dummy atoms will not be removed
261 - Hs that are part of the definition of double bond Stereochemistry
262 will not be removed
263 - Hs that are not connected to anything else will not be removed
264 - Hs that have a query defined (i.e. hasQuery() returns true) will not
265 be removed
266
267 - the caller is responsible for <tt>delete</tt>ing the pointer this
268 returns.
269*/
270
272 bool implicitOnly = false,
273 bool updateExplicitCount = false,
274 bool sanitize = true);
275//! \overload
276/// modifies the molecule in place
278 bool updateExplicitCount = false,
279 bool sanitize = true);
281 bool removeDegreeZero = false; /**< hydrogens that have no bonds */
282 bool removeHigherDegrees = false; /**< hydrogens with two (or more) bonds */
283 bool removeOnlyHNeighbors =
284 false; /**< hydrogens with bonds only to other hydrogens */
285 bool removeIsotopes = false; /**< hydrogens with non-default isotopes */
286 bool removeAndTrackIsotopes = false; /**< removes hydrogens with non-default
287 isotopes and keeps track of the heavy atom the isotopes were attached to in
288 the private _isotopicHs atom property, so they are re-added by AddHs() as the
289 original isotopes if possible*/
290 bool removeDummyNeighbors =
291 false; /**< hydrogens with at least one dummy-atom neighbor */
292 bool removeDefiningBondStereo =
293 false; /**< hydrogens defining bond stereochemistry */
294 bool removeWithWedgedBond = true; /**< hydrogens with wedged bonds to them */
295 bool removeWithQuery = false; /**< hydrogens with queries defined */
296 bool removeMapped = true; /**< mapped hydrogens */
297 bool removeInSGroups = true; /**< part of a SubstanceGroup.
298 An H atom will only be removed if it doesn't cause any SGroup to become empty,
299 and if it doesn't play a special role in the SGroup (XBOND, attach point
300 or a CState) */
301 bool showWarnings = true; /**< display warnings for Hs that are not removed */
302 bool removeNonimplicit = true; /**< DEPRECATED equivalent of !implicitOnly */
303 bool updateExplicitCount =
304 false; /**< DEPRECATED equivalent of updateExplicitCount */
305 bool removeHydrides = true; /**< Removing Hydrides */
306 bool removeNontetrahedralNeighbors =
307 false; /**< remove Hs which are bonded to atoms with specified
308 non-tetrahedral stereochemistry */
309};
310//! \overload
311/// modifies the molecule in place
313 bool sanitize = true);
314//! \overload
315/// The caller owns the pointer this returns
317 const RemoveHsParameters &ps,
318 bool sanitize = true);
319
320//! removes all Hs from a molecule
/*!
 \param mol the molecule to remove Hs from; it is taken by non-const
 reference and nothing is returned, so the result is delivered through
 \c mol itself
 \param sanitize (default \c true) NOTE(review): presumably the molecule is
 sanitized after the Hs have been removed - confirm against the implementation
*/
321RDKIT_GRAPHMOL_EXPORT void removeAllHs(RWMol &mol, bool sanitize = true);
322//! \overload
323/// The caller owns the pointer this returns
325 bool sanitize = true);
326
327//! returns a copy of a molecule with hydrogens removed and added as queries
328//! to the heavy atoms to which they are bound.
329/*!
330 This is really intended to be used with molecules that contain QueryAtoms
331
332 \param mol the molecule to remove Hs from
333
334 \return the new molecule
335
336 <b>Notes:</b>
337 - Atoms that do not already have hydrogen count queries will have one
338 added, other H-related queries will not be touched. Examples:
339 - C[H] -> [C;!H0]
340 - [C;H1][H] -> [C;H1]
341 - [C;H2][H] -> [C;H2]
342 - Hydrogens which aren't connected to a heavy atom will not be
343 removed. This prevents molecules like <tt>"[H][H]"</tt> from having
344 all atoms removed.
345 - the caller is responsible for <tt>delete</tt>ing the pointer this
346 returns.
347 - By default all hydrogens are removed, however if
348 mergeUnmappedOnly is true, any hydrogen participating
349 in an atom map will be retained
350
351*/
353 bool mergeUnmappedOnly = false,
354 bool mergeIsotopes = false);
355//! \overload
356/// modifies the molecule in place
358 bool mergeUnmappedOnly = false,
359 bool mergeIsotopes = false);
360
361//! returns a pair of booleans (hasQueryHs, hasUnmergeableQueryHs)
362/*!
363 This is really intended to be used with molecules that contain QueryAtoms
364 such as when checking SMARTS patterns for explicit hydrogens
365
366
367 \param mol the molecule to check for query Hs
368 \return a std::pair<bool, bool>: \c first is true if the molecule has query hydrogens;
369 \c second is true if those query Hs cannot be removed by mergeQueryHs
370*/
371RDKIT_GRAPHMOL_EXPORT std::pair<bool, bool> hasQueryHs(const ROMol &mol);
372
382
383//! Parameters controlling the behavior of MolOps::adjustQueryProperties
384/*!
385
386 Note that some of the options here are either directly contradictory or make
387 no sense when combined with each other. We generally assume that client code
388 is doing something sensible and don't attempt to detect possible conflicts or
389 problems.
390
391*/
393 bool adjustDegree = true; /**< add degree queries */
394 std::uint32_t adjustDegreeFlags = ADJUST_IGNOREDUMMIES | ADJUST_IGNORECHAINS;
395
396 bool adjustRingCount = false; /**< add ring-count queries */
397 std::uint32_t adjustRingCountFlags =
399
400 bool makeDummiesQueries = true; /**< convert dummy atoms without isotope
401 labels to any-atom queries */
402
403 bool aromatizeIfPossible = true; /**< perceive and set aromaticity */
404
405 bool makeBondsGeneric =
406 false; /**< convert bonds to generic queries (any bonds) */
407 std::uint32_t makeBondsGenericFlags = ADJUST_IGNORENONE;
408
409 bool makeAtomsGeneric =
410 false; /**< convert atoms to generic queries (any atoms) */
411 std::uint32_t makeAtomsGenericFlags = ADJUST_IGNORENONE;
412
413 bool adjustHeavyDegree = false; /**< adjust the heavy-atom degree instead of
414 overall degree */
415 std::uint32_t adjustHeavyDegreeFlags =
417
418 bool adjustRingChain = false; /**< add ring-chain queries */
419 std::uint32_t adjustRingChainFlags = ADJUST_IGNORENONE;
420
421 bool useStereoCareForBonds =
422 false; /**< remove stereochemistry info from double bonds that do not have
423 the stereoCare property set */
424
425 bool adjustConjugatedFiveRings =
426 false; /**< sets bond queries in conjugated five-rings to
427 SINGLE|DOUBLE|AROMATIC */
428
429 bool setMDLFiveRingAromaticity =
430 false; /**< uses the 5-ring aromaticity behavior of the (former) MDL
431 software as documented in the Chemical Representation Guide */
432
433 bool adjustSingleBondsToDegreeOneNeighbors =
434 false; /**< sets single bonds between aromatic or conjugated atoms and
435 degree one neighbors to SINGLE|AROMATIC */
436
437 bool adjustSingleBondsBetweenAromaticAtoms =
438 false; /**< sets non-ring single bonds between two aromatic or conjugated
439 atoms to SINGLE|AROMATIC */
440
441 //! \brief returns an AdjustQueryParameters object with all adjustments
442 //! disabled
445 res.adjustDegree = false;
446 res.makeDummiesQueries = false;
447 res.aromatizeIfPossible = false;
448 return res;
449 }
451};
452
453//! updates an AdjustQueryParameters object from a JSON string
455 MolOps::AdjustQueryParameters &p, const std::string &json);
456
457//! returns a copy of a molecule with query properties adjusted
458/*!
459 \param mol the molecule to adjust
460 \param params controls the adjustments made
461
462 \return the new molecule, the caller owns the memory
463*/
465 const ROMol &mol, const AdjustQueryParameters *params = nullptr);
466//! \overload
467/// modifies the molecule in place
469 RWMol &mol, const AdjustQueryParameters *params = nullptr);
470
471//! returns a copy of a molecule with the atoms renumbered
472/*!
473
474 \param mol the molecule to work with
475 \param newOrder the new ordering of the atoms (should be numAtoms long)
476 for example: if newOrder is [3,2,0,1], then atom 3 in the original
477 molecule will be atom 0 in the new one
478
479 \return the new molecule
480
481 <b>Notes:</b>
482 - the caller is responsible for <tt>delete</tt>ing the pointer this
483 returns.
484
485*/
487 const ROMol &mol, const std::vector<unsigned int> &newOrder);
488
489//! @}
490
491//! \name Sanitization
492//! @{
493
510
511//! \brief carries out a collection of tasks for cleaning up a molecule and
512//! ensuring that it makes "chemical sense"
513/*!
514 This function calls the following in sequence:
515 -# MolOps::cleanUp()
516 -# mol.updatePropertyCache()
517 -# MolOps::symmetrizeSSSR()
518 -# MolOps::Kekulize()
519 -# MolOps::assignRadicals()
520 -# MolOps::setAromaticity()
521 -# MolOps::setConjugation()
522 -# MolOps::setHybridization()
523 -# MolOps::cleanupChirality()
524 -# MolOps::adjustHs()
525 -# mol.updatePropertyCache()
526
527 \param mol : the RWMol to be cleaned
528
529 \param operationThatFailed : the first (if any) sanitization operation that
530 fails is set here.
531 The values are taken from the \c SanitizeFlags
532 enum. On success, the value is \c
533 SanitizeFlags::SANITIZE_NONE
534
535 \param sanitizeOps : the bits here are used to set which sanitization
536 operations are carried out. The elements of the \c
537 SanitizeFlags enum define the operations.
538
539 <b>Notes:</b>
540 - If there is a failure in the sanitization, a \c MolSanitizeException
541 will be thrown.
542 - in general the user of this function should cast the molecule following
543 this function to a ROMol, so that new atoms and bonds cannot be added to
544 the molecule and screw up the sanitizing that has been done here
545*/
547 unsigned int &operationThatFailed,
548 unsigned int sanitizeOps = SANITIZE_ALL);
549//! \overload
551
552//! \brief Identifies chemistry problems (things that don't make chemical
553//! sense) in a molecule
554/*!
555 This function uses the operations in sanitizeMol but does not change
556 the input structure and returns a list of the problems encountered instead
557 of stopping at the first failure.
558
559 The problems this looks for come from the sanitization operations:
560 -# mol.updatePropertyCache() : Unreasonable valences
561 -# MolOps::Kekulize() : Unkekulizable ring systems, aromatic atoms not
562 in rings, aromatic bonds to non-aromatic atoms.
563
564 \param mol : the ROMol to be checked (it is not modified)
565
566 \param sanitizeOps : the bits here are used to set which sanitization
567 operations are carried out. The elements of the \c
568 SanitizeFlags enum define the operations.
569
570 \return a vector of \c std::unique_ptr<MolSanitizeException> values that indicate what
571 problems were encountered
572
573*/
575std::vector<std::unique_ptr<MolSanitizeException>> detectChemistryProblems(
576 const ROMol &mol, unsigned int sanitizeOps = SANITIZE_ALL);
577
578//! Possible aromaticity models
579/*!
580- \c AROMATICITY_DEFAULT at the moment always uses \c AROMATICITY_RDKIT
581- \c AROMATICITY_RDKIT is the standard RDKit model (as documented in the RDKit
582Book)
583- \c AROMATICITY_SIMPLE only considers 5- and 6-membered simple rings (it
584does not consider the outer envelope of fused rings)
585- \c AROMATICITY_MDL
586- \c AROMATICITY_MMFF94 the aromaticity model used by the MMFF94 force field
587- \c AROMATICITY_CUSTOM uses a caller-provided function
588*/
589typedef enum {
590 AROMATICITY_DEFAULT = 0x0, ///< future proofing
595 AROMATICITY_CUSTOM = 0xFFFFFFF ///< use a function
597
598//! sets the aromaticity model for a molecule to MMFF94
600
601//! Sets up the aromaticity for a molecule
602/*!
603
604 This is what happens here:
605 -# find all the simple rings by calling the findSSSR function
606 -# loop over all the Atoms in each ring and mark them if they are
607 candidates
608 for aromaticity. A ring atom is a candidate if it can spare electrons
609 to the ring and if it's from the first two rows of the periodic table.
610 -# based on the candidate atoms, mark the rings to be either candidates
611 or non-candidates. A ring is a candidate only if all its atoms are
612 candidates
613 -# apply Hueckel rule to each of the candidate rings to check if the ring
614 can be
615 aromatic
616
617 \param mol the RWMol of interest
618 \param model the aromaticity model to use
619 \param func a custom function for assigning aromaticity (only used when
620 model=\c AROMATICITY_CUSTOM)
621
622 \return >0 on success, <= 0 otherwise
623
624 <b>Assumptions:</b>
625 - Kekulization has been done (i.e. \c MolOps::Kekulize() has already
626 been called)
627
628*/
631 int (*func)(RWMol &) = nullptr);
632
633//! Designed to be called by the sanitizer to handle special cases before
634/// anything is done.
635/*!
636
637 Currently this:
638 - modifies nitro groups, so that the nitrogen does not have an
639 unreasonable valence of 5, as follows:
640 - the nitrogen gets a positive charge
641 - one of the oxygens gets a negative charge and the double bond to
642 this oxygen is changed to a single bond. The net result is that nitro groups
643 can be counted on to be: \c "[N+](=O)[O-]"
644 - modifies halogen-oxygen containing species as follows:
645 \c [Cl,Br,I](=O)(=O)(=O)O -> [X+3]([O-])([O-])([O-])O
646 \c [Cl,Br,I](=O)(=O)O -> [X+3]([O-])([O-])O
647 \c [Cl,Br,I](=O)O -> [X+]([O-])O
648 - converts the substructure [N,C]=P(=O)-* to [N,C]=[P+](-[O-])-*
649
650 \param mol the molecule of interest
651
652*/
654
655//! Designed to be called by the sanitizer to handle special cases for
656//! organometallic species before valence is perceived
657/*!
658
659 \b Note that this function is experimental and may either change in behavior
660 or be replaced with something else in future releases.
661
662 Currently this:
663 - replaces single bonds between "hypervalent" organic atoms and metals with
664 dative bonds (this is following an IUPAC recommendation:
665 https://iupac.qmul.ac.uk/tetrapyrrole/TP8.html)
666
667 \param mol the molecule of interest
668
669*/
671
672//! Called by the sanitizer to assign radical counts to atoms
674
675//! adjust the number of implicit and explicit Hs for special cases
676/*!
677
678 Currently this:
679 - modifies aromatic nitrogens so that, when appropriate, they have an
680 explicit H marked (e.g. so that we get things like \c "c1cc[nH]cc1")
681
682 \param mol the molecule of interest
683
684 <b>Assumptions</b>
685 - this is called after the molecule has been sanitized,
686 aromaticity has been perceived, and the implicit valence of
687 everything has been calculated.
688
689*/
691
692//! Kekulizes the molecule
693/*!
694
695 \param mol the molecule of interest
696
697 \param markAtomsBonds if this is set to true, \c isAromatic boolean
698 settings on both the Bonds and Atoms are turned to false following the
699 Kekulization, otherwise they are left alone in their original state.
700
701 \param maxBackTracks the maximum number of attempts at back-tracking. The
702 algorithm uses a back-tracking procedure to revisit a previous setting of
703 double bond if we hit a wall in the kekulization process
704
705 <b>Notes:</b>
706 - this does not modify query bonds which have bond type queries (like
707 those which come from SMARTS) or rings containing them.
708 - even if \c markAtomsBonds is \c false the \c BondType for all modified
709 aromatic bonds will be changed from \c RDKit::Bond::AROMATIC to \c
710 RDKit::Bond::SINGLE or RDKit::Bond::DOUBLE during Kekulization.
711
712*/
714 unsigned int maxBackTracks = 100);
715//! Kekulizes the molecule if possible. If the kekulization fails the molecule
716//! will not be modified
717/*!
718
719 \param mol the molecule of interest
720
721 \param markAtomsBonds if this is set to true, \c isAromatic boolean
722 settings on both the Bonds and Atoms are turned to false following the
723 Kekulization, otherwise they are left alone in their original state.
724
725 \param maxBackTracks the maximum number of attempts at back-tracking. The
726 algorithm uses a back-tracking procedure to revisit a previous setting of
727 double bond if we hit a wall in the kekulization process
728
729 \returns whether or not the kekulization succeeded
730
731 <b>Notes:</b>
732 - even if \c markAtomsBonds is \c false the \c BondType for all aromatic
733 bonds will be changed from \c RDKit::Bond::AROMATIC to \c
734 RDKit::Bond::SINGLE or RDKit::Bond::DOUBLE during Kekulization.
735
736*/
738 bool markAtomsBonds = true,
739 unsigned int maxBackTracks = 100);
740
741//! flags the molecule's conjugated bonds
743
744//! calculates and sets the hybridization of all of a molecule's Atoms
746
747//! @}
748
749//! \name Ring finding and SSSR
750//! @{
751
752//! finds a molecule's Smallest Set of Smallest Rings
753/*!
754 Currently this implements a modified form of Figueras algorithm
755 (JCICS - Vol. 36, No. 5, 1996, 986-991)
756
757 \param mol the molecule of interest
758 \param res used to return the vector of rings. Each entry is a vector with
759 atom indices. This information is also stored in the molecule's
760 RingInfo structure, so this argument is optional (see overload)
761 \param includeDativeBonds - determines whether or not dative bonds are used in
762 the ring finding.
763
764 \return number of smallest rings found
765
766 Base algorithm:
767 - The original algorithm starts by finding representative degree 2
768 nodes.
769 - Representative because if a series of deg 2 nodes are found only
770 one of them is picked.
771 - The smallest ring around each of them is found.
772 - The bonds that connect to this degree 2 node are then chopped off,
773 yielding
774 new deg two nodes
775 - The process is repeated on the new deg 2 nodes.
776 - If no deg 2 nodes are found, a deg 3 node is picked. The smallest ring
777 with it is found. A bond from this is "carefully" (look in the paper)
778 selected and chopped, yielding deg 2 nodes. The process is same as
779 above once this is done.
780
781 Our Modifications:
782 - If available, more than one smallest ring around a representative deg 2
783 node will be computed and stored
784 - Typically 3 rings are found around a degree 3 node (when no deg 2s are
785 available)
786 and all the bonds to that node are chopped.
787 - The extra rings that were found in this process are removed after all
788 the nodes have been covered.
789
790 These changes were motivated by several factors:
791 - We believe the original algorithm fails to find the correct SSSR
792 (finds the correct number of them but the wrong ones) on some sample
793 mols
794 - Since SSSR may not be unique, a post-SSSR step to symmetrize may be
795 done. The extra rings this process adds can be quite useful.
796*/
798 std::vector<std::vector<int>> &res,
799 bool includeDativeBonds = false);
800//! \overload
802 std::vector<std::vector<int>> *res = nullptr,
803 bool includeDativeBonds = false);
804
805//! use a DFS algorithm to identify ring bonds and atoms in a molecule
806/*!
807 \b NOTE: though the RingInfo structure is populated by this function,
808 the only really reliable calls that can be made are to check if
809 mol.getRingInfo().numAtomRings(idx) or mol.getRingInfo().numBondRings(idx)
810 return values >0
811*/
813
815
816//! symmetrize the molecule's Smallest Set of Smallest Rings
817/*!
818 SSSR rings obtained from "findSSSR" can be non-unique in some cases.
819 For example, cubane has five SSSR rings, not six as one would hope.
820
821 This function adds additional rings to the SSSR list if necessary
822 to make the list symmetric, e.g. all atoms in cubane will be part of the
823 same number of SSSRs. This function chooses these extra rings from the extra
824 rings computed and discarded during findSSSR. The new rings are chosen such
825 that:
826 - replacing a same sized ring in the SSSR list with an extra ring yields
827 the same union of bond IDs as the original SSSR list
828
829 \param mol - the molecule of interest
830 \param res used to return the vector of rings. Each entry is a vector with
831 atom indices. This information is also stored in the molecule's
832 RingInfo structure, so this argument is optional (see overload)
833 \param includeDativeBonds - determines whether or not dative bonds are used in
834 the ring finding.
835
836 \return the total number of rings = (new rings + old SSSRs)
837
838 <b>Notes:</b>
839 - if no SSSR rings are found on the molecule - MolOps::findSSSR() is called
840 first
841*/
843 std::vector<std::vector<int>> &res,
844 bool includeDativeBonds = false);
845//! \overload
847 bool includeDativeBonds = false);
848
849//! @}
850
851//! \name Shortest paths and other matrices
852//! @{
853
854//! returns a molecule's adjacency matrix
855/*!
856 \param mol the molecule of interest
857 \param useBO toggles use of bond orders in the matrix
858 \param emptyVal sets the empty value (for non-adjacent atoms)
859 \param force forces calculation of the matrix, even if already
860 computed
861 \param propNamePrefix used to set the cached property name
862 \param bondsToUse used to limit which bonds are considered
863
864 \return the adjacency matrix.
865
866 <b>Notes</b>
867 - The result of this is cached in the molecule's local property
868 dictionary, which will handle deallocation. The caller should <b>not</b> \c
869 delete this pointer.
870
871*/
873 const ROMol &mol, bool useBO = false, int emptyVal = 0, bool force = false,
874 const char *propNamePrefix = nullptr,
875 const boost::dynamic_bitset<> *bondsToUse = nullptr);
876
877//! Computes the molecule's topological distance matrix
878/*!
879 Uses the Floyd-Warshall all-pairs-shortest-paths algorithm.
880
881 \param mol the molecule of interest
882 \param useBO toggles use of bond orders in the matrix
883 \param useAtomWts sets the diagonal elements of the result to
884 6.0/(atomic number) so that the matrix can be used to calculate
885 Balaban J values. This does not affect the bond weights.
886 \param force forces calculation of the matrix, even if already
887 computed
888 \param propNamePrefix used to set the cached property name
889
890 \return the distance matrix.
891
892 <b>Notes</b>
893 - The result of this is cached in the molecule's local property
894 dictionary, which will handle deallocation. The caller should <b>not</b> \c
895 delete this pointer.
896
897
898*/
900 const ROMol &mol, bool useBO = false, bool useAtomWts = false,
901 bool force = false, const char *propNamePrefix = nullptr);
902
903//! Computes the molecule's topological distance matrix
904/*!
905 Uses the Floyd-Warshall all-pairs-shortest-paths algorithm.
906
907 \param mol the molecule of interest
908 \param activeAtoms only elements corresponding to these atom indices
909 will be included in the calculation
910 \param bonds only bonds found in this list will be included in the
911 calculation
912 \param useBO toggles use of bond orders in the matrix
913 \param useAtomWts sets the diagonal elements of the result to
914 6.0/(atomic number) so that the matrix can be used to calculate
915 Balaban J values. This does not affect the bond weights.
916
917 \return the distance matrix.
918
919 <b>Notes</b>
920 - The results of this call are not cached, the caller <b>should</b> \c
921 delete
922 this pointer.
923
924
925*/
927 const ROMol &mol, const std::vector<int> &activeAtoms,
928 const std::vector<const Bond *> &bonds, bool useBO = false,
929 bool useAtomWts = false);
930
931//! Computes the molecule's 3D distance matrix
932/*!
933
934 \param mol the molecule of interest
935 \param confId the conformer to use
936 \param useAtomWts sets the diagonal elements of the result to
937 6.0/(atomic number)
938 \param force forces calculation of the matrix, even if already
939 computed
940 \param propNamePrefix used to set the cached property name
941 (if set to an empty string, the matrix will not be
942 cached)
943
944 \return the distance matrix.
945
946 <b>Notes</b>
947 - If propNamePrefix is not empty the result of this is cached in the
948 molecule's local property dictionary, which will handle deallocation.
949 In other cases the caller is responsible for freeing the memory.
950
951*/
953 const ROMol &mol, int confId = -1, bool useAtomWts = false,
954 bool force = false, const char *propNamePrefix = nullptr);
955
956//! Find the shortest path between two atoms
957/*!
958 Uses the Bellman-Ford algorithm
959
960 \param mol molecule of interest
961 \param aid1 index of the first atom
962 \param aid2 index of the second atom
963
964 \return an std::list with the indices of the atoms along the shortest
965 path
966
967 <b>Notes:</b>
968 - the starting and end atoms are included in the path
969 - if no path is found, an empty path is returned
970
971*/
972RDKIT_GRAPHMOL_EXPORT std::list<int> getShortestPath(const ROMol &mol, int aid1,
973 int aid2);
974
975//! @}
976
977//! \name Stereochemistry
978//! @{
979
980// class to hold hybridizations
981
983 public:
985 throw FileParseException("not to be called without a mol parameter");
986 };
989 throw FileParseException("not to be called without a mol parameter");
990 };
991
992 ~Hybridizations() = default;
993
995 return static_cast<Atom::HybridizationType>(d_hybridizations[idx]);
996 }
997 // Atom::HybridizationType &operator[](unsigned int idx) {
998 // return static_cast<Atom::HybridizationType>(d_hybridizations[idx]);
999 // d_hybridizations[d_hybridizations[idx]];
1000 // }
1001
1002 // // void clear() { d_hybridizations.clear(); }
1003 // // void resize(unsigned int sz) { d_hybridizations.resize(sz); }
1004 unsigned int size() const { return d_hybridizations.size(); }
1005
1006 private:
1007 std::vector<int> d_hybridizations;
1008};
1009
1010//! removes bogus chirality markers (e.g. tetrahedral flags on non-sp3 centers):
1012
1013//! removes bogus atropisomeric markers (e.g. those without sp2 begin and end
1014//! atoms):
1017//! \overload
1019
1020//! \brief Uses a conformer to assign ChiralTypes to a molecule's atoms
1021/*!
1022 \param mol the molecule of interest
1023 \param confId the conformer to use
1024 \param replaceExistingTags if this flag is true, any existing atomic chiral
1025 tags will be replaced
1026
1027 If the conformer provided is not a 3D conformer, nothing will be done.
1028
1029
1030 NOTE that this does not check to see if atoms are chiral centers (i.e. all
1031 substituents are different), it merely sets the chiral type flags based on
1032 the coordinates and atom ordering. Use \c assignStereochemistryFrom3D() if
1033 you want chiral flags only on actual stereocenters.
1034*/
1036 ROMol &mol, int confId = -1, bool replaceExistingTags = true);
1037
1038//! \brief Uses a conformer to assign ChiralTypes to a molecule's atoms and
1039//! stereo flags to its bonds
1040/*!
1041
1042 \param mol the molecule of interest
1043 \param confId the conformer to use
1044 \param replaceExistingTags if this flag is true, any existing info about
1045 stereochemistry will be replaced
1046
1047 If the conformer provided is not a 3D conformer, nothing will be done.
1048*/
1050 ROMol &mol, int confId = -1, bool replaceExistingTags = true);
1051
1052//! \brief Use bond directions to assign ChiralTypes to a molecule's atoms and
1053//! stereo flags to its bonds
1054/*!
1055
1056 \param mol the molecule of interest
1057 \param confId the conformer to use
1058 \param replaceExistingTags if this flag is true, any existing info about
1059 stereochemistry will be replaced
1060*/
1062 ROMol &mol, int confId = -1, bool replaceExistingTags = true);
1063
1064//! \deprecated: this function will be removed in a future release. Use
1065//! setDoubleBondNeighborDirections() instead
1067 int confId = -1);
1068//! Sets bond directions based on double bond stereochemistry
1070 ROMol &mol, const Conformer *conf = nullptr);
1071//! removes directions from single bonds. The property _UnknownStereo will be
1072//! set on wiggly bonds
1074 bool onlyWedgeFlags = false);
1075
1076//! removes directions from all bonds. The property _UnknownStereo will be set
1077//! on wiggly bonds
1079//! removes directions from all bonds. The property _UnknownStereo will be set
1080//! on wiggly bonds
1082 bool onlyWedgeFlags = false);
1083
1084//! Assign CIS/TRANS bond stereochemistry tags based on neighboring
1085//! directions
1087
1088//! Assign stereochemistry tags to atoms and bonds.
1089/*!
1090 If useLegacyStereoPerception is true, it also does the CIP stereochemistry
1091 assignment for the molecule's atoms (R/S) and double bonds (Z/E).
1092 This assignment is based on legacy code which is fast, but is
1093 known to incorrectly assign CIP labels in some cases.
1094 Instead, to assign CIP labels based on an accurate, though slower,
1095 implementation of the CIP rules, call CIPLabeler::assignCIPLabels().
1096 Chiral atoms will have a property '_CIPCode' indicating their chiral code.
1097
1098 \param mol the molecule to use
1099 \param cleanIt if true, any existing values of the property `_CIPCode`
1100 will be cleared, atoms with a chiral specifier that aren't
1101 actually chiral (e.g. atoms with duplicate
1102 substituents or only 2 substituents, etc.) will have
1103 their chiral code set to CHI_UNSPECIFIED. Bonds with
1104 STEREOCIS/STEREOTRANS specified that have duplicate
1105 substituents based upon the CIP atom ranks will be
1106 marked STEREONONE.
1107 \param force causes the calculation to be repeated even if it has
1108 already been done
1109 \param flagPossibleStereoCenters set the _ChiralityPossible property on
1110 atoms that are possible stereocenters
1111
1112 <b>Notes:</b>
1113 - Throughout we assume that we're working with a hydrogen-suppressed
1114 graph.
1115
1116*/
1118 ROMol &mol, bool cleanIt = false, bool force = false,
1119 bool flagPossibleStereoCenters = false);
1120//! Removes all stereochemistry information from atoms (i.e. R/S) and bonds
1121/// (i.e. Z/E)
1122/*!
1123
1124 \param mol the molecule of interest
1125*/
1127
1128//! \brief finds bonds that could be cis/trans in a molecule and mark them as
1129//! Bond::STEREOANY.
1130/*!
1131 \param mol the molecule of interest
1132 \param cleanIt toggles removal of stereo flags from double bonds that can
1133 not have stereochemistry
1134
1135 This function finds any double bonds that can potentially be part of
1136 a cis/trans system. No attempt is made here to mark them cis or
1137 trans. No attempt is made to detect double bond stereo in ring systems.
1138
1139 This function is useful in the following situations:
1140 - when parsing a mol file; for the bonds marked here, coordinate
1141 information on the neighbors can be used to identify cis or trans
1142 states
1143 - when writing a mol file; bonds that can be cis/trans but not marked as
1144 either need to be specially marked in the mol file
1145 - finding double bonds with unspecified stereochemistry so they
1146 can be enumerated for downstream 3D tools
1147
1148 The CIPranks on the neighboring atoms are checked in this function. The
1149 _CIPCode property if set to any on the double bond.
1150*/
1152 bool cleanIt = false);
1153//! \brief Uses the molParity atom property to assign ChiralType to a
1154//! molecule's atoms
1155/*!
1156 \param mol the molecule of interest
1157 \param replaceExistingTags if this flag is true, any existing atomic chiral
1158 tags will be replaced
1159*/
1161 ROMol &mol, bool replaceExistingTags = true);
1162
1163//! @}
1164
1165//! returns the number of atoms which have a particular property set
1167 const ROMol &mol, std::string prop);
1168
1169//! returns whether or not a molecule needs to have Hs added to it.
1171
1172//! \brief Replaces haptic bond with explicit dative bonds.
1173/*!
1174 *
1175 * @param mol the molecule of interest
1176 *
1177 * One way of showing haptic bonds (such as cyclopentadiene to iron in
1178 * ferrocene) is to use a dummy atom with a dative bond to the iron atom with
1179 * the bond labelled with the atoms involved in the organic end of the bond.
1180 * Another way is to have explicit dative bonds from the atoms of the haptic
1181 * group to the metal atom. This function converts the former representation to
1182 * the latter.
1183 */
1185
1186//! \overload modifies molecule in place.
1188
1189//! \brief Replaces explicit dative bonds with haptic.
1190/*!
1191 *
1192 * @param mol the molecule of interest
1193 *
1194 * Does the reverse of hapticBondsToDative. If there are multiple contiguous
1195 * atoms attached by dative bonds to an atom (probably a metal atom), the dative
1196 * bonds will be replaced by a dummy atom in their centre attached to the
1197 * (metal) atom by a dative bond, which is labelled with ENDPTS of the atoms
1198 * that had the original dative bonds.
1199 */
1201
1202//! \overload modifies molecule in place.
1204
1205/*!
1206 Calculates a molecule's average molecular weight
1207
1208 \param mol the molecule of interest
1209 \param onlyHeavy (optional) if this is true (the default is false),
1210 only heavy atoms will be included in the MW calculation
1211
1212 \return the AMW
1213*/
1215 bool onlyHeavy = false);
1216/*!
1217 Calculates a molecule's exact molecular weight
1218
1219 \param mol the molecule of interest
1220 \param onlyHeavy (optional) if this is true (the default is false),
1221 only heavy atoms will be included in the MW calculation
1222
1223 \return the exact MW
1224*/
1226 bool onlyHeavy = false);
1227
1228/*!
1229 Calculates a molecule's formula
1230
1231 \param mol the molecule of interest
1232 \param separateIsotopes if true, isotopes will show up separately in the
1233 formula. So C[13CH2]O will give the formula: C[13C]H6O
1234 \param abbreviateHIsotopes if true, 2H and 3H will be represented as
1235 D and T instead of [2H] and [3H]. This only applies if \c separateIsotopes
1236 is true
1237
1238 \return the formula as a string
1239*/
1241 const ROMol &mol, bool separateIsotopes = false,
1242 bool abbreviateHIsotopes = true);
1243
1244namespace details {
1245//! not recommended for use in other code
1246RDKIT_GRAPHMOL_EXPORT void KekulizeFragment(
1247 RWMol &mol, const boost::dynamic_bitset<> &atomsToUse,
1248 boost::dynamic_bitset<> bondsToUse, bool markAtomsBonds = true,
1249 unsigned int maxBackTracks = 100);
1250
1251// If the bond is dative, and it has a common_properties::MolFileBondEndPts
1252// prop, returns a vector of the indices of the atoms mentioned in the prop.
1253RDKIT_GRAPHMOL_EXPORT std::vector<int> hapticBondEndpoints(const Bond *bond);
1254
1255} // namespace details
1256
1257//! attachment points encoded as attachPt properties are added to the graph as
1258/// dummy atoms
1259/*!
1260 *
1261 * @param mol the molecule of interest
1262 * @param addAsQueries if true, the dummy atoms will be added as null queries
1263 * (i.e. they will match any atom in a substructure search)
1264 * @param addCoords if true and the molecule has one or more conformers,
1265 * positions for the attachment points will be added to the conformer(s).
1266 *
1267 */
1269 bool addAsQueries = true,
1270 bool addCoords = true);
1271//! dummy atoms in the graph are removed and replaced with attachment point
1272//! annotations on the attached atoms
1273/*!
1274 *
1275 * @param mol the molecule of interest
1276 * @param markedOnly if true, only dummy atoms with the _fromAttachPoint
1277 * property will be collapsed
1278 *
1279 * In order for a dummy atom to be considered for collapsing it must have:
1280 * - degree 1 with a single or unspecified bond
1281 * - the bond to it can not be wedged
1282 * - either no query or be an AtomNullQuery
1283 *
1284 */
1286 bool markedOnly = true);
1287
1288namespace details {
1289//! attachment points encoded as attachPt properties are added to the graph as
1290/// dummy atoms
1291/*!
1292 *
1293 * @param mol the molecule of interest
1294 * @param atomIdx the index of the atom to which the attachment point should be
1295 * added
1296 * @param val the attachment point value. Should be 1 or 2
1297 * @param addAsQuery if true, the dummy atoms will be added as null queries
1298 * (i.e. they will match any atom in a substructure search)
1299 * @param addCoords if true and the molecule has one or more conformers,
1300 * positions for the attachment points will be added to the conformer(s).
1301 *
1302 */
1303RDKIT_GRAPHMOL_EXPORT unsigned int addExplicitAttachmentPoint(
1304 RWMol &mol, unsigned int atomIdx, unsigned int val, bool addAsQuery = true,
1305 bool addCoords = true);
1306
1307//! returns whether or not an atom is an attachment point
1308/*!
1309 *
1310 * @param atom the atom of interest
1311 * @param markedOnly if true, only dummy atoms with the _fromAttachPoint
1312 * property will be collapsed
1313 *
1314 * In order for a dummy atom to be considered for collapsing it must have:
1315 * - degree 1 with a single or unspecified bond
1316 * - the bond to it can not be wedged
1317 * - either no query or be an AtomNullQuery
1318 *
1319 */
1320RDKIT_GRAPHMOL_EXPORT bool isAttachmentPoint(const Atom *atom,
1321 bool markedOnly = true);
1322
1323} // namespace details
1324
1325} // namespace MolOps
1326} // namespace RDKit
1327
1328#endif
RDKIT_GRAPHMOL_EXPORT const int ci_LOCAL_INF
The class for representing atoms.
Definition Atom.h:75
HybridizationType
store hybridization
Definition Atom.h:86
class for representing a bond
Definition Bond.h:47
The class for representing 2D or 3D conformation of a molecule.
Definition Conformer.h:46
used by various file parsing classes to indicate a parse error
unsigned int size() const
Definition MolOps.h:1004
Atom::HybridizationType operator[](int idx)
Definition MolOps.h:994
Hybridizations(const Hybridizations &)
Definition MolOps.h:988
Hybridizations(const ROMol &mol)
RWMol is a molecule class that is intended to be edited.
Definition RWMol.h:32
#define RDKIT_GRAPHMOL_EXPORT
Definition export.h:233
RDKIT_GRAPHMOL_EXPORT void cleanUp(RWMol &mol)
RDKIT_GRAPHMOL_EXPORT void assignStereochemistry(ROMol &mol, bool cleanIt=false, bool force=false, bool flagPossibleStereoCenters=false)
Assign stereochemistry tags to atoms and bonds.
RDKIT_GRAPHMOL_EXPORT bool KekulizeIfPossible(RWMol &mol, bool markAtomsBonds=true, unsigned int maxBackTracks=100)
RDKIT_GRAPHMOL_EXPORT int findSSSR(const ROMol &mol, std::vector< std::vector< int > > &res, bool includeDativeBonds=false)
finds a molecule's Smallest Set of Smallest Rings
RDKIT_GRAPHMOL_EXPORT ROMol * renumberAtoms(const ROMol &mol, const std::vector< unsigned int > &newOrder)
returns a copy of a molecule with the atoms renumbered
RDKIT_GRAPHMOL_EXPORT std::string getMolFormula(const ROMol &mol, bool separateIsotopes=false, bool abbreviateHIsotopes=true)
RDKIT_GRAPHMOL_EXPORT void cleanupAtropisomers(RWMol &mol, Hybridizations &hybridizations)
RDKIT_GRAPHMOL_EXPORT void assignChiralTypesFromBondDirs(ROMol &mol, int confId=-1, bool replaceExistingTags=true)
Use bond directions to assign ChiralTypes to a molecule's atoms and stereo flags to its bonds.
RDKIT_GRAPHMOL_EXPORT int setAromaticity(RWMol &mol, AromaticityModel model=AROMATICITY_DEFAULT, int(*func)(RWMol &)=nullptr)
Sets up the aromaticity for a molecule.
RDKIT_GRAPHMOL_EXPORT void findRingFamilies(const ROMol &mol)
RDKIT_GRAPHMOL_EXPORT double getExactMolWt(const ROMol &mol, bool onlyHeavy=false)
RDKIT_GRAPHMOL_EXPORT bool needsHs(const ROMol &mol)
returns whether or not a molecule needs to have Hs added to it.
RDKIT_GRAPHMOL_EXPORT void fastFindRings(const ROMol &mol)
use a DFS algorithm to identify ring bonds and atoms in a molecule
RDKIT_GRAPHMOL_EXPORT std::pair< bool, bool > hasQueryHs(const ROMol &mol)
returns a pair of booleans (hasQueryHs, hasUnmergeableQueryHs)
RDKIT_GRAPHMOL_EXPORT std::map< T, boost::shared_ptr< ROMol > > getMolFragsWithQuery(const ROMol &mol, T(*query)(const ROMol &, const Atom *), bool sanitizeFrags=true, const std::vector< T > *whiteList=nullptr, bool negateList=false)
splits a molecule into pieces based on labels assigned using a query
RDKIT_GRAPHMOL_EXPORT int getFormalCharge(const ROMol &mol)
sums up all atomic formal charges and returns the result
AromaticityModel
Possible aromaticity models.
Definition MolOps.h:589
@ AROMATICITY_RDKIT
Definition MolOps.h:591
@ AROMATICITY_MDL
Definition MolOps.h:593
@ AROMATICITY_CUSTOM
use a function
Definition MolOps.h:595
@ AROMATICITY_DEFAULT
future proofing
Definition MolOps.h:590
@ AROMATICITY_MMFF94
Definition MolOps.h:594
@ AROMATICITY_SIMPLE
Definition MolOps.h:592
RDKIT_GRAPHMOL_EXPORT void cleanUpOrganometallics(RWMol &mol)
RDKIT_GRAPHMOL_EXPORT double * getDistanceMat(const ROMol &mol, bool useBO=false, bool useAtomWts=false, bool force=false, const char *propNamePrefix=nullptr)
Computes the molecule's topological distance matrix.
RDKIT_GRAPHMOL_EXPORT ROMol * hapticBondsToDative(const ROMol &mol)
Replaces haptic bond with explicit dative bonds.
RDKIT_GRAPHMOL_EXPORT void setTerminalAtomCoords(ROMol &mol, unsigned int idx, unsigned int otherIdx)
RDKIT_GRAPHMOL_EXPORT void removeStereochemistry(ROMol &mol)
RDKIT_GRAPHMOL_EXPORT void clearSingleBondDirFlags(ROMol &mol, bool onlyWedgeFlags=false)
RDKIT_GRAPHMOL_EXPORT ROMol * adjustQueryProperties(const ROMol &mol, const AdjustQueryParameters *params=nullptr)
returns a copy of a molecule with query properties adjusted
RDKIT_GRAPHMOL_EXPORT void assignChiralTypesFromMolParity(ROMol &mol, bool replaceExistingTags=true)
Uses the molParity atom property to assign ChiralType to a molecule's atoms.
RDKIT_GRAPHMOL_EXPORT ROMol * mergeQueryHs(const ROMol &mol, bool mergeUnmappedOnly=false, bool mergeIsotopes=false)
RDKIT_GRAPHMOL_EXPORT void expandAttachmentPoints(RWMol &mol, bool addAsQueries=true, bool addCoords=true)
RDKIT_GRAPHMOL_EXPORT unsigned int getMolFrags(const ROMol &mol, std::vector< int > &mapping)
find fragments (disconnected components of the molecular graph)
RDKIT_GRAPHMOL_EXPORT void adjustHs(RWMol &mol)
adjust the number of implicit and explicit Hs for special cases
RDKIT_GRAPHMOL_EXPORT ROMol * dativeBondsToHaptic(const ROMol &mol)
Replaces explicit dative bonds with haptic.
RDKIT_GRAPHMOL_EXPORT void assignStereochemistryFrom3D(ROMol &mol, int confId=-1, bool replaceExistingTags=true)
Uses a conformer to assign ChiralTypes to a molecule's atoms and stereo flags to its bonds.
@ SANITIZE_SETAROMATICITY
Definition MolOps.h:501
@ SANITIZE_CLEANUPATROPISOMERS
Definition MolOps.h:507
@ SANITIZE_PROPERTIES
Definition MolOps.h:497
@ SANITIZE_CLEANUP_ORGANOMETALLICS
Definition MolOps.h:506
@ SANITIZE_SETCONJUGATION
Definition MolOps.h:502
@ SANITIZE_SYMMRINGS
Definition MolOps.h:498
@ SANITIZE_ADJUSTHS
Definition MolOps.h:505
@ SANITIZE_CLEANUPCHIRALITY
Definition MolOps.h:504
@ SANITIZE_FINDRADICALS
Definition MolOps.h:500
@ SANITIZE_KEKULIZE
Definition MolOps.h:499
@ SANITIZE_SETHYBRIDIZATION
Definition MolOps.h:503
@ SANITIZE_CLEANUP
Definition MolOps.h:496
RDKIT_GRAPHMOL_EXPORT int countAtomElec(const Atom *at)
RDKIT_GRAPHMOL_EXPORT void detectBondStereochemistry(ROMol &mol, int confId=-1)
RDKIT_GRAPHMOL_EXPORT void setMMFFAromaticity(RWMol &mol)
sets the aromaticity model for a molecule to MMFF94
RDKIT_GRAPHMOL_EXPORT void sanitizeMol(RWMol &mol, unsigned int &operationThatFailed, unsigned int sanitizeOps=SANITIZE_ALL)
carries out a collection of tasks for cleaning up a molecule and ensuring that it makes "chemical sen...
RDKIT_GRAPHMOL_EXPORT void parseAdjustQueryParametersFromJSON(MolOps::AdjustQueryParameters &p, const std::string &json)
updates an AdjustQueryParameters object from a JSON string
RDKIT_GRAPHMOL_EXPORT void removeAllHs(RWMol &mol, bool sanitize=true)
removes all Hs from a molecule
RDKIT_GRAPHMOL_EXPORT void clearAllBondDirFlags(ROMol &mol)
RDKIT_GRAPHMOL_EXPORT void setBondStereoFromDirections(ROMol &mol)
RDKIT_GRAPHMOL_EXPORT double * get3DDistanceMat(const ROMol &mol, int confId=-1, bool useAtomWts=false, bool force=false, const char *propNamePrefix=nullptr)
Computes the molecule's 3D distance matrix.
RDKIT_GRAPHMOL_EXPORT bool atomHasConjugatedBond(const Atom *at)
returns whether or not the given Atom is involved in a conjugated bond
RDKIT_GRAPHMOL_EXPORT int symmetrizeSSSR(ROMol &mol, std::vector< std::vector< int > > &res, bool includeDativeBonds=false)
symmetrize the molecule's Smallest Set of Smallest Rings
RDKIT_GRAPHMOL_EXPORT void clearDirFlags(ROMol &mol, bool onlyWedgeFlags=false)
RDKIT_GRAPHMOL_EXPORT void cleanupChirality(RWMol &mol)
removes bogus chirality markers (e.g. tetrahedral flags on non-sp3 centers):
RDKIT_GRAPHMOL_EXPORT double * getAdjacencyMatrix(const ROMol &mol, bool useBO=false, int emptyVal=0, bool force=false, const char *propNamePrefix=nullptr, const boost::dynamic_bitset<> *bondsToUse=nullptr)
returns a molecule's adjacency matrix
RDKIT_GRAPHMOL_EXPORT void Kekulize(RWMol &mol, bool markAtomsBonds=true, unsigned int maxBackTracks=100)
Kekulizes the molecule.
RDKIT_GRAPHMOL_EXPORT void assignRadicals(RWMol &mol)
Called by the sanitizer to assign radical counts to atoms.
RDKIT_GRAPHMOL_EXPORT std::vector< std::unique_ptr< MolSanitizeException > > detectChemistryProblems(const ROMol &mol, unsigned int sanitizeOps=SANITIZE_ALL)
Identifies chemistry problems (things that don't make chemical sense) in a molecule.
RDKIT_GRAPHMOL_EXPORT void findPotentialStereoBonds(ROMol &mol, bool cleanIt=false)
finds bonds that could be cis/trans in a molecule and mark them as Bond::STEREOANY.
RDKIT_GRAPHMOL_EXPORT void setHybridization(ROMol &mol)
calculates and sets the hybridization of all a molecule's Atoms
RDKIT_GRAPHMOL_EXPORT void collapseAttachmentPoints(RWMol &mol, bool markedOnly=true)
RDKIT_GRAPHMOL_EXPORT unsigned getNumAtomsWithDistinctProperty(const ROMol &mol, std::string prop)
returns the number of atoms which have a particular property set
RDKIT_GRAPHMOL_EXPORT void assignChiralTypesFrom3D(ROMol &mol, int confId=-1, bool replaceExistingTags=true)
Uses a conformer to assign ChiralTypes to a molecule's atoms.
RDKIT_GRAPHMOL_EXPORT std::list< int > getShortestPath(const ROMol &mol, int aid1, int aid2)
Find the shortest path between two atoms.
RDKIT_GRAPHMOL_EXPORT double getAvgMolWt(const ROMol &mol, bool onlyHeavy=false)
RDKIT_GRAPHMOL_EXPORT void setConjugation(ROMol &mol)
flags the molecule's conjugated bonds
RDKIT_GRAPHMOL_EXPORT void setDoubleBondNeighborDirections(ROMol &mol, const Conformer *conf=nullptr)
Sets bond directions based on double bond stereochemistry.
RDKIT_GRAPHMOL_EXPORT ROMol * removeHs(const ROMol &mol, bool implicitOnly=false, bool updateExplicitCount=false, bool sanitize=true)
returns a copy of a molecule with hydrogens removed
RDKIT_GRAPHMOL_EXPORT ROMol * addHs(const ROMol &mol, bool explicitOnly=false, bool addCoords=false, const UINT_VECT *onlyOnAtoms=nullptr, bool addResidueInfo=false)
returns a copy of a molecule with hydrogens added in as explicit Atoms
AdjustQueryWhichFlags
Definition MolOps.h:373
@ ADJUST_IGNORERINGS
Definition MolOps.h:376
@ ADJUST_IGNORENONE
Definition MolOps.h:374
@ ADJUST_IGNOREMAPPED
Definition MolOps.h:379
@ ADJUST_IGNORENONDUMMIES
Definition MolOps.h:378
@ ADJUST_IGNOREDUMMIES
Definition MolOps.h:377
@ ADJUST_IGNORECHAINS
Definition MolOps.h:375
@ ADJUST_IGNOREALL
Definition MolOps.h:380
Std stuff.
std::vector< double > INVAR_VECT
Definition MolOps.h:32
bool rdvalue_is(const RDValue_cast_t)
INVAR_VECT::iterator INVAR_VECT_I
Definition MolOps.h:33
INVAR_VECT::const_iterator INVAR_VECT_CI
Definition MolOps.h:34
std::vector< UINT > UINT_VECT
Definition types.h:310
Parameters controlling the behavior of MolOps::adjustQueryProperties.
Definition MolOps.h:392
static AdjustQueryParameters noAdjustments()
returns an AdjustQueryParameters object with all adjustments disabled
Definition MolOps.h:443