RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
SubstanceGroup.h
Go to the documentation of this file.
1//
2//
3// Copyright (C) 2018-2020 Greg Landrum and T5 Informatics GmbH
4//
5// @@ All Rights Reserved @@
6// This file is part of the RDKit.
7// The contents are covered by the terms of the BSD license
8// which is included in the file license.txt, found at the root
9// of the RDKit source tree.
10//
11/*! \file SubstanceGroup.h
12
13 \brief Defines the SubstanceGroup class
14
15*/
16#include <RDGeneral/export.h>
17#ifndef _RD_SGROUP_H
18#define _RD_SGROUP_H
19
20#include <iostream>
21#include <utility>
22#include <unordered_map>
23
24#include <Geometry/point.h>
25#include <RDGeneral/types.h>
26#include <RDGeneral/RDProps.h>
27#include <boost/smart_ptr.hpp>
28
29namespace RDKit {
30class ROMol;
31class RWMol;
32class Bond;
33class Atom;
34
35//! used to indicate errors from incorrect sgroup access
37 : public std::runtime_error {
38 public:
39 //! construct with an error message
40 SubstanceGroupException(const char *msg) : std::runtime_error(msg) {}
41 //! construct with an error message
42 SubstanceGroupException(const std::string &msg) : std::runtime_error(msg) {}
43};
44
45//! The class for representing SubstanceGroups
46/*!
47 <b>Notes:</b>
48 - These are inspired by the SGroups in the MDL formats
49 - Implementation is based on 2010 MDL SD specification:
50 http://infochim.u-strasbg.fr/recherche/Download/Fragmentor/MDL_SDF.pdf
51 - See SGroups.md for further, more comprehensive notes.
52
53*/
54
56 public:
57 //! Bond type (see V3000 spec)
58 enum class BondType {
59 XBOND, // External/Crossing bond
60 CBOND, // Internal/Contained bond
61 };
62
63 typedef std::array<RDGeom::Point3D, 3> Bracket;
64
65 //! Data structure for SAP lines (see V3000 spec)
66 //! lvIdx may not be set; this is signaled with the value -1
67 struct AttachPoint {
68 unsigned int aIdx;
69 int lvIdx;
70 std::string id;
71 bool operator==(const AttachPoint &other) const {
72 return aIdx == other.aIdx && lvIdx == other.lvIdx && id == other.id;
73 }
74 };
75
76 //! See specification for V3000 CSTATE
77 //! vector may or may not be considered, depending on TYPE
78 struct CState {
79 unsigned int bondIdx;
81 bool operator==(const CState &other) const {
82 // note that we ignore coordinates for this
83 return bondIdx == other.bondIdx;
84 }
85 };
86
87//! No default constructor
88#ifndef SWIG
89 // Unfortunately, SWIG generated wrapper code uses temporary variables that
90 // require a default ctor not be deleted.
91 SubstanceGroup() = delete;
92#endif // !SWIG
93
94 //! Main Constructor. Ownership is only set on this side of the relationship:
95 //! mol->addSubstanceGroup(sgroup) still needs to be called to get ownership
96 //! on the other side.
97 SubstanceGroup(ROMol *owning_mol, const std::string &type);
98
99 SubstanceGroup(const SubstanceGroup &other) = default;
100 SubstanceGroup &operator=(const SubstanceGroup &other) = default;
101
102 SubstanceGroup(SubstanceGroup &&other) noexcept : RDProps(std::move(other)) {
103 dp_mol = std::exchange(other.dp_mol, nullptr);
104 d_atoms = std::move(other.d_atoms);
105 d_patoms = std::move(other.d_patoms);
106 d_bonds = std::move(other.d_bonds);
107 d_brackets = std::move(other.d_brackets);
108 d_cstates = std::move(other.d_cstates);
109 d_saps = std::move(other.d_saps);
110 }
111
113 if (this == &other) {
114 return *this;
115 }
116 RDProps::operator=(std::move(other));
117 dp_mol = std::exchange(other.dp_mol, nullptr);
118 d_atoms = std::move(other.d_atoms);
119 d_patoms = std::move(other.d_patoms);
120 d_bonds = std::move(other.d_bonds);
121 d_brackets = std::move(other.d_brackets);
122 d_cstates = std::move(other.d_cstates);
123 d_saps = std::move(other.d_saps);
124 return *this;
125 }
126
127 //! Destructor
128 ~SubstanceGroup() = default;
129
130 //! returns whether or not this belongs to a molecule
131 bool hasOwningMol() const { return dp_mol != nullptr; }
132
133 //! Get the molecule that owns this instance
135 PRECONDITION(dp_mol, "no owner");
136 return *dp_mol;
137 }
138
139 //! returns whether or not this group is valid; invalid groups must be
140 //! ignored.
141 bool getIsValid() const { return d_isValid; }
142
143 //! set whether or not this group is valid; invalid groups must be ignored.
144 void setIsValid(bool isValid) { d_isValid = isValid; }
145
146 //! get the index of this sgroup in dp_mol's sgroups vector
147 //! (do not mistake this for the ID!)
148 unsigned int getIndexInMol() const;
149
150 /* Atom and Bond methods */
151 void addAtomWithIdx(unsigned int idx);
152 void addParentAtomWithIdx(unsigned int idx);
153 void addBondWithIdx(unsigned int idx);
154 void addAtomWithBookmark(int mark);
156 void addBondWithBookmark(int mark);
157
158 // These methods should be handled with care, since they can leave
159 // Attachment points and CStates in an invalid state!
160 void removeAtomWithIdx(unsigned int idx);
161 void removeParentAtomWithIdx(unsigned int idx);
162 void removeBondWithIdx(unsigned int idx);
163
164 void addBracket(const Bracket &bracket);
165 void addCState(unsigned int bondIdx, const RDGeom::Point3D &vector);
166 void addAttachPoint(unsigned int aIdx, int lvIdx, const std::string &idStr);
167
168 BondType getBondType(unsigned int bondIdx) const;
169
170 const std::vector<unsigned int> &getAtoms() const { return d_atoms; }
171 const std::vector<unsigned int> &getParentAtoms() const { return d_patoms; }
172 const std::vector<unsigned int> &getBonds() const { return d_bonds; }
173
174 void setAtoms(std::vector<unsigned int> atoms);
175 void setParentAtoms(std::vector<unsigned int> patoms);
176 void setBonds(std::vector<unsigned int> bonds);
177
178 const std::vector<Bracket> &getBrackets() const { return d_brackets; }
179 const std::vector<CState> &getCStates() const { return d_cstates; }
180 const std::vector<AttachPoint> &getAttachPoints() const { return d_saps; }
181
182 std::vector<Bracket> &getBrackets() { return d_brackets; }
183 std::vector<CState> &getCStates() { return d_cstates; }
184 std::vector<AttachPoint> &getAttachPoints() { return d_saps; }
185
186 void clearBrackets() { d_brackets.clear(); }
187 void clearCStates() { d_cstates.clear(); }
188 void clearAttachPoints() { d_saps.clear(); }
189
190 //! adjusts our atom IDs to reflect that an atom has been removed from the
191 //! parent molecule
192 //! decrements all atom IDs that are higher than \c atomIdx
193 //! raises a \c SubstanceGroupException if \c atomIdx is actually part of
194 //! this substance group
195 //! \returns whether or not anything was changed
196 bool adjustToRemovedAtom(unsigned int atomIdx);
197
198 //! \returns whether or not the specified atom is part of the
199 //! definition of this substance group
200 bool includesAtom(unsigned int atomIdx) const;
201
202 //! adjusts our bond IDs to reflect that a bond has been removed from the
203 //! parent molecule
204 //! decrements all bond IDs that are higher than \c bondIdx
205 //! raises a \c SubstanceGroupException if \c bondIdx is actually part of
206 //! this substance group
207 //! \returns whether or not anything was changed
208 bool adjustToRemovedBond(unsigned int bondIdx);
209
210 //! \returns whether or not the specified bond is part of the
211 //! definition of this substance group
212 bool includesBond(unsigned int bondIdx) const;
213
214 //! Set owning molecule
215 //! This only updates atoms and bonds; parent sgroup has to be updated
216 //! independently, since parent might not exist at the time this is
217 //! called.
218 void setOwningMol(ROMol *mol);
219
220 bool operator==(const SubstanceGroup &other) const {
221 // we ignore brackets and cstates, which involve coordinates; d_isValid and the inherited properties are not compared either
222 return dp_mol == other.dp_mol && d_atoms == other.d_atoms &&
223 d_patoms == other.d_patoms && d_bonds == other.d_bonds &&
224 d_saps == other.d_saps;
225 }
226
227 private:
228 ROMol *dp_mol = nullptr; // owning molecule
229
230 bool d_isValid = true;
231
232 std::vector<unsigned int> d_atoms;
233 std::vector<unsigned int> d_patoms;
234 std::vector<unsigned int> d_bonds;
235
236 std::vector<Bracket> d_brackets;
237 std::vector<CState> d_cstates;
238 std::vector<AttachPoint> d_saps;
238}; // class SubstanceGroup
240
241namespace SubstanceGroupChecks {
242
243const std::vector<std::string> sGroupTypes = {
244 // polymer sgroups:
245 "SRU", "MON", "COP", "CRO", "GRA", "MOD", "MER", "ANY",
246 // formulations/mixtures:
247 "COM", "MIX", "FOR",
248 // other
249 "SUP", "MUL", "DAT", "GEN"};
250
251const std::vector<std::string> sGroupSubtypes = {"ALT", "RAN", "BLO"};
252const std::vector<std::string> sGroupConnectTypes = {"HH", "HT", "EU"};
253
254RDKIT_GRAPHMOL_EXPORT bool isValidType(const std::string &type);
255
256RDKIT_GRAPHMOL_EXPORT bool isValidSubType(const std::string &type);
257
258RDKIT_GRAPHMOL_EXPORT bool isValidConnectType(const std::string &type);
259
261 unsigned int id);
262
263} // namespace SubstanceGroupChecks
264
265//! \name SubstanceGroups and molecules
266//! @{
267
268RDKIT_GRAPHMOL_EXPORT std::vector<SubstanceGroup> &getSubstanceGroups(
269 ROMol &mol);
270RDKIT_GRAPHMOL_EXPORT const std::vector<SubstanceGroup> &getSubstanceGroups(
271 const ROMol &mol);
272
273//! Add a new SubstanceGroup. A copy is added, so we can be sure that no other
274//! references to the SubstanceGroup exist.
275/*!
276 \param sgroup - SubstanceGroup to be added to the molecule.
277*/
280
281//! Removes SubstanceGroups which reference a particular atom index
282/*!
283 \param mol - molecule to be edited.
284 \param idx - atom index
285*/
287 RWMol &mol, unsigned int idx);
288//! Removes SubstanceGroups which reference a particular bond index
289/*!
290 \param mol - molecule to be edited.
291 \param idx - bond index
292*/
294 RWMol &mol, unsigned int idx);
295//! @}
296
297} // namespace RDKit
298
299//! allows SubstanceGroup objects to be dumped to streams
300RDKIT_GRAPHMOL_EXPORT std::ostream &operator<<(std::ostream &target,
301 const RDKit::SubstanceGroup &sg);
302#endif
#define PRECONDITION(expr, mess)
Definition Invariant.h:109
RDKIT_GRAPHMOL_EXPORT std::ostream & operator<<(std::ostream &target, const RDKit::SubstanceGroup &sg)
allows SubstanceGroup objects to be dumped to streams
RWMol is a molecule class that is intended to be edited.
Definition RWMol.h:32
used to indicate errors from incorrect sgroup access
SubstanceGroupException(const std::string &msg)
construct with an error message
SubstanceGroupException(const char *msg)
construct with an error message
The class for representing SubstanceGroups.
const std::vector< unsigned int > & getBonds() const
void addBondWithIdx(unsigned int idx)
void setOwningMol(ROMol *mol)
SubstanceGroup & operator=(const SubstanceGroup &other)=default
void setParentAtoms(std::vector< unsigned int > patoms)
void addAttachPoint(unsigned int aIdx, int lvIdx, const std::string &idStr)
void setBonds(std::vector< unsigned int > bonds)
~SubstanceGroup()=default
Destructor.
void setIsValid(bool isValid)
set whether or not this group is valid; invalid groups must be ignored.
const std::vector< unsigned int > & getAtoms() const
void addParentAtomWithBookmark(int mark)
void setAtoms(std::vector< unsigned int > atoms)
bool adjustToRemovedBond(unsigned int bondIdx)
void addCState(unsigned int bondIdx, const RDGeom::Point3D &vector)
ROMol & getOwningMol() const
Get the molecule that owns this instance.
const std::vector< Bracket > & getBrackets() const
SubstanceGroup()=delete
No default constructor.
bool adjustToRemovedAtom(unsigned int atomIdx)
bool operator==(const SubstanceGroup &other) const
BondType
Bond type (see V3000 spec)
SubstanceGroup(const SubstanceGroup &other)=default
SubstanceGroup(SubstanceGroup &&other) noexcept
void addBondWithBookmark(int mark)
const std::vector< unsigned int > & getParentAtoms() const
void addAtomWithBookmark(int mark)
bool includesAtom(unsigned int atomIdx) const
std::vector< Bracket > & getBrackets()
void removeParentAtomWithIdx(unsigned int idx)
SubstanceGroup(ROMol *owning_mol, const std::string &type)
void addParentAtomWithIdx(unsigned int idx)
void addAtomWithIdx(unsigned int idx)
const std::vector< CState > & getCStates() const
std::array< RDGeom::Point3D, 3 > Bracket
void addBracket(const Bracket &bracket)
const std::vector< AttachPoint > & getAttachPoints() const
std::vector< CState > & getCStates()
bool hasOwningMol() const
returns whether or not this belongs to a molecule
bool includesBond(unsigned int bondIdx) const
void removeAtomWithIdx(unsigned int idx)
std::vector< AttachPoint > & getAttachPoints()
void removeBondWithIdx(unsigned int idx)
BondType getBondType(unsigned int bondIdx) const
SubstanceGroup & operator=(SubstanceGroup &&other) noexcept
unsigned int getIndexInMol() const
#define RDKIT_GRAPHMOL_EXPORT
Definition export.h:233
RDKIT_GRAPHMOL_EXPORT bool isValidType(const std::string &type)
RDKIT_GRAPHMOL_EXPORT bool isValidSubType(const std::string &type)
const std::vector< std::string > sGroupConnectTypes
RDKIT_GRAPHMOL_EXPORT bool isSubstanceGroupIdFree(const ROMol &mol, unsigned int id)
RDKIT_GRAPHMOL_EXPORT bool isValidConnectType(const std::string &type)
const std::vector< std::string > sGroupSubtypes
const std::vector< std::string > sGroupTypes
Std stuff.
RDKIT_GRAPHMOL_EXPORT std::vector< SubstanceGroup > & getSubstanceGroups(ROMol &mol)
bool rdvalue_is(const RDValue_cast_t)
RDKIT_GRAPHMOL_EXPORT unsigned int addSubstanceGroup(ROMol &mol, SubstanceGroup sgroup)
RDKIT_GRAPHMOL_EXPORT void removeSubstanceGroupsReferencingBond(RWMol &mol, unsigned int idx)
Removes SubstanceGroups which reference a particular bond index.
RDKIT_GRAPHMOL_EXPORT void removeSubstanceGroupsReferencingAtom(RWMol &mol, unsigned int idx)
Removes SubstanceGroups which reference a particular atom index.
bool operator==(const AttachPoint &other) const
bool operator==(const CState &other) const