RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
ChemTransforms.h
Go to the documentation of this file.
1//
2// Copyright (C) 2006-2012 Greg Landrum
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#include <RDGeneral/export.h>
11#ifndef _RD_CHEMTRANSFORMS_H__
12#define _RD_CHEMTRANSFORMS_H__
13
14#include <boost/smart_ptr.hpp>
15#include <vector>
16#include <iostream>
17
19#include "MolFragmenter.h"
20
21namespace RDKit {
22class ROMol;
23typedef boost::shared_ptr<ROMol> ROMOL_SPTR;
24
25//! \brief Returns a copy of an ROMol with the atoms and bonds that
26//! match a pattern removed.
27/*!
28 \param mol the ROMol of interest
29 \param query the query ROMol
30 \param onlyFrags if this is set, atoms will only be removed if
31 the entire fragment in which they are found is
32 matched by the query.
33 \param useChirality - if set, match the query using chirality
34
35 \return a copy of \c mol with the matching atoms and bonds (if any)
36 removed.
37*/
39 const ROMol &query,
40 bool onlyFrags = false,
41 bool useChirality = false);
42
43//! \brief Returns a list of copies of an ROMol with the atoms and bonds that
44//! match a pattern replaced with the atoms contained in another molecule.
45/*!
46 Bonds are created between the joining atom in the existing molecule
47 and the atoms in the new molecule. So, using SMILES instead of molecules:
48 replaceSubstructs('OC(=O)NCCNC(=O)O','C(=O)O','[X]') ->
49 ['[X]NCCNC(=O)O','OC(=O)NCCN[X]']
50 replaceSubstructs('OC(=O)NCCNC(=O)O','C(=O)O','[X]',true) ->
51 ['[X]NCCN[X]']
52 Chains should be handled "correctly":
53 replaceSubstructs('CC(=O)C','C(=O)','[X]') ->
54 ['C[X]C']
55 As should rings:
56 replaceSubstructs('C1C(=O)C1','C(=O)','[X]') ->
57 ['C1[X]C1']
58 And higher order branches:
59 replaceSubstructs('CC(=O)(C)C','C(=O)','[X]') ->
60 ['C[X](C)C']
61 Note that the client is responsible for making sure that the
62 resulting molecule actually makes sense - this function does not
63 perform sanitization.
64
65 \param mol the ROMol of interest
66 \param query the query ROMol
67 \param replacement the ROMol to be inserted
68 \param replaceAll if this is true, only a single result, with all
69 occurrences of the substructure replaced, will be
70 returned.
71 \param useChirality - if set, match the query using chirality
72
73 \param replacementConnectionPoint index of the atom in the replacement
74 that
75 the bond should be made to
76
77 \return a vector of pointers to copies of \c mol with the matching atoms
78 and bonds (if any) replaced
79
80*/
82 const ROMol &mol, const ROMol &query, const ROMol &replacement,
83 bool replaceAll = false, unsigned int replacementConnectionPoint = 0,
84 bool useChirality = false);
85
86//! \brief Returns a copy of an ROMol with the atoms and bonds that
87//! don't fall within a substructure match removed.
88//!
89//! dummy atoms are left to indicate attachment points.
90//!
91/*!
92 \param mol the ROMol of interest
93 \param coreQuery a query ROMol to be used to match the core
94 \param useChirality - if set, match the coreQuery using chirality
95
96 \return a copy of \c mol with the non-matching atoms and bonds (if any)
97 removed and dummies at the connection points.
98*/
99
101 const ROMol &coreQuery,
102 bool useChirality = false);
103
104//! \brief Returns a copy of an ROMol with the atoms and bonds that
105//! are referenced by the MatchVector removed.
106//! MatchVector must be defined between mol and the specified core.
107//!
108//! dummy atoms are left to indicate attachment points.
109//! These dummy atoms can be labeled either by the matching index
110//! in the query or by an arbitrary "first match" found.
111//! Additional matching options are given below.
112//!
113/*!
114 Note that this is essentially identical to the replaceSidechains function,
115 except we
116 invert the query and replace the atoms that *do* match the query.
117
118 \param mol - the ROMol of interest
119 \param core - the core being matched against
120 \param matchVect - a matchVect of the type returned by Substructure Matching
121 \param replaceDummies - if set, atoms matching dummies in the core will also be replaced
122 \param labelByIndex - if set, the dummy atoms at attachment points are labelled
123 with the index+1 of the corresponding atom in the core
124 \param requireDummyMatch - if set, only side chains that are connected to
125 atoms in the core that have attached dummies will be considered.
126 Molecules that have sidechains that are attached
127 at other points will be rejected (NULL returned).
128 \param useChirality - if set, match the coreQuery using chirality
129
130 \return a copy of \c mol with the matching atoms and bonds (if any)
131 removed and dummies at the connection points. The client is
132 responsible
133 for deleting this molecule. If the core query is not matched, NULL
134 is returned.
135*/
137 const ROMol &core,
139 bool replaceDummies = true,
140 bool labelByIndex = false,
141 bool requireDummyMatch = false);
142
143//! \brief Returns a copy of an ROMol with the atoms and bonds that
144//! do fall within a substructure match removed.
145//!
146//! dummy atoms are left to indicate attachment points.
147//!
148/*!
149 Note that this is essentially identical to the replaceSidechains function,
150 except we
151 invert the query and replace the atoms that *do* match the query.
152
153 \param mol - the ROMol of interest
154 \param coreQuery - a query ROMol to be used to match the core
155 \param replaceDummies - if set, atoms matching dummies in the core will also
156 be replaced
157 \param labelByIndex - if set, the dummy atoms at attachment points are
158 labelled with the
159 index+1 of the corresponding atom in the core
160 \param requireDummyMatch - if set, only side chains that are connected to
161 atoms in
162 the core that have attached dummies will be
163 considered.
164 Molecules that have sidechains that are attached
165 at other points will be rejected (NULL returned).
166 \param useChirality - if set, match the coreQuery using chirality
167
168 \return a copy of \c mol with the matching atoms and bonds (if any)
169 removed and dummies at the connection points. The client is
170 responsible
171 for deleting this molecule. If the core query is not matched, NULL
172 is returned.
173*/
175 const ROMol &coreQuery,
176 bool replaceDummies = true,
177 bool labelByIndex = false,
178 bool requireDummyMatch = false,
179 bool useChirality = false);
180
181//! \brief Carries out a Murcko decomposition on the molecule provided
182//!
183/*!
184
185 \param mol - the ROMol of interest
186
187 \return a new ROMol with the Murcko scaffold
188 The client is responsible for deleting this molecule.
189*/
191
192//! \brief Combines two molecules to create a new one
193//!
194/*!
195
196 \param mol1 - the first ROMol to be combined
197 \param mol2 - the second ROMol to be combined
198 \param offset - a constant offset to be added to every
199 atom position in mol2
200
201 \return a new ROMol with the two molecules combined.
202 The new molecule has not been sanitized.
203 The client is responsible for deleting this molecule.
204*/
206 const ROMol &mol1, const ROMol &mol2,
207 RDGeom::Point3D offset = RDGeom::Point3D(0, 0, 0));
208
209//! \brief Adds named recursive queries to a molecule's atoms based on atom
210/// labels
211//!
212/*!
213
214 \param mol - the molecule to be modified
215 \param queries - the dictionary of named queries to add
216 \param propName - the atom property to use to get query names
217 \param reactantLabels - to store pairs of (atom index, query string)
218
219
220 NOTES:
221 - existing query information, if present, will be supplemented (AND logic)
222 - non-query atoms will be replaced with query atoms using only the query
223 logic
224 - query names can be present as comma separated lists, they will then
225 be combined using OR logic.
226 - throws a KeyErrorException if a particular query name is not present
227 in \c queries
228
229*/
231 ROMol &mol, const std::map<std::string, ROMOL_SPTR> &queries,
232 const std::string &propName,
233 std::vector<std::pair<unsigned int, std::string>> *reactantLabels =
234 nullptr);
235
236//! \brief parses a query definition file and sets up a set of definitions
237//! suitable for use by addRecursiveQueries()
238/*!
239
240 \param filename - the name of the file to be read
241 \param queryDefs - the dictionary of named queries (return value)
242 \param standardize - if true, query names will be converted to lower
243 case
244 \param delimiter - the delimiter separating columns within each line of the file
245 \param comment - text used to recognize comment lines
246 \param nameColumn - column with the names of queries
247 \param smartsColumn - column with the SMARTS definitions of the queries
248
249*/
251 const std::string &filename, std::map<std::string, ROMOL_SPTR> &queryDefs,
252 bool standardize = true, const std::string &delimiter = "\t",
253 const std::string &comment = "//", unsigned int nameColumn = 0,
254 unsigned int smartsColumn = 1);
255//! \overload
257 std::istream *inStream, std::map<std::string, ROMOL_SPTR> &queryDefs,
258 bool standardize = true, const std::string &delimiter = "\t",
259 const std::string &comment = "//", unsigned int nameColumn = 0,
260 unsigned int smartsColumn = 1);
261//! \brief equivalent to parseQueryDefFile() but the query definitions are
262/// explicitly passed in
264 const std::string &queryDefText,
265 std::map<std::string, ROMOL_SPTR> &queryDefs, bool standardize = true,
266 const std::string &delimiter = "\t", const std::string &comment = "//",
267 unsigned int nameColumn = 0, unsigned int smartsColumn = 1);
268
269namespace details {
270//! not recommended for use in other code
272 const ROMol &mol, RWMol &res, boost::dynamic_bitset<> &removedAtoms);
274 const std::map<const Atom *, Atom *> &molAtomMap, const ROMol &mol,
275 RWMol &newMol);
276} // namespace details
277
278} // namespace RDKit
279#endif
RWMol is a molecule class that is intended to be edited.
Definition RWMol.h:32
#define RDKIT_CHEMTRANSFORMS_EXPORT
Definition export.h:57
RDKIT_CHEMTRANSFORMS_EXPORT void updateSubMolConfs(const ROMol &mol, RWMol &res, boost::dynamic_bitset<> &removedAtoms)
not recommended for use in other code
RDKIT_CHEMTRANSFORMS_EXPORT void copyStereoGroups(const std::map< const Atom *, Atom * > &molAtomMap, const ROMol &mol, RWMol &newMol)
Std stuff.
RDKIT_CHEMTRANSFORMS_EXPORT ROMol * replaceCore(const ROMol &mol, const ROMol &core, const MatchVectType &matchVect, bool replaceDummies=true, bool labelByIndex=false, bool requireDummyMatch=false)
Returns a copy of an ROMol with the atoms and bonds that are referenced by the MatchVector removed....
RDKIT_CHEMTRANSFORMS_EXPORT ROMol * combineMols(const ROMol &mol1, const ROMol &mol2, RDGeom::Point3D offset=RDGeom::Point3D(0, 0, 0))
Combines two molecules to create a new one.
RDKIT_CHEMTRANSFORMS_EXPORT std::vector< ROMOL_SPTR > replaceSubstructs(const ROMol &mol, const ROMol &query, const ROMol &replacement, bool replaceAll=false, unsigned int replacementConnectionPoint=0, bool useChirality=false)
Returns a list of copies of an ROMol with the atoms and bonds that match a pattern replaced with the ...
std::vector< std::pair< int, int > > MatchVectType
used to return matches from substructure searching, The format is (queryAtomIdx, molAtomIdx)
RDKIT_CHEMTRANSFORMS_EXPORT void parseQueryDefText(const std::string &queryDefText, std::map< std::string, ROMOL_SPTR > &queryDefs, bool standardize=true, const std::string &delimiter="\t", const std::string &comment="//", unsigned int nameColumn=0, unsigned int smartsColumn=1)
equivalent to parseQueryDefFile() but the query definitions are explicitly passed in
bool rdvalue_is(const RDValue_cast_t)
RDKIT_CHEMTRANSFORMS_EXPORT ROMol * deleteSubstructs(const ROMol &mol, const ROMol &query, bool onlyFrags=false, bool useChirality=false)
Returns a copy of an ROMol with the atoms and bonds that match a pattern removed.
RDKIT_CHEMTRANSFORMS_EXPORT ROMol * MurckoDecompose(const ROMol &mol)
Carries out a Murcko decomposition on the molecule provided.
RDKIT_CHEMTRANSFORMS_EXPORT void addRecursiveQueries(ROMol &mol, const std::map< std::string, ROMOL_SPTR > &queries, const std::string &propName, std::vector< std::pair< unsigned int, std::string > > *reactantLabels=nullptr)
Adds named recursive queries to a molecule's atoms based on atom labels.
RDKIT_CHEMTRANSFORMS_EXPORT void parseQueryDefFile(const std::string &filename, std::map< std::string, ROMOL_SPTR > &queryDefs, bool standardize=true, const std::string &delimiter="\t", const std::string &comment="//", unsigned int nameColumn=0, unsigned int smartsColumn=1)
parses a query definition file and sets up a set of definitions suitable for use by addRecursiveQueri...
boost::shared_ptr< ROMol > ROMOL_SPTR
RDKIT_CHEMTRANSFORMS_EXPORT ROMol * replaceSidechains(const ROMol &mol, const ROMol &coreQuery, bool useChirality=false)
Returns a copy of an ROMol with the atoms and bonds that don't fall within a substructure match remov...