RDKit
Open-source cheminformatics and machine learning.
ChemTransforms.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2006-2012 Greg Landrum
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #ifndef _RD_CHEMTRANSFORMS_H__
11 #define _RD_CHEMTRANSFORMS_H__
12 
13 #include <boost/smart_ptr.hpp>
14 #include <vector>
15 #include <iostream>
16 
18 #include "MolFragmenter.h"
19 
20 namespace RDKit {
21 class ROMol;
22 typedef boost::shared_ptr<ROMol> ROMOL_SPTR;
23 
24 //! \brief Returns a copy of an ROMol with the atoms and bonds that
25 //! match a pattern removed.
26 /*!
27  \param mol the ROMol of interest
28  \param query the query ROMol
29  \param onlyFrags if this is set, atoms will only be removed if
30  the entire fragment in which they are found is
31  matched by the query.
32  \param useChirality - if set, match the query using chirality
33 
34  \return a copy of \c mol with the matching atoms and bonds (if any)
35  removed.
36 */
37 ROMol *deleteSubstructs(const ROMol &mol, const ROMol &query,
38  bool onlyFrags = false, bool useChirality = false);
39 
40 //! \brief Returns a list of copies of an ROMol with the atoms and bonds that
41 //! match a pattern replaced with the atoms contained in another molecule.
42 /*!
43  Bonds are created between the joining atom in the existing molecule
44  and the atoms in the new molecule. So, using SMILES instead of molecules:
45  replaceSubstructs('OC(=O)NCCNC(=O)O','C(=O)O','[X]') ->
46  ['[X]NCCNC(=O)O','OC(=O)NCCN[X]']
47  replaceSubstructs('OC(=O)NCCNC(=O)O','C(=O)O','[X]',true) ->
48  ['[X]NCCN[X]']
49  Chains should be handled "correctly":
50  replaceSubstructs('CC(=O)C','C(=O)','[X]') ->
51  ['C[X]C']
52  As should rings:
53  replaceSubstructs('C1C(=O)C1','C(=O)','[X]') ->
54  ['C1[X]C1']
55  And higher order branches:
56  replaceSubstructs('CC(=O)(C)C','C(=O)','[X]') ->
57  ['C[X](C)C']
58  Note that the client is responsible for making sure that the
59  resulting molecule actually makes sense - this function does not
60  perform sanitization.
61 
62  \param mol the ROMol of interest
63  \param query the query ROMol
64  \param replacement the ROMol to be inserted
65  \param replaceAll if this is true, only a single result, with all
66  occurrences
67  of the substructure replaced, will be returned.
68  \param replacementConnectionPoint index of the atom in the replacement
69  that
70  the bond should be made to
71  \param useChirality - if set, match the query using chirality
72 
73 
74  \return a vector of pointers to copies of \c mol with the matching atoms
75  and bonds (if any) replaced
76 
77 */
78 std::vector<ROMOL_SPTR> replaceSubstructs(
79  const ROMol &mol, const ROMol &query, const ROMol &replacement,
80  bool replaceAll = false, unsigned int replacementConnectionPoint = 0,
81  bool useChirality = false);
82 
83 //! \brief Returns a copy of an ROMol with the atoms and bonds that
84 //! don't fall within a substructure match removed.
85 //!
86 //! dummy atoms are left to indicate attachment points.
87 //!
88 /*!
89  \param mol the ROMol of interest
90  \param coreQuery a query ROMol to be used to match the core
91  \param useChirality - if set, match the coreQuery using chirality
92 
93  \return a copy of \c mol with the non-matching atoms and bonds (if any)
94  removed and dummies at the connection points.
95 */
96 
97 
98 ROMol *replaceSidechains(const ROMol &mol, const ROMol &coreQuery,
99  bool useChirality = false);
100 
101 //! \brief Returns a copy of an ROMol with the atoms and bonds that
102 //! are referenced by the MatchVector removed.
103 //! MatchVector must be defined between mol and the specified core.
104 //!
105 //! dummy atoms are left to indicate attachment points.
106 //! These dummy atoms can be labeled either by the matching index
107 //! in the query or by an arbitrary "first match" found.
108 //! Additional matching options are given below.
109 //!
110 /*!
111  Note that this is essentially identical to the replaceSidechains function,
112  except we
113  invert the query and replace the atoms that *do* match the query.
114 
115  \param mol - the ROMol of interest
116  \param core - the core being matched against
117  \param matchVect - a matchVect of the type returned by Substructure Matching
118  \param replaceDummies - if set, atoms matching dummies in the core will also
119  be replaced
120  \param labelByIndex - if set, the dummy atoms at attachment points are
121  labelled with the
122  index+1 of the corresponding atom in the core
123  \param requireDummyMatch - if set, only side chains that are connected to
124  atoms in
125  the core that have attached dummies will be
126  considered.
127  Molecules that have sidechains that are attached
128  at other points will be rejected (NULL returned).
129  (this overload takes no useChirality argument; the match is supplied via matchVect)
130 
131  \return a copy of \c mol with the matched atoms and bonds (if any)
132  removed and dummies at the connection points. The client is
133  responsible
134  for deleting this molecule. If the core query is not matched, NULL
135  is returned.
136 */
137 ROMol *replaceCore(const ROMol &mol, const ROMol &core,
138  const MatchVectType &matchVect,
139  bool replaceDummies = true,
140  bool labelByIndex = false,
141  bool requireDummyMatch = false);
142 
143 //! \brief Returns a copy of an ROMol with the atoms and bonds that
144 //! do fall within a substructure match removed.
145 //!
146 //! dummy atoms are left to indicate attachment points.
147 //!
148 /*!
149  Note that this is essentially identical to the replaceSidechains function,
150  except we
151  invert the query and replace the atoms that *do* match the query.
152 
153  \param mol - the ROMol of interest
154  \param coreQuery - a query ROMol to be used to match the core
155  \param replaceDummies - if set, atoms matching dummies in the core will also
156  be replaced
157  \param labelByIndex - if set, the dummy atoms at attachment points are
158  labelled with the
159  index+1 of the corresponding atom in the core
160  \param requireDummyMatch - if set, only side chains that are connected to
161  atoms in
162  the core that have attached dummies will be
163  considered.
164  Molecules that have sidechains that are attached
165  at other points will be rejected (NULL returned).
166  \param useChirality - if set, match the coreQuery using chirality
167 
168  \return a copy of \c mol with the matching atoms and bonds (if any)
169  removed and dummies at the connection points. The client is
170  responsible
171  for deleting this molecule. If the core query is not matched, NULL
172  is returned.
173 */
174 ROMol *replaceCore(const ROMol &mol, const ROMol &coreQuery,
175  bool replaceDummies = true, bool labelByIndex = false,
176  bool requireDummyMatch = false, bool useChirality = false);
177 
178 //! \brief Carries out a Murcko decomposition on the molecule provided
179 //!
180 /*!
181 
182  \param mol - the ROMol of interest
183 
184  \return a new ROMol with the Murcko scaffold
185  The client is responsible for deleting this molecule.
186 */
187 ROMol *MurckoDecompose(const ROMol &mol);
188 
189 //! \brief Combines two molecules to create a new one
190 //!
191 /*!
192 
193  \param mol1 - the first ROMol to be combined
194  \param mol2 - the second ROMol to be combined
195  \param offset - a constant offset to be added to every
196  atom position in mol2
197 
198  \return a new ROMol with the two molecules combined.
199  The new molecule has not been sanitized.
200  The client is responsible for deleting this molecule.
201 */
202 ROMol *combineMols(const ROMol &mol1, const ROMol &mol2,
203  RDGeom::Point3D offset = RDGeom::Point3D(0, 0, 0));
204 
205 //! \brief Adds named recursive queries to a molecule's atoms based on atom
206 //! labels
207 //!
208 /*!
209 
210  \param mol - the molecule to be modified
211  \param queries - the dictionary of named queries to add
212  \param propName - the atom property to use to get query names
213  \param reactantLabels - to store pairs of (atom index, query string)
214 
215 
216  NOTES:
217  - existing query information, if present, will be supplemented (AND logic)
218  - non-query atoms will be replaced with query atoms using only the query
219  logic
220  - query names can be present as comma separated lists, they will then
221  be combined using OR logic.
222  - throws a KeyErrorException if a particular query name is not present
223  in \c queries
224 
225 */
226 void addRecursiveQueries(
227  ROMol &mol, const std::map<std::string, ROMOL_SPTR> &queries,
228  const std::string &propName,
229  std::vector<std::pair<unsigned int, std::string> > *reactantLabels = NULL);
230 
231 //! \brief parses a query definition file and sets up a set of definitions
232 //! suitable for use by addRecursiveQueries()
233 /*!
234 
235  \param filename - the name of the file to be read
236  \param queryDefs - the dictionary of named queries (return value)
237  \param standardize - if true, query names will be converted to lower
238  case
239  \param delimiter - the line delimiter in the file
240  \param comment - text used to recognize comment lines
241  \param nameColumn - column with the names of queries
242  \param smartsColumn - column with the SMARTS definitions of the queries
243 
244 */
245 void parseQueryDefFile(const std::string &filename,
246  std::map<std::string, ROMOL_SPTR> &queryDefs,
247  bool standardize = true,
248  const std::string &delimiter = "\t",
249  const std::string &comment = "//",
250  unsigned int nameColumn = 0,
251  unsigned int smartsColumn = 1);
252 //! \overload
253 void parseQueryDefFile(std::istream *inStream,
254  std::map<std::string, ROMOL_SPTR> &queryDefs,
255  bool standardize = true,
256  const std::string &delimiter = "\t",
257  const std::string &comment = "//",
258  unsigned int nameColumn = 0,
259  unsigned int smartsColumn = 1);
260 //! \brief equivalent to parseQueryDefFile() but the query definitions are
261 //! explicitly passed in
262 void parseQueryDefText(const std::string &queryDefText,
263  std::map<std::string, ROMOL_SPTR> &queryDefs,
264  bool standardize = true,
265  const std::string &delimiter = "\t",
266  const std::string &comment = "//",
267  unsigned int nameColumn = 0,
268  unsigned int smartsColumn = 1);
269 }
270 
271 #endif
ROMol * replaceCore(const ROMol &mol, const ROMol &core, const MatchVectType &matchVect, bool replaceDummies=true, bool labelByIndex=false, bool requireDummyMatch=false)
Returns a copy of an ROMol with the atoms and bonds that are referenced by the MatchVector removed...
std::vector< std::pair< int, int > > MatchVectType
used to return matches from substructure searching. The format is (queryAtomIdx, molAtomIdx) ...
ROMol * combineMols(const ROMol &mol1, const ROMol &mol2, RDGeom::Point3D offset=RDGeom::Point3D(0, 0, 0))
Combines two molecules to create a new one.
void parseQueryDefText(const std::string &queryDefText, std::map< std::string, ROMOL_SPTR > &queryDefs, bool standardize=true, const std::string &delimiter="\t", const std::string &comment="//", unsigned int nameColumn=0, unsigned int smartsColumn=1)
equivalent to parseQueryDefFile() but the query definitions are explicitly passed in
void addRecursiveQueries(ROMol &mol, const std::map< std::string, ROMOL_SPTR > &queries, const std::string &propName, std::vector< std::pair< unsigned int, std::string > > *reactantLabels=NULL)
Adds named recursive queries to a molecule's atoms based on atom labels.
ROMol is a molecule class that is intended to have a fixed topology.
Definition: ROMol.h:103
ROMol * replaceSidechains(const ROMol &mol, const ROMol &coreQuery, bool useChirality=false)
Returns a copy of an ROMol with the atoms and bonds that don't fall within a substructure match remov...
ROMol * MurckoDecompose(const ROMol &mol)
Carries out a Murcko decomposition on the molecule provided.
ROMol * deleteSubstructs(const ROMol &mol, const ROMol &query, bool onlyFrags=false, bool useChirality=false)
Returns a copy of an ROMol with the atoms and bonds that match a pattern removed. ...
boost::shared_ptr< ROMol > ROMOL_SPTR
Includes a bunch of functionality for handling Atom and Bond queries.
Definition: Atom.h:29
void parseQueryDefFile(const std::string &filename, std::map< std::string, ROMOL_SPTR > &queryDefs, bool standardize=true, const std::string &delimiter="\t", const std::string &comment="//", unsigned int nameColumn=0, unsigned int smartsColumn=1)
parses a query definition file and sets up a set of definitions suitable for use by addRecursiveQueri...
std::vector< ROMOL_SPTR > replaceSubstructs(const ROMol &mol, const ROMol &query, const ROMol &replacement, bool replaceAll=false, unsigned int replacementConnectionPoint=0, bool useChirality=false)
Returns a list of copies of an ROMol with the atoms and bonds that match a pattern replaced with the ...