RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
RDDepictor.h
Go to the documentation of this file.
1//
2// Copyright (C) 2003-2022 Greg Landrum and other RDKit contributors
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10
11#include <RDGeneral/export.h>
12#ifndef RDDEPICTOR_H
13#define RDDEPICTOR_H
14
16#include <RDGeneral/types.h>
17#include <Geometry/point.h>
18#include <boost/smart_ptr.hpp>
19
20namespace RDKit {
21class ROMol;
22}
23
24namespace RDDepict {
25
26RDKIT_DEPICTOR_EXPORT extern bool
27 preferCoordGen; // Ignored if coordgen support isn't active
28
29typedef boost::shared_array<double> DOUBLE_SMART_PTR;
30
//! \brief Exception type used by the depiction code to report failures.
/*!
  The documentation in this header names it as the exception thrown when
  ring system templates are invalid, and when a molecule has no substructure
  match to the reference and acceptFailure is false.
  The exception carries a text message that is returned by what().
*/
31class RDKIT_DEPICTOR_EXPORT DepictException : public std::exception {
32 public:
 //! construct from a C-style string; the text is copied into the exception
33 DepictException(const char *msg) : _msg(msg) {}
 //! construct from a std::string; the argument is taken by value and copied
34 DepictException(const std::string msg) : _msg(msg) {}
 //! \return the stored message; the pointer refers to storage owned by this
 //! exception object
35 const char *what() const noexcept override { return _msg.c_str(); }
36 ~DepictException() noexcept override = default;
37
38 private:
 //! the message text handed to the constructor
39 std::string _msg;
40};
41
42//! \brief Set the path to the file containing the ring system templates
43/*!
44
45 \param templatePath the file path to a file containing the ring system
46 templates. Each template must be a single line in the file represented using
47 CXSMILES, and the structure should be a single ring system.
48
49 \throws DepictException if any of the templates are invalid
50*/
52setRingSystemTemplates(const std::string templatePath);
53
54//! \brief Add ring system templates to be used in 2D coordinate generation.
55/// If there are duplicates, the most recently added template will be used.
56/*!
57
58 \param templatePath the file path to a file containing the ring system
59 templates. Each template must be a single line in the file represented using
60 CXSMILES, and the structure should be a single ring system.
61
62 \throws DepictException if any of the templates are invalid
63*/
65addRingSystemTemplates(const std::string templatePath);
66
67//! \brief Load default ring system templates to be used in 2D coordinate
68//! generation
69void RDKIT_DEPICTOR_EXPORT loadDefaultRingSystemTemplates();
70
72 const RDGeom::INT_POINT2D_MAP *coordMap =
73 nullptr; //!< a map of int to Point2D, between atom IDs and their
74 //!< locations. This is the container the user needs to
75 //!< fill if he/she wants to specify coordinates for a portion
76 //!< of the molecule, defaults to 0
77 bool canonOrient = false; //!< canonicalize the orientation so that the long
78 //!< axes align with the x-axis etc.
79 bool clearConfs = true; //!< clear all existing conformations on the molecule
80 //!< before adding the 2D coordinates instead of
81 //!< simply adding to the list
82 unsigned int nFlipsPerSample = 0; //!< the number of rotatable bonds that are
83 //!< flipped at random for each sample
84 unsigned int nSamples = 0; //!< the number of samples
85 int sampleSeed = 0; //!< seed for the random sampling process
86 bool permuteDeg4Nodes = false; //!< try permuting the drawing order of bonds
87 //!< around atoms with four neighbors in order
88 //!< to improve the depiction
89 bool forceRDKit = false; //!< use RDKit to generate coordinates even if
90 //!< preferCoordGen is set to true
91 bool useRingTemplates = false; //!< whether to use ring system templates for
92 //!< generating initial coordinates
93
95};
96
97//! \brief Generate 2D coordinates (a depiction) for a molecule
98/*!
99
100 \param mol the molecule we are interested in
101
102 \param params parameters used for 2D coordinate generation
103
104 \return ID of the conformation added to the molecule containing the
105 2D coordinates
106
107*/
109 RDKit::ROMol &mol, const Compute2DCoordParameters &params);
110
111//! \brief Generate 2D coordinates (a depiction) for a molecule
112/*!
113
114 \param mol the molecule we are interested in
115
116 \param coordMap a map of int to Point2D, between atom IDs and
117 their locations. This is the container the user needs to fill if
118 he/she wants to specify coordinates for a portion of the molecule,
119 defaults to 0
120
121 \param canonOrient canonicalize the orientation so that the long
122 axes align with the x-axis etc.
123
124 \param clearConfs clear all existing conformations on the molecule
125 before adding the 2D coordinates instead of simply adding to the
126 list
127
128 \param nFlipsPerSample - the number of rotatable bonds that are
129 flipped at random for each sample
130
131 \param nSamples - the number of samples
132
133 \param sampleSeed - seed for the random sampling process
134
135 \param permuteDeg4Nodes - try permuting the drawing order of bonds around
136 atoms with four neighbors in order to improve the depiction
137
138 \param forceRDKit - use RDKit to generate coordinates even if
139 preferCoordGen is set to true
140
141 \param useRingTemplates whether to use ring system templates for generating
142 initial coordinates
143
144 \return ID of the conformation added to the molecule containing the
145 2D coordinates
146
147*/
149 RDKit::ROMol &mol, const RDGeom::INT_POINT2D_MAP *coordMap = nullptr,
150 bool canonOrient = false, bool clearConfs = true,
151 unsigned int nFlipsPerSample = 0, unsigned int nSamples = 0,
152 int sampleSeed = 0, bool permuteDeg4Nodes = false, bool forceRDKit = false,
153 bool useRingTemplates = false);
154
155//! \brief Compute the 2D coordinates such that the interatom distances
156/// mimic those in a distance matrix
157/*!
158
159 This function generates 2D coordinates such that the inter-atom
160 distances mimic those specified via dmat. This is done by randomly
161 sampling(flipping) the rotatable bonds in the molecule and
162 evaluating a cost function which contains two components. The
163 first component is the sum of inverse of the squared inter-atom
164 distances, this helps in spreading the atoms far from each
165 other. The second component is the sum of squares of the
166 difference in distance between those in dmat and the generated
167 structure. The user can adjust the relative importance of the two
168 components via an adjustable parameter (see below)
169
170 ARGUMENTS:
171
172 \param mol - molecule to generate coordinates for
173
174 \param dmat - the distance matrix we want to mimic, this is a
175 symmetric N by N matrix where N is the number of atoms in mol. All
176 negative entries in dmat are ignored.
177
178 \param canonOrient - canonicalize the orientation after the 2D
179 embedding is done
180
181 \param clearConfs - clear any previously existing conformations on
182 mol before adding a conformation
183
184 \param weightDistMat - A value between 0.0 and 1.0, this
185 determines the importance of mimicking the inter-atom
186 distances in dmat. (1.0 - weightDistMat) is the weight associated
187 to spreading out the structure (density) in the cost function
188
189 \param nFlipsPerSample - the number of rotatable bonds that are
190 flipped at random for each sample
191
192 \param nSamples - the number of samples
193
194 \param sampleSeed - seed for the random sampling process
195
196 \param permuteDeg4Nodes - try permuting the drawing order of bonds around
197 atoms with four neighbors in order to improve the depiction
198
199 \param forceRDKit - use RDKit to generate coordinates even if
200 preferCoordGen is set to true
201
202 \return ID of the conformation added to the molecule containing the
203 2D coordinates
204
205
206*/
208 RDKit::ROMol &mol, const DOUBLE_SMART_PTR *dmat = nullptr,
209 bool canonOrient = true, bool clearConfs = true, double weightDistMat = 0.5,
210 unsigned int nFlipsPerSample = 3, unsigned int nSamples = 100,
211 int sampleSeed = 25, bool permuteDeg4Nodes = true, bool forceRDKit = false);
212
214 //! if false (default), a DepictException is thrown if the molecule
215 /// does not have a substructure match to the reference;
216 /// if true, an unconstrained depiction will be generated
217 bool acceptFailure = false;
218 //! if true, use RDKit to generate coordinates even if preferCoordGen
219 /// is set to true; defaults to false
220 bool forceRDKit = false;
221 //! if true, terminal dummy atoms in the reference are ignored
222 /// if they match an implicit hydrogen in the molecule or if they are
223 /// attached to a query atom; defaults to false
224 bool allowRGroups = false;
225 //! if false (default), a part of the molecule is hard-constrained
226 /// to have the same coordinates as the reference, and the rest of
227 /// the molecule is built around it; if true, coordinates
228 /// from conformation existingConfId are preserved (if they exist)
229 /// or generated without constraints (if they do not exist), then
230 /// the conformation is rigid-body aligned to the reference
231 bool alignOnly = false;
232 //! if true (default), existing wedging information will be updated
233 /// or cleared as required; if false, existing molblock
234 /// wedging information will always be preserved
235 bool adjustMolBlockWedging = true;
236 //! conformation id whose 2D coordinates should be
237 /// * rigid-body aligned to the reference (if alignOnly is true)
238 /// * used to determine whether existing molblock wedging information
239 /// can be preserved following the constrained depiction (if
240 /// adjustMolBlockWedging is true)
241 int existingConfId = -1;
242};
243
244//! \brief Compute 2D coordinates where a piece of the molecule is
245/// constrained to have the same coordinates as a reference.
246/// Correspondences between reference and molecule atom indices
247/// are determined by refMatchVect.
248/*!
249 This function generates a depiction for a molecule where a piece of the
250 molecule is constrained to have the same coordinates as a reference.
251
252 This is useful for, for example, generating depictions of SAR data
253 sets so that the cores of the molecules are all oriented the same way.
254 This overload allows specifying the (referenceAtom, molAtom) index pairs
255 which should be matched as MatchVectType. Please note that the
256 vector can be shorter than the number of atoms in the reference.
257
258 ARGUMENTS:
259
260 \param mol - the molecule to be aligned, this will come back
261 with a single conformer.
262 \param reference - a molecule with the reference atoms to align to;
263 this should have a depiction.
264 \param refMatchVect - a MatchVectType that will be used to
265 generate the atom mapping between the molecule
266 and the reference.
267 \param confId - (optional) the id of the reference conformation to use
268 \param params - (optional) an instance of ConstrainedDepictionParams
269*/
271 RDKit::ROMol &mol, const RDKit::ROMol &reference,
272 const RDKit::MatchVectType &refMatchVect, int confId = -1,
274
275//! \brief Overload
276/*!
277 ARGUMENTS:
278
279 \param mol - the molecule to be aligned, this will come back
280 with a single conformer.
281 \param reference - a molecule with the reference atoms to align to;
282 this should have a depiction.
283 \param refMatchVect - a MatchVectType that will be used to
284 generate the atom mapping between the molecule
285 and the reference.
286 \param confId - the id of the reference conformation to use
287 \param forceRDKit - use RDKit to generate coordinates even if
288 preferCoordGen is set to true
289*/
291 RDKit::ROMol &mol, const RDKit::ROMol &reference,
292 const RDKit::MatchVectType &refMatchVect, int confId, bool forceRDKit);
293
294//! \brief Compute 2D coordinates constrained to a reference;
295/// the constraint can be hard (default) or soft.
296/*!
297 Hard (default, ConstrainedDepictionParams::alignOnly = false):
298 Existing molecule coordinates, if present, are discarded;
299 new coordinates are generated constraining a piece of the molecule
300 to have the same coordinates as the reference, while the rest of
301 the molecule is built around it.
302 If ConstrainedDepictionParams::adjustMolBlockWedging is false,
303 existing molblock wedging information is always preserved.
304 If ConstrainedDepictionParams::adjustMolBlockWedging is true (default),
305 existing molblock wedging information is preserved in case it
306 only involves the invariant core and the core conformation has not
307 changed, while it is cleared in case the wedging is also outside
308 the invariant core, or core coordinates were changed.
309 If ConstrainedDepictionParams::acceptFailure is set to true and no
310 substructure match is found, coordinates will be recomputed from
311 scratch, hence molblock wedging information will be cleared.
312
313 Soft (ConstrainedDepictionParams::alignOnly = true):
314 Existing coordinates in the conformation identified by
315 ConstrainedDepictionParams::existingConfId are preserved if present,
316 otherwise unconstrained new coordinates are generated.
317 Subsequently, coordinates undergo a rigid-body alignment to the reference.
318 If ConstrainedDepictionParams::adjustMolBlockWedging is false,
319 existing molblock wedging information is always preserved.
320 If ConstrainedDepictionParams::adjustMolBlockWedging is true (default),
321 existing molblock wedging information is inverted in case the rigid-body
322 alignment involved a flip around the Z axis.
323
324 This is useful, for example, for generating depictions
325 of SAR data sets such that the cores of the molecules are all oriented
326 the same way.
327
328 ARGUMENTS:
329
330 \param mol - the molecule to be aligned, this will come back
331 with a single conformer.
332 \param reference - a molecule with the reference atoms to align to;
333 this should have a depiction.
334 \param confId - (optional) the id of the reference conformation to use
335 \param referencePattern - (optional) a query molecule to be used to
336 generate the atom mapping between the molecule
337 and the reference.
338 \param params - (optional) a ConstrainedDepictionParams instance
339 RETURNS:
340
341 \return MatchVectType with (queryAtomidx, molAtomIdx) pairs used for
342 the constrained depiction
343*/
345 RDKit::ROMol &mol, const RDKit::ROMol &reference, int confId = -1,
346 const RDKit::ROMol *referencePattern =
347 static_cast<const RDKit::ROMol *>(nullptr),
349
350//! \brief Compute 2D coordinates where a piece of the molecule is
351/// constrained to have the same coordinates as a reference.
352/*!
353 This function generates a depiction for a molecule where a piece of the
354 molecule is constrained to have the same coordinates as a reference.
355
356 This is useful, for example, for generating depictions
357 of SAR data sets such that the cores of the molecules are all oriented
358 the same way.
359
360 ARGUMENTS:
361
362 \param mol - the molecule to be aligned, this will come back
363 with a single conformer.
364 \param reference - a molecule with the reference atoms to align to;
365 this should have a depiction.
366 \param confId - the id of the reference conformation to use
367 \param referencePattern - a query molecule to be used to
368 generate the atom mapping between the molecule
369 and the reference.
370 \param acceptFailure - if true, standard depictions will be
371 generated for molecules that don't have a substructure
372 match to the reference; if false, throws a
373 DepictException.
374 \param forceRDKit - (optional) use RDKit to generate coordinates even if
375 preferCoordGen is set to true
376 \param allowOptionalAttachments - (optional) if true, terminal dummy atoms in
377 the reference are ignored if they match an implicit
378 hydrogen in the molecule, and a constrained
379 depiction is still attempted
380 RETURNS:
381
382 \return MatchVectType with (queryAtomidx, molAtomIdx) pairs used for
383 the constrained depiction
384*/
386 RDKit::ROMol &mol, const RDKit::ROMol &reference, int confId,
387 const RDKit::ROMol *referencePattern, bool acceptFailure,
388 bool forceRDKit = false, bool allowOptionalAttachments = false);
389
390//! \brief Generate a 2D depiction for a molecule where all or part of
391/// it mimics the coordinates of a 3D reference structure.
392/*!
393 Generates a depiction for a molecule where a piece of the molecule
394 is constrained to have coordinates similar to those of a 3D reference
395 structure.
396
397 ARGUMENTS:
398 \param mol - the molecule to be aligned, this will come back
399 with a single conformer containing 2D coordinates
400 \param reference - a molecule with the reference atoms to align to.
401 By default this should be the same as mol, but with
402 3D coordinates
403 \param confId - (optional) the id of the reference conformation to use
404 \param referencePattern - (optional) a query molecule to map a subset of
405 the reference onto the mol, so that only some of the
406 atoms are aligned.
407 \param acceptFailure - (optional) if true, standard depictions will be
408 generated
409 for molecules that don't match the reference or the
410 referencePattern; if false, throws a DepictException.
411 \param forceRDKit - (optional) use RDKit to generate coordinates even if
412 preferCoordGen is set to true
413*/
415 RDKit::ROMol &mol, const RDKit::ROMol &reference, int confId = -1,
416 RDKit::ROMol *referencePattern = nullptr, bool acceptFailure = false,
417 bool forceRDKit = false);
418
419//! \brief Rotate the 2D depiction such that the majority of bonds have an
420//! angle with the X axis which is a multiple of 30 degrees.
421/*!
422
423 ARGUMENTS:
424 \param mol - the molecule to be rotated
425 \param confId - (optional) the id of the reference conformation to use
426 \param minimizeRotation - (optional) if false (the default), the molecule
427 is rotated such that the majority of bonds have an angle with the
428 X axis of 30 or 90 degrees. If true, the minimum rotation is applied
429 such that the majority of bonds have an angle with the X axis of
430 0, 30, 60, or 90 degrees, with the goal of altering the initial
431 orientation as little as possible.
432*/
433
435 int confId = -1,
436 bool minimizeRotation = false);
437
438//! \brief Normalizes the 2D depiction.
439/*!
440 If canonicalize is != 0, the depiction is subjected to a canonical
441 transformation such that its main axis is aligned along the X axis
442 (canonicalize >0, the default) or the Y axis (canonicalize <0).
443 If canonicalize is 0, no canonicalization takes place.
444 If scaleFactor is <0.0 (the default) the depiction is scaled such
445 that bond lengths conform to RDKit standards. The applied scaling
446 factor is returned.
447
448 ARGUMENTS:
449 \param mol - the molecule to be normalized
450 \param confId - (optional) the id of the reference conformation to use
451 \param canonicalize - (optional) if != 0, a canonical transformation is
452 applied: if >0 (the default), the main molecule axis is
453 aligned to the X axis, if <0 to the Y axis.
454 If 0, no canonical transformation is applied.
455 \param scaleFactor - (optional) if >0.0, the scaling factor to apply. The
456 default (-1.0) means that the depiction is automatically
457 scaled such that bond lengths are the standard RDKit
458 ones.
459 RETURNS:
460
461 \return the applied scaling factor.
462*/
463
465 int confId = -1,
466 int canonicalize = 1,
467 double scaleFactor = -1.0);
468}; // namespace RDDepict
469
470#endif
DepictException(const char *msg)
Definition RDDepictor.h:33
~DepictException() noexcept override=default
DepictException(const std::string msg)
Definition RDDepictor.h:34
const char * what() const noexcept override
Definition RDDepictor.h:35
#define RDKIT_DEPICTOR_EXPORT
Definition export.h:89
boost::shared_array< double > DOUBLE_SMART_PTR
RDKIT_DEPICTOR_EXPORT void generateDepictionMatching2DStructure(RDKit::ROMol &mol, const RDKit::ROMol &reference, const RDKit::MatchVectType &refMatchVect, int confId=-1, const ConstrainedDepictionParams &params=ConstrainedDepictionParams())
Compute 2D coordinates where a piece of the molecule is constrained to have the same coordinates as a...
RDKIT_DEPICTOR_EXPORT void generateDepictionMatching3DStructure(RDKit::ROMol &mol, const RDKit::ROMol &reference, int confId=-1, RDKit::ROMol *referencePattern=nullptr, bool acceptFailure=false, bool forceRDKit=false)
Generate a 2D depiction for a molecule where all or part of it mimics the coordinates of a 3D referen...
RDKIT_DEPICTOR_EXPORT double normalizeDepiction(RDKit::ROMol &mol, int confId=-1, int canonicalize=1, double scaleFactor=-1.0)
Normalizes the 2D depiction.
RDKIT_DEPICTOR_EXPORT unsigned int compute2DCoords(RDKit::ROMol &mol, const Compute2DCoordParameters &params)
Generate 2D coordinates (a depiction) for a molecule.
RDKIT_DEPICTOR_EXPORT void straightenDepiction(RDKit::ROMol &mol, int confId=-1, bool minimizeRotation=false)
Rotate the 2D depiction such that the majority of bonds have an angle with the X axis which is a mult...
RDKIT_DEPICTOR_EXPORT unsigned int compute2DCoordsMimicDistMat(RDKit::ROMol &mol, const DOUBLE_SMART_PTR *dmat=nullptr, bool canonOrient=true, bool clearConfs=true, double weightDistMat=0.5, unsigned int nFlipsPerSample=3, unsigned int nSamples=100, int sampleSeed=25, bool permuteDeg4Nodes=true, bool forceRDKit=false)
Compute the 2D coordinates such the interatom distances mimic those in a distance matrix.
RDKIT_DEPICTOR_EXPORT bool preferCoordGen
std::map< int, Point2D > INT_POINT2D_MAP
Definition point.h:550
Std stuff.
std::vector< std::pair< int, int > > MatchVectType
used to return matches from substructure searching, The format is (queryAtomIdx, molAtomIdx)