RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
RDDepictor.h
Go to the documentation of this file.
1//
2// Copyright (C) 2003-2022 Greg Landrum and other RDKit contributors
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10
11#include <RDGeneral/export.h>
12#ifndef RDDEPICTOR_H
13#define RDDEPICTOR_H
14
16#include <RDGeneral/types.h>
17#include <Geometry/point.h>
18#include <boost/smart_ptr.hpp>
19
20namespace RDKit {
21class ROMol;
22}
23
24namespace RDDepict {
25
26RDKIT_DEPICTOR_EXPORT extern bool
27 preferCoordGen; // Ignored if coordgen support isn't active
28
29typedef boost::shared_array<double> DOUBLE_SMART_PTR;
30
31class RDKIT_DEPICTOR_EXPORT DepictException : public std::exception {
32 public:
33 DepictException(const char *msg) : _msg(msg) {}
34 DepictException(const std::string msg) : _msg(msg) {}
35 const char *what() const noexcept override { return _msg.c_str(); }
36 ~DepictException() noexcept override = default;
37
38 private:
39 std::string _msg;
40};
41
42//! \brief Set the path to the file containing the ring system templates
43/*!
44
45 \param templatePath the file path to a file containing the ring system
46 templates. Each template must be a single line in the file represented using
47 CXSMILES, and the structure should be a single ring system.
48
49 \throws DepictException if any of the templates are invalid
50*/
52setRingSystemTemplates(const std::string templatePath);
53
54//! \brief Add ring system templates to be used in 2D coordinate generation.
55/// If there are duplicates, the most recently added template will be used.
56/*!
57
58 \param templatePath the file path to a file containing the ring system
59 templates. Each template must be a single line in the file represented using
60 CXSMILES, and the structure should be a single ring system.
61
62 \throws DepictException if any of the templates are invalid
63*/
65addRingSystemTemplates(const std::string templatePath);
66
67//! \brief Load default ring system templates to be used in 2D coordinate
68//! generation
69void RDKIT_DEPICTOR_EXPORT loadDefaultRingSystemTemplates();
70
72 const RDGeom::INT_POINT2D_MAP *coordMap =
73 nullptr; //!< a map of int to Point2D, between atom IDs and their
74 //!< locations. This is the container the user needs to
75 //!< fill if he/she wants to specify coordinates for a portion
76 //!< of the molecule, defaults to 0
77 bool canonOrient = false; //!< canonicalize the orientation so that the long
78 //!< axes align with the x-axis etc.
79 bool clearConfs = true; //!< clear all existing conformations on the molecule
80 //!< before adding the 2D coordinates instead of
81 //!< simply adding to the list
82 unsigned int nFlipsPerSample = 0; //!< the number of rotatable bonds that are
83 //!< flipped at random for each sample
84 unsigned int nSamples = 0; //!< the number of samples
85 int sampleSeed = 0; //!< seed for the random sampling process
86 bool permuteDeg4Nodes = false; //!< try permuting the drawing order of bonds
87 //!< around atoms with four neighbors in order
88 //!< to improve the depiction
89 bool forceRDKit = false; //!< use RDKit to generate coordinates even if
90 //!< preferCoordGen is set to true
91 bool useRingTemplates = false; //!< whether to use ring system templates for
92 //!< generating initial coordinates
93
94};
95
96//! \brief Generate 2D coordinates (a depiction) for a molecule
97/*!
98
99 \param mol the molecule we are interested in
100
101 \param params parameters used for 2D coordinate generation
102
103 \return ID of the conformation added to the molecule containing the
104 2D coordinates
105
106*/
108 RDKit::ROMol &mol, const Compute2DCoordParameters &params);
109
110//! \brief Generate 2D coordinates (a depiction) for a molecule
111/*!
112
113 \param mol the molecule we are interested in
114
115 \param coordMap a map of int to Point2D, between atom IDs and
116 their locations. This is the container the user needs to fill if
117 he/she wants to specify coordinates for a portion of the molecule,
118 defaults to 0
119
120 \param canonOrient canonicalize the orientation so that the long
121 axes align with the x-axis etc.
122
123 \param clearConfs clear all existing conformations on the molecule
124 before adding the 2D coordinates instead of simply adding to the
125 list
126
127 \param nFlipsPerSample - the number of rotatable bonds that are
128 flipped at random for each sample
129
130 \param nSamples - the number of samples
131
132 \param sampleSeed - seed for the random sampling process
133
134 \param permuteDeg4Nodes - try permuting the drawing order of bonds around
135 atoms with four neighbors in order to improve the depiction
136
137 \param forceRDKit - use RDKit to generate coordinates even if
138 preferCoordGen is set to true
139
140 \param useRingTemplates whether to use ring system templates for generating
141 initial coordinates
142
143 \return ID of the conformation added to the molecule containing the
144 2D coordinates
145
146*/
148 RDKit::ROMol &mol, const RDGeom::INT_POINT2D_MAP *coordMap = nullptr,
149 bool canonOrient = false, bool clearConfs = true,
150 unsigned int nFlipsPerSample = 0, unsigned int nSamples = 0,
151 int sampleSeed = 0, bool permuteDeg4Nodes = false, bool forceRDKit = false,
152 bool useRingTemplates = false);
153
154//! \brief Compute the 2D coordinates such that the interatom distances
155/// mimic those in a distance matrix
156/*!
157
158 This function generates 2D coordinates such that the inter-atom
159 distances mimic those specified via dmat. This is done by randomly
160 sampling(flipping) the rotatable bonds in the molecule and
161 evaluating a cost function which contains two components. The
162 first component is the sum of inverse of the squared inter-atom
163 distances, this helps in spreading the atoms far from each
164 other. The second component is the sum of squares of the
165 difference in distance between those in dmat and the generated
166 structure. The user can adjust the relative importance of the two
167 components via an adjustable parameter (see below)
168
169 ARGUMENTS:
170
171 \param mol - molecule to generate coordinates for
172
173 \param dmat - the distance matrix we want to mimic, this is a
174 symmetric N by N matrix where N is the number of atoms in mol. All
175 negative entries in dmat are ignored.
176
177 \param canonOrient - canonicalize the orientation after the 2D
178 embedding is done
179
180 \param clearConfs - clear any previously existing conformations on
181 mol before adding a conformation
182
183 \param weightDistMat - A value between 0.0 and 1.0, this
184 determines the importance of mimicking the inter-atom
185 distances in dmat. (1.0 - weightDistMat) is the weight associated
186 to spreading out the structure (density) in the cost function
187
188 \param nFlipsPerSample - the number of rotatable bonds that are
189 flipped at random for each sample
190
191 \param nSamples - the number of samples
192
193 \param sampleSeed - seed for the random sampling process
194
195 \param permuteDeg4Nodes - try permuting the drawing order of bonds around
196 atoms with four neighbors in order to improve the depiction
197
198 \param forceRDKit - use RDKit to generate coordinates even if
199 preferCoordGen is set to true
200
201 \return ID of the conformation added to the molecule containing the
202 2D coordinates
203
204
205*/
207 RDKit::ROMol &mol, const DOUBLE_SMART_PTR *dmat = nullptr,
208 bool canonOrient = true, bool clearConfs = true, double weightDistMat = 0.5,
209 unsigned int nFlipsPerSample = 3, unsigned int nSamples = 100,
210 int sampleSeed = 25, bool permuteDeg4Nodes = true, bool forceRDKit = false);
211
213 //! if false (default), a DepictException is thrown if the molecule
214 /// does not have a substructure match to the reference;
215 /// if true, an unconstrained depiction will be generated
216 bool acceptFailure = false;
217 //! if true, use RDKit to generate coordinates even if preferCoordGen
218 /// is set to true; defaults to false
219 bool forceRDKit = false;
220 //! if true, terminal dummy atoms in the reference are ignored
221 /// if they match an implicit hydrogen in the molecule or if they are
222 /// attached to a query atom; defaults to false
223 bool allowRGroups = false;
224 //! if false (default), a part of the molecule is hard-constrained
225 /// to have the same coordinates as the reference, and the rest of
226 /// the molecule is built around it; if true, coordinates
227 /// from conformation existingConfId are preserved (if they exist)
228 /// or generated without constraints (if they do not exist), then
229 /// the conformation is rigid-body aligned to the reference
230 bool alignOnly = false;
231 //! if true (default), existing wedging information will be updated
232 /// or cleared as required; if false, existing molblock
233 /// wedging information will always be preserved
234 bool adjustMolBlockWedging = true;
235 //! conformation id whose 2D coordinates should be
236 /// * rigid-body aligned to the reference (if alignOnly is true)
237 /// * used to determine whether existing molblock wedging information
238 /// can be preserved following the constrained depiction (if
239 /// adjustMolBlockWedging is true)
240 int existingConfId = -1;
241};
242
243//! \brief Compute 2D coordinates where a piece of the molecule is
244/// constrained to have the same coordinates as a reference.
245/// Correspondences between reference and molecule atom indices
246/// are determined by refMatchVect.
247/*!
248 This function generates a depiction for a molecule where a piece of the
249 molecule is constrained to have the same coordinates as a reference.
250
251 This is useful for, for example, generating depictions of SAR data
252 sets so that the cores of the molecules are all oriented the same way.
253 This overload allows specifying the (referenceAtom, molAtom) index pairs
254 which should be matched as MatchVectType. Please note that the
255 vector can be shorter than the number of atoms in the reference.
256
257 ARGUMENTS:
258
259 \param mol - the molecule to be aligned, this will come back
260 with a single conformer.
261 \param reference - a molecule with the reference atoms to align to;
262 this should have a depiction.
263 \param refMatchVect - a MatchVectType that will be used to
264 generate the atom mapping between the molecule
265 and the reference.
266 \param confId - (optional) the id of the reference conformation to use
267 \param params - (optional) an instance of ConstrainedDepictionParams
268*/
270 RDKit::ROMol &mol, const RDKit::ROMol &reference,
271 const RDKit::MatchVectType &refMatchVect, int confId = -1,
273
274//! \brief Overload
275/*!
276 ARGUMENTS:
277
278 \param mol - the molecule to be aligned, this will come back
279 with a single conformer.
280 \param reference - a molecule with the reference atoms to align to;
281 this should have a depiction.
282 \param refMatchVect - a MatchVectType that will be used to
283 generate the atom mapping between the molecule
284 and the reference.
285 \param confId - the id of the reference conformation to use
286 \param forceRDKit - use RDKit to generate coordinates even if
287 preferCoordGen is set to true
288*/
290 RDKit::ROMol &mol, const RDKit::ROMol &reference,
291 const RDKit::MatchVectType &refMatchVect, int confId, bool forceRDKit);
292
293//! \brief Compute 2D coordinates constrained to a reference;
294/// the constraint can be hard (default) or soft.
295/*!
296 Hard (default, ConstrainedDepictionParams::alignOnly = false):
297 Existing molecule coordinates, if present, are discarded;
298 new coordinates are generated constraining a piece of the molecule
299 to have the same coordinates as the reference, while the rest of
300 the molecule is built around it.
301 If ConstrainedDepictionParams::adjustMolBlockWedging is false,
302 existing molblock wedging information is always preserved.
303 If ConstrainedDepictionParams::adjustMolBlockWedging is true (default),
304 existing molblock wedging information is preserved in case it
305 only involves the invariant core and the core conformation has not
306 changed, while it is cleared in case the wedging is also outside
307 the invariant core, or core coordinates were changed.
308 If ConstrainedDepictionParams::acceptFailure is set to true and no
309 substructure match is found, coordinates will be recomputed from
310 scratch, hence molblock wedging information will be cleared.
311
312 Soft (ConstrainedDepictionParams::alignOnly = true):
313 Existing coordinates in the conformation identified by
314 ConstrainedDepictionParams::existingConfId are preserved if present,
315 otherwise unconstrained new coordinates are generated.
316 Subsequently, coordinates undergo a rigid-body alignment to the reference.
317 If ConstrainedDepictionParams::adjustMolBlockWedging is false,
318 existing molblock wedging information is always preserved.
319 If ConstrainedDepictionParams::adjustMolBlockWedging is true (default),
320 existing molblock wedging information is inverted in case the rigid-body
321 alignment involved a flip around the Z axis.
322
323 This is useful, for example, for generating depictions
324 of SAR data sets such that the cores of the molecules are all oriented
325 the same way.
326
327 ARGUMENTS:
328
329 \param mol - the molecule to be aligned, this will come back
330 with a single conformer.
331 \param reference - a molecule with the reference atoms to align to;
332 this should have a depiction.
333 \param confId - (optional) the id of the reference conformation to use
334 \param referencePattern - (optional) a query molecule to be used to
335 generate the atom mapping between the molecule
336 and the reference.
337 \param params - (optional) a ConstrainedDepictionParams instance
338 RETURNS:
339
340 \return MatchVectType with (queryAtomIdx, molAtomIdx) pairs used for
341 the constrained depiction
342*/
344 RDKit::ROMol &mol, const RDKit::ROMol &reference, int confId = -1,
345 const RDKit::ROMol *referencePattern =
346 static_cast<const RDKit::ROMol *>(nullptr),
348
349//! \brief Compute 2D coordinates where a piece of the molecule is
350/// constrained to have the same coordinates as a reference.
351/*!
352 This function generates a depiction for a molecule where a piece of the
353 molecule is constrained to have the same coordinates as a reference.
354
355 This is useful, for example, for generating depictions
356 of SAR data sets such that the cores of the molecules are all oriented
357 the same way.
358
359 ARGUMENTS:
360
361 \param mol - the molecule to be aligned, this will come back
362 with a single conformer.
363 \param reference - a molecule with the reference atoms to align to;
364 this should have a depiction.
365 \param confId - the id of the reference conformation to use
366 \param referencePattern - a query molecule to be used to
367 generate the atom mapping between the molecule
368 and the reference.
369 \param acceptFailure - if true, standard depictions will be
370 generated for molecules that don't have a substructure
371 match to the reference; if false, throws a
372 DepictException.
373 \param forceRDKit - (optional) use RDKit to generate coordinates even if
374 preferCoordGen is set to true
375 \param allowOptionalAttachments - (optional) if true, terminal dummy atoms in
376 the reference are ignored if they match an implicit
377 hydrogen in the molecule, and a constrained
378 depiction is still attempted
379 RETURNS:
380
381 \return MatchVectType with (queryAtomIdx, molAtomIdx) pairs used for
382 the constrained depiction
383*/
385 RDKit::ROMol &mol, const RDKit::ROMol &reference, int confId,
386 const RDKit::ROMol *referencePattern, bool acceptFailure,
387 bool forceRDKit = false, bool allowOptionalAttachments = false);
388
389//! \brief Generate a 2D depiction for a molecule where all or part of
390/// it mimics the coordinates of a 3D reference structure.
391/*!
392 Generates a depiction for a molecule where a piece of the molecule
393 is constrained to have coordinates similar to those of a 3D reference
394 structure.
395
396 ARGUMENTS:
397 \param mol - the molecule to be aligned, this will come back
398 with a single conformer containing 2D coordinates
399 \param reference - a molecule with the reference atoms to align to.
400 By default this should be the same as mol, but with
401 3D coordinates
402 \param confId - (optional) the id of the reference conformation to use
403 \param referencePattern - (optional) a query molecule to map a subset of
404 the reference onto the mol, so that only some of the
405 atoms are aligned.
406 \param acceptFailure - (optional) if true, standard depictions will be
407 generated
408 for molecules that don't match the reference or the
409 referencePattern; if false, throws a DepictException.
410 \param forceRDKit - (optional) use RDKit to generate coordinates even if
411 preferCoordGen is set to true
412*/
414 RDKit::ROMol &mol, const RDKit::ROMol &reference, int confId = -1,
415 RDKit::ROMol *referencePattern = nullptr, bool acceptFailure = false,
416 bool forceRDKit = false);
417
418//! \brief Rotate the 2D depiction such that the majority of bonds have an
419//! angle with the X axis which is a multiple of 30 degrees.
420/*!
421
422 ARGUMENTS:
423 \param mol - the molecule to be rotated
424 \param confId - (optional) the id of the reference conformation to use
425 \param minimizeRotation - (optional) if false (the default), the molecule
426 is rotated such that the majority of bonds have an angle with the
427 X axis of 30 or 90 degrees. If true, the minimum rotation is applied
428 such that the majority of bonds have an angle with the X axis of
429 0, 30, 60, or 90 degrees, with the goal of altering the initial
430 orientation as little as possible.
431*/
432
434 int confId = -1,
435 bool minimizeRotation = false);
436
437//! \brief Normalizes the 2D depiction.
438/*!
439 If canonicalize is != 0, the depiction is subjected to a canonical
440 transformation such that its main axis is aligned along the X axis
441 (canonicalize >0, the default) or the Y axis (canonicalize <0).
442 If canonicalize is 0, no canonicalization takes place.
443 If scaleFactor is <0.0 (the default) the depiction is scaled such
444 that bond lengths conform to RDKit standards. The applied scaling
445 factor is returned.
446
447 ARGUMENTS:
448 \param mol - the molecule to be normalized
449 \param confId - (optional) the id of the reference conformation to use
450 \param canonicalize - (optional) if != 0, a canonical transformation is
451 applied: if >0 (the default), the main molecule axis is
452 aligned to the X axis, if <0 to the Y axis.
453 If 0, no canonical transformation is applied.
454 \param scaleFactor - (optional) if >0.0, the scaling factor to apply. The
455 default (-1.0) means that the depiction is automatically
456 scaled such that bond lengths are the standard RDKit
457 ones.
458 RETURNS:
459
460 \return the applied scaling factor.
461*/
462
464 int confId = -1,
465 int canonicalize = 1,
466 double scaleFactor = -1.0);
467}; // namespace RDDepict
468
469#endif
DepictException(const char *msg)
Definition RDDepictor.h:33
~DepictException() noexcept override=default
DepictException(const std::string msg)
Definition RDDepictor.h:34
const char * what() const noexcept override
Definition RDDepictor.h:35
#define RDKIT_DEPICTOR_EXPORT
Definition export.h:89
boost::shared_array< double > DOUBLE_SMART_PTR
RDKIT_DEPICTOR_EXPORT void generateDepictionMatching2DStructure(RDKit::ROMol &mol, const RDKit::ROMol &reference, const RDKit::MatchVectType &refMatchVect, int confId=-1, const ConstrainedDepictionParams &params=ConstrainedDepictionParams())
Compute 2D coordinates where a piece of the molecule is constrained to have the same coordinates as a...
RDKIT_DEPICTOR_EXPORT void generateDepictionMatching3DStructure(RDKit::ROMol &mol, const RDKit::ROMol &reference, int confId=-1, RDKit::ROMol *referencePattern=nullptr, bool acceptFailure=false, bool forceRDKit=false)
Generate a 2D depiction for a molecule where all or part of it mimics the coordinates of a 3D referen...
RDKIT_DEPICTOR_EXPORT double normalizeDepiction(RDKit::ROMol &mol, int confId=-1, int canonicalize=1, double scaleFactor=-1.0)
Normalizes the 2D depiction.
RDKIT_DEPICTOR_EXPORT unsigned int compute2DCoords(RDKit::ROMol &mol, const Compute2DCoordParameters &params)
Generate 2D coordinates (a depiction) for a molecule.
RDKIT_DEPICTOR_EXPORT void straightenDepiction(RDKit::ROMol &mol, int confId=-1, bool minimizeRotation=false)
Rotate the 2D depiction such that the majority of bonds have an angle with the X axis which is a mult...
RDKIT_DEPICTOR_EXPORT unsigned int compute2DCoordsMimicDistMat(RDKit::ROMol &mol, const DOUBLE_SMART_PTR *dmat=nullptr, bool canonOrient=true, bool clearConfs=true, double weightDistMat=0.5, unsigned int nFlipsPerSample=3, unsigned int nSamples=100, int sampleSeed=25, bool permuteDeg4Nodes=true, bool forceRDKit=false)
Compute the 2D coordinates such the interatom distances mimic those in a distance matrix.
RDKIT_DEPICTOR_EXPORT bool preferCoordGen
std::map< int, Point2D > INT_POINT2D_MAP
Definition point.h:569
Std stuff.
std::vector< std::pair< int, int > > MatchVectType
used to return matches from substructure searching, The format is (queryAtomIdx, molAtomIdx)