RDKit
Open-source cheminformatics and machine learning.
EmbeddedFrag.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2003-2008 Greg Landrum and Rational Discovery LLC
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #ifndef _RD_EMBEDDED_FRAG_H_
11 #define _RD_EMBEDDED_FRAG_H_
12 
13 #include <RDGeneral/types.h>
14 #include <Geometry/Transform2D.h>
15 #include <Geometry/point.h>
16 #include "DepictUtils.h"
17 #include <boost/smart_ptr.hpp>
18 
19 namespace RDKit {
20 class ROMol;
21 class Bond;
22 }
23 
24 namespace RDDepict {
25 typedef boost::shared_array<double> DOUBLE_SMART_PTR;
26 
27 //! Class that contains the data for an atoms that has alredy been embedded
28 class EmbeddedAtom {
29  public:
30  typedef enum { UNSPECIFIED = 0, CISTRANS, RING } EAtomType;
31 
33  : aid(0),
34  angle(-1.0),
35  nbr1(-1),
36  nbr2(-1),
37  CisTransNbr(-1),
38  ccw(true),
39  rotDir(0),
40  d_density(-1.0) {
41  neighs.clear();
42  }
43 
44  EmbeddedAtom(unsigned int aid, const RDGeom::Point2D &pos)
45  : aid(aid),
46  angle(-1.0),
47  nbr1(-1),
48  nbr2(-1),
49  CisTransNbr(-1),
50  ccw(true),
51  rotDir(0),
52  d_density(-1.0) {
53  loc = pos;
54  }
55 
57  loc = other.loc;
58  angle = other.angle;
59  nbr1 = other.nbr1;
60  nbr2 = other.nbr2;
61  CisTransNbr = other.CisTransNbr;
62  rotDir = other.rotDir;
63  normal = other.normal;
64  ccw = other.ccw;
65  neighs = other.neighs;
66  d_density = other.d_density;
67  return *this;
68  }
69 
70  void Transform(const RDGeom::Transform2D &trans) {
71  RDGeom::Point2D temp = loc + normal;
72  trans.TransformPoint(loc);
73  trans.TransformPoint(temp);
74  normal = temp - loc;
75  }
76 
77  void Reflect(const RDGeom::Point2D &loc1, const RDGeom::Point2D &loc2) {
78  RDGeom::Point2D temp = loc + normal;
79  loc = reflectPoint(loc, loc1, loc2);
80  temp = reflectPoint(temp, loc1, loc2);
81  normal = temp - loc;
82  ccw = (!ccw);
83  }
84 
85  unsigned int aid; // the id of the atom
86 
87  //! the angle that is already takes at this atom, so any new atom attaching to
88  // this
89  //! atom with have to fall in the available part
90  double angle;
91 
92  //! the first neighbor of this atom that form the 'angle'
93  int nbr1;
94 
95  //! the second neighbor of atom that from the 'angle'
96  int nbr2;
97 
98  //! is this is a cis/trans atom the neighbor of this atom that is involved in
99  // the
100  //! cis/trans system - defaults to -1
102 
103  //! which direction do we rotate this normal to add the next bond
104  //! if ccw is true we rotate counter cloack wise, otherwise rotate clock wise,
105  // by an angle that is
106  //! <= PI/2
107  bool ccw;
108 
109  //! rotation direction around this atom when adding new atoms,
110  //! we determine this for the first neighbor and stick to this direction after
111  // that
112  //! useful only on atoms that are degree >= 4
113  int rotDir;
114 
115  RDGeom::Point2D loc; // the current location of this atom
116  //! this is a normal vector to one of the bonds that added this atom
117  //! it provides the side on which we want to add a new bond to this atom,
118  //! this is only relevant when we are dealing with non ring atoms. We would
119  // like to draw chains in
120  //! a zig-zag manner
122 
123  //! and these are the atom IDs of the neighbors that still need to be embedded
125 
126  // density of the atoms around this atoms
127  // - this is sum of inverse of the square of distances to other atoms from
128  // this atom
129  // Used in the collision removal code - initialized to -1.0
130  double d_density;
131 };
132 
133 typedef std::map<unsigned int, EmbeddedAtom> INT_EATOM_MAP;
134 typedef INT_EATOM_MAP::iterator INT_EATOM_MAP_I;
135 typedef INT_EATOM_MAP::const_iterator INT_EATOM_MAP_CI;
136 
137 //! Class containing a fragment of a molecule that has already been embedded
138 /*
139  Here is how this class is designed to be used
140  - find a set of fused rings and compte the coordinates for the atoms in those
141  ring
142  - them grow this sytem either by adding non ring neighbors
143  - or by adding other embedded fragment
144  - so at the end of the process the whole molecule end up being one these
145  embedded frag objects
146 */
148  // REVIEW: think about moving member functions up to global level and just
149  // using
150  // this class as a container
151 
152  public:
153  //! Default constructor
154  EmbeddedFrag() : d_done(false), dp_mol(0) {
155  d_eatoms.clear();
156  d_attachPts.clear();
157  };
158 
159  //! Intializer from a single atom id
160  /*!
161  A single Embedded Atom with this atom ID is added and placed at the origin
162  */
163  EmbeddedFrag(unsigned int aid, const RDKit::ROMol *mol);
164 
165  //! Constructor when the coordinates have been specified for a set of atoms
166  /*!
167  This simply initialized a set of EmbeddedAtom to have the same coordinates
168  as the
169  one's specified. No testing is done to verify any kind of ocrrectlness.
170  Also
171  this fragment is less ready (to expand and add new neighbors) than when
172  using other
173  constructors. This is because:
174  - the user may have specified coords for only a part of the atoms in a
175  fused ring systems
176  in which case we need to find these atoms and merge these ring systems to
177  this fragment
178  - The atoms are not yet aware of their neighbor (what is left to add etc.)
179  this again
180  depends on atoms properly so that new
181  neighbors can be added to them
182  */
183  EmbeddedFrag(const RDKit::ROMol *mol,
184  const RDGeom::INT_POINT2D_MAP &coordMap);
185 
186  //! Initializer from a set of fused rings
187  /*!
188  ARGUMENTS:
189  \param mol the molecule of interest
190  \param fusedRings a vector of rings, each ring is a list of atom ids
191  */
192  EmbeddedFrag(const RDKit::ROMol *mol, const RDKit::VECT_INT_VECT &fusedRings);
193 
194  //! Initializer for a cis/trans system using the double bond
195  /*!
196  ARGUMENTS:
197  \param dblBond the double bond that is involed in the cis/trans
198  configuration
199  */
200  explicit EmbeddedFrag(const RDKit::Bond *dblBond);
201 
202  //! Expand this embedded system by adding negihboring atoms or other embedded
203  // systems
204  /*!
205 
206  Note that both nratms and efrags are modified in this function
207  as we start merging them with the current fragment
208 
209  */
210  void expandEfrag(RDKit::INT_LIST &nratms, std::list<EmbeddedFrag> &efrags);
211 
212  //! Add a new non-ring atom to this object
213  /*
214  ARGUMENTS:
215  \param aid ID of the atom to be added
216  \param toAid ID of the atom that is already in this object to which this
217  atom is added
218  */
219  void addNonRingAtom(unsigned int aid, unsigned int toAid);
220 
221  //! Merge this embedded object with another embedded fragment
222  /*!
223 
224  The transformation (rotation + translation required to attached
225  the passed in object will be computed and applied. The
226  coordinates of the atoms in this object will remain fixed We
227  will assume that there are no common atoms between the two
228  fragments to start with
229 
230  ARGUMENTS:
231  \param embObj another EmbeddedFrag object to be merged with this object
232  \param toAid the atom in this embedded fragment to which the new object
233  will be attached
234  \param nbrAid the atom in the other fragment to attach to
235  */
236  void mergeNoCommon(EmbeddedFrag &embObj, unsigned int toAid,
237  unsigned int nbrAid);
238 
239  //! Merge this embedded object with another embedded fragment
240  /*!
241 
242  The transformation (rotation + translation required to attached
243  the passed in object will be computed and applied. The
244  coordinates of the atoms in this object will remain fixed This
245  already know there are a atoms in common and we will use them to
246  merge things
247 
248  ARGUMENTS:
249  \param embObj another EmbeddedFrag object to be merged with this object
250  \param commAtms a vector of ids of the common atoms
251 
252  */
253  void mergeWithCommon(EmbeddedFrag &embObj, RDKit::INT_VECT &commAtms);
254 
255  void mergeFragsWithComm(std::list<EmbeddedFrag> &efrags);
256 
257  //! Mark this fragment to be done for final embedding
258  void markDone() { d_done = true; }
259 
260  //! If this fragment done for the final embedding
261  bool isDone() { return d_done; }
262 
263  //! Get the molecule that this embedded fragmetn blongs to
264  const RDKit::ROMol *getMol() const { return dp_mol; }
265 
266  //! Find the common atom ids between this fragment and a second one
267  RDKit::INT_VECT findCommonAtoms(const EmbeddedFrag &efrag2);
268 
269  //! Find a neighbor to a non-ring atom among the already embedded atoms
270  /*!
271  ARGUMENTS:
272  \param aid the atom id of interest
273 
274  RETURNS:
275  \return the id of the atom if we found a neighbor
276  -1 otherwise
277 
278  NOTE: by definition we can have only one neighbor in the embdded system.
279  */
280  int findNeighbor(unsigned int aid);
281 
282  //! Tranform this object to a new coordinates system
283  /*!
284  ARGUMENTS:
285  \param trans : the transformation that need to be applied to the atoms in
286  this object
287  */
288  void Transform(const RDGeom::Transform2D &trans);
289 
290  void Reflect(const RDGeom::Point2D &loc1, const RDGeom::Point2D &loc2);
291 
292  const INT_EATOM_MAP &GetEmbeddedAtoms() const { return d_eatoms; }
293 
294  void Translate(const RDGeom::Point2D &shift) {
295  INT_EATOM_MAP_I eari;
296  for (eari = d_eatoms.begin(); eari != d_eatoms.end(); eari++) {
297  eari->second.loc += shift;
298  }
299  }
300 
301  EmbeddedAtom GetEmbeddedAtom(unsigned int aid) const {
302  INT_EATOM_MAP_CI posi = d_eatoms.find(aid);
303  if (posi == d_eatoms.end()) {
304  PRECONDITION(0, "Embedded atom does not contain embedded atom specified");
305  }
306  return posi->second;
307  }
308 
309  //! the number of atoms in the embedded system
310  int Size() const { return d_eatoms.size(); }
311 
312  //! \brief compute a box that encloses the fragment
313  void computeBox();
314 
315  //! \brief Flip atoms on one side of a bond - used in removing colissions
316  /*!
317  ARGUMENTS:
318  \param bondId - the bond used as the mirror to flip
319  \param flipEnd - flip the atoms at the end of the bond
320 
321  */
322  void flipAboutBond(unsigned int bondId, bool flipEnd = true);
323 
324  void openAngles(const double *dmat, unsigned int aid1, unsigned int aid2);
325 
326  std::vector<PAIR_I_I> findCollisions(const double *dmat,
327  bool includeBonds = 1);
328 
329  void computeDistMat(DOUBLE_SMART_PTR &dmat);
330 
331  double mimicDistMatAndDensityCostFunc(const DOUBLE_SMART_PTR *dmat,
332  double mimicDmatWt);
333 
334  void permuteBonds(unsigned int aid, unsigned int aid1, unsigned int aid2);
335 
336  void randomSampleFlipsAndPermutations(unsigned int nBondsPerSample = 3,
337  unsigned int nSamples = 100,
338  int seed = 100,
339  const DOUBLE_SMART_PTR *dmat = 0,
340  double mimicDmatWt = 0.0,
341  bool permuteDeg4Nodes = false);
342 
343  //! Remove collisions in a structure by flipping rotable bonds
344  //! along the shortest path between two colliding atoms
345  void removeCollisionsBondFlip();
346 
347  //! Remove collision by opening angles at the offending atoms
348  void removeCollisionsOpenAngles();
349 
350  //! Remove collisions by shortening bonds along the shortest path between the
351  // atoms
352  void removeCollisionsShortenBonds();
353 
354  //! helpers funtions to
355 
356  //! \brief make list of neighbors for each atom in the embedded system that
357  //! still need to be embedded
358  void setupNewNeighs();
359 
360  //! update the unembedded neighbor atom list for a specified atom
361  void updateNewNeighs(unsigned int aid);
362 
363  //! \brief Find all atoms in this embedded system that are
364  //! within a specified distant of a point
365  int findNumNeigh(const RDGeom::Point2D &pt, double radius);
366 
367  inline double getBoxPx() { return d_px; }
368  inline double getBoxNx() { return d_nx; }
369  inline double getBoxPy() { return d_py; }
370  inline double getBoxNy() { return d_ny; }
371 
372  void canonicalizeOrientation();
373 
374  private:
375  double totalDensity();
376 
377  void embedFusedRings(const RDKit::VECT_INT_VECT &fusedRings);
378 
379  //! \brief Find a transform to join a ring to the current embedded frag when
380  // we
381  //! have only on common atom
382  /*!
383  So this is the state of affairs assumed here:
384  - we already have some rings in the fused system embeded and the
385  coordinates for the atoms
386  - the coordinates for the atoms in the new ring (with the center
387  of rings at the origin) are available nringCors. we want to
388  translate and rotate this ring to join with the already
389  embeded rings.
390  - only one atom is common between this new ring and the atoms
391  that are already embedded
392  - so we need to compute a transform that includes a translation
393  so that the common atom overlaps and the rotation to minimize
394  overalp with other atoms.
395 
396  Here's what is done:
397  - we bisect the remaining sweep angle at the common atom and
398  attach the new ring such that the center of the new ring falls
399  on this bisecting line
400 
401  NOTE: It is assumed here that the original coordinates for the
402  new ring are such that the center is at the origin (this is the
403  way rings come out of embedRing)
404  */
405  RDGeom::Transform2D computeOneAtomTrans(unsigned int commAid,
406  const EmbeddedFrag &other);
407 
408  RDGeom::Transform2D computeTwoAtomTrans(
409  unsigned int aid1, unsigned int aid2,
410  const RDGeom::INT_POINT2D_MAP &nringCor);
411 
412  //! Merge a ring with already embedded atoms
413  /*!
414  It is assumed that the new rings has already been oriented
415  correctly etc. This function just update all the relevant data,
416  like the neighbor information and the sweep angle
417  */
418  void mergeRing(const EmbeddedFrag &embRing, unsigned int nCommon,
419  const RDKit::INT_VECT &pinAtoms);
420 
421  //! Reflect a fragment if necessary through a line connecting two atoms
422  /*!
423 
424  We want add the new fragment such that, most of its atoms fall
425  on the side opoiste to where the atoms already embedded are aid1
426  and aid2 give the atoms that were used to align the new ring to
427  the embedded atoms and we will assume that that process has
428  already taken place (i.e. transformRing has been called)
429 
430  */
431  void reflectIfNecessaryDensity(EmbeddedFrag &embFrag, unsigned int aid1,
432  unsigned int aid2);
433 
434  //! Reflect a fragment if necessary based on the cis/trans specification
435  /*!
436 
437  we want to add the new fragment such that the cis/trans
438  specification on bond between aid1 and aid2 is not violated. We
439  will assume that aid1 and aid2 from this fragments as well as
440  embFrag are already aligned to each other.
441 
442  \param embFrag the fragment that will be reflected if necessary
443  \param ctCase which fragment if the cis/trans dbl bond
444  - 1 means embFrag is the cis/trans fragment
445  - 2 mean "this" is the cis/trans fragment
446  \param aid1 first atom that forms the plane (line) of reflection
447  \param aid2 seconf atom that forms the plane of reflection
448  */
449  void reflectIfNecessaryCisTrans(EmbeddedFrag &embFrag, unsigned int ctCase,
450  unsigned int aid1, unsigned int aid2);
451 
452  //! Reflect a fragment if necessary based on a thrid common point
453  /*!
454 
455  we want add the new fragment such that the thrid point falls on
456  the same side of aid1 and aid2. We will assume that aid1 and
457  aid2 from this fragments as well as embFrag are already aligned
458  to each other.
459 
460  */
461  void reflectIfNecessaryThirdPt(EmbeddedFrag &embFrag, unsigned int aid1,
462  unsigned int aid2, unsigned int aid3);
463 
464  //! \brief Initialize this fragment from a ring and coordinates for its atoms
465  /*!
466  ARGUMENTS:
467  /param ring a vector of atom ids in the ring; it is assumed that there
468  in
469  clockwise or anti-clockwise order
470  /param nringMap a map of atomId to coordinate map for the atoms in the ring
471  */
472  void initFromRingCoords(const RDKit::INT_VECT &ring,
473  const RDGeom::INT_POINT2D_MAP &nringMap);
474 
475  //! Helper function to addNonRingAtom to a specified atoms in the fragment
476  /*
477  Add an atom to this embedded fragment when the fragment already
478  has a atleast two previously added neighbors to 'toAid'. In this
479  case we have to choose where the the new neighbor goes based on
480  the angle that is already taken around the atom.
481 
482  ARGUMENTS:
483  \param aid ID of the atom to be added
484  \param toAid ID of the atom that is already in this object to which this
485  atom is added
486  */
487  void addAtomToAtomWithAng(unsigned int aid, unsigned int toAid);
488 
489  //! Helper function to addNonRingAtom to a specified atoms in the fragment
490  /*!
491 
492  Add an atom (aid) to an atom (toAid) in this embedded fragment
493  when 'toAid' has one or no neighbors previously added. In this
494  case where the new atom should fall is determined by the degree
495  of 'toAid' and the congestion around it.
496 
497  ARGUMENTS:
498  \param aid ID of the atom to be added
499  \param toAid ID of the atom that is already in this object to which this
500  atom is added
501  \param mol the molecule we are dealing with
502  */
503  void addAtomToAtomWithNoAng(
504  unsigned int aid,
505  unsigned int toAid); //, const RDKit::ROMol *mol);
506 
507  //! Helper funtion to contructor that takes predefined coordinates
508  /*!
509 
510  Given an atom with more than 2 neighbors all embedded in this
511  fragment this function tries to determine
512 
513  - how much of an angle if left for any new neighbors yet to be
514  added
515  - which atom should we rotate when we add a new neighbor and in
516  which direction (clockwise or anticlockwise
517 
518  This is how it works
519  - find the pair of nbrs that have the largest angle
520  - this will most likely be the angle that is available - unless
521  we have fused rings and we found on of the ring angle !!!! -
522  in this cae we find the next best
523  - find the smallest anngle that contains one of these nbrs -
524  this determined which
525  - way we want to rotate
526 
527  ARGUMENTS:
528  \param aid the atom id where we are centered right now
529  \param doneNbrs list of neighbors that are already embedded around aid
530  */
531  void computeNbrsAndAng(unsigned int aid, const RDKit::INT_VECT &doneNbrs);
532  // const RDKit::ROMol *mol);
533 
534  //! are we embedded with the final (molecule) coordinates
535  bool d_done;
536  double d_px, d_nx, d_py, d_ny;
537 
538  //! a map that takes one from teh atom id to the embeddedatom object for that
539  // atom.
540  INT_EATOM_MAP d_eatoms;
541 
542  // RDKit::INT_DEQUE d_attachPts;
543  RDKit::INT_LIST d_attachPts;
544 
545  // pointer to the owning molecule
546  const RDKit::ROMol *dp_mol;
547 };
548 }
549 
550 #endif
std::list< int > INT_LIST
Definition: types.h:194
boost::shared_array< double > DOUBLE_SMART_PTR
Definition: EmbeddedFrag.h:25
EmbeddedAtom & operator=(const EmbeddedAtom &other)
Definition: EmbeddedFrag.h:56
const RDKit::ROMol * getMol() const
Get the molecule that this embedded fragmetn blongs to.
Definition: EmbeddedFrag.h:264
EmbeddedAtom(unsigned int aid, const RDGeom::Point2D &pos)
Definition: EmbeddedFrag.h:44
RDGeom::Point2D loc
Definition: EmbeddedFrag.h:115
RDGeom::Point2D normal
a zig-zag manner
Definition: EmbeddedFrag.h:121
double angle
the angle that is already takes at this atom, so any new atom attaching to
Definition: EmbeddedFrag.h:90
void Transform(const RDGeom::Transform2D &trans)
Definition: EmbeddedFrag.h:70
void Translate(const RDGeom::Point2D &shift)
Definition: EmbeddedFrag.h:294
ROMol is a molecule class that is intended to have a fixed topology.
Definition: ROMol.h:103
std::vector< INT_VECT > VECT_INT_VECT
Definition: types.h:202
std::map< int, Point2D > INT_POINT2D_MAP
Definition: point.h:511
const INT_EATOM_MAP & GetEmbeddedAtoms() const
Definition: EmbeddedFrag.h:292
std::vector< int > INT_VECT
Definition: types.h:188
Class containing a fragment of a molecule that has already been embedded.
Definition: EmbeddedFrag.h:147
Includes a bunch of functionality for handling Atom and Bond queries.
Definition: Atom.h:29
Class that contains the data for an atoms that has alredy been embedded.
Definition: EmbeddedFrag.h:28
int Size() const
the number of atoms in the embedded system
Definition: EmbeddedFrag.h:310
int rotDir
useful only on atoms that are degree >= 4
Definition: EmbeddedFrag.h:113
int nbr2
the second neighbor of atom that from the &#39;angle&#39;
Definition: EmbeddedFrag.h:96
class for representing a bond
Definition: Bond.h:47
std::map< unsigned int, EmbeddedAtom > INT_EATOM_MAP
Definition: EmbeddedFrag.h:133
RDGeom::Point2D reflectPoint(const RDGeom::Point2D &point, const RDGeom::Point2D &loc1, const RDGeom::Point2D &loc2)
EmbeddedFrag()
Default constructor.
Definition: EmbeddedFrag.h:154
INT_EATOM_MAP::const_iterator INT_EATOM_MAP_CI
Definition: EmbeddedFrag.h:135
void Reflect(const RDGeom::Point2D &loc1, const RDGeom::Point2D &loc2)
Definition: EmbeddedFrag.h:77
void markDone()
Mark this fragment to be done for final embedding.
Definition: EmbeddedFrag.h:258
bool isDone()
If this fragment done for the final embedding.
Definition: EmbeddedFrag.h:261
#define PRECONDITION(expr, mess)
Definition: Invariant.h:107
EmbeddedAtom GetEmbeddedAtom(unsigned int aid) const
Definition: EmbeddedFrag.h:301
INT_EATOM_MAP::iterator INT_EATOM_MAP_I
Definition: EmbeddedFrag.h:134
RDKit::INT_VECT neighs
and these are the atom IDs of the neighbors that still need to be embedded
Definition: EmbeddedFrag.h:124
int CisTransNbr
is this is a cis/trans atom the neighbor of this atom that is involved in
Definition: EmbeddedFrag.h:101
void TransformPoint(Point2D &pt) const
int nbr1
the first neighbor of this atom that form the &#39;angle&#39;
Definition: EmbeddedFrag.h:93