RDKit
Open-source cheminformatics and machine learning.
O3AAlignMolecules.h
Go to the documentation of this file.
1 // $Id$
2 //
3 // Copyright (C) 2013-2014 Paolo Tosco
4 //
5 // @@ All Rights Reserved @@
6 // This file is part of the RDKit.
7 // The contents are covered by the terms of the BSD license
8 // which is included in the file license.txt, found at the root
9 // of the RDKit source tree.
10 //
11 #ifndef _RD_O3AALIGNMOLECULES_H_
12 #define _RD_O3AALIGNMOLECULES_H_
13 
14 #include <RDGeneral/Invariant.h>
15 #include <Geometry/Transform3D.h>
16 #include <Geometry/point.h>
17 #include <Numerics/Vector.h>
18 #include <GraphMol/ROMol.h>
19 #include <GraphMol/Conformer.h>
22 #include <vector>
23 #include <cmath>
24 #include <boost/shared_ptr.hpp>
25 #include <boost/multi_array.hpp>
26 #include <boost/dynamic_bitset.hpp>
27 
28 namespace RDKit {
29 namespace MolAlign {
30 typedef struct O3AFuncData {
33  void *prbProp;
34  void *refProp;
35  int coeff;
36  int weight;
37  bool useMMFFSim;
38 } O3AFuncData;
39 inline bool isDoubleZero(const double x) {
40  return ((x < 1.0e-10) && (x > -1.0e-10));
41 };
42 
43 class O3AConstraintVect;
44 
45 //! A class to define alignment constraints. Each constraint
46 //! is defined by a pair of atom indexes (one for the probe,
47 //! one for the reference) and a weight. Constraints can
48 //! can be added via the O3AConstraintVect class.
50  friend class O3AConstraintVect;
51 
52  public:
53  double getPrbIdx() { return d_prbIdx; }
54  double getRefIdx() { return d_refIdx; }
55  double getWeight() { return d_weight; }
56 
57  private:
58  unsigned int d_idx;
59  unsigned int d_prbIdx;
60  unsigned int d_refIdx;
61  double d_weight;
62 };
63 
64 //! A class to store a vector of alignment constraints. Each constraint
65 //! is defined by an O3AConstraint object. Each time the append()
66 //! method is invoked, the vector is sorted to make lookup faster.
67 //! Hence, constraints are not necessarily stored in the same order
68 //! they were appended.
70  public:
71  O3AConstraintVect() : d_count(0){};
73  void append(unsigned int prbIdx, unsigned int refIdx, double weight) {
74  O3AConstraint *o3aConstraint = new O3AConstraint();
75  o3aConstraint->d_idx = d_count;
76  o3aConstraint->d_prbIdx = prbIdx;
77  o3aConstraint->d_refIdx = refIdx;
78  o3aConstraint->d_weight = weight;
79  d_o3aConstraintVect.push_back(
80  boost::shared_ptr<O3AConstraint>(o3aConstraint));
81  std::sort(d_o3aConstraintVect.begin(), d_o3aConstraintVect.end(),
82  d_compareO3AConstraint);
83  ++d_count;
84  }
85  std::vector<boost::shared_ptr<O3AConstraint> >::size_type size() {
86  return d_o3aConstraintVect.size();
87  }
88  O3AConstraint *operator[](unsigned int i) {
89  return d_o3aConstraintVect[i].get();
90  }
91 
92  private:
93  unsigned int d_count;
94  std::vector<boost::shared_ptr<O3AConstraint> > d_o3aConstraintVect;
95  static bool d_compareO3AConstraint(boost::shared_ptr<O3AConstraint> a,
96  boost::shared_ptr<O3AConstraint> b) {
97  return (
98  (a->d_prbIdx != b->d_prbIdx)
99  ? (a->d_prbIdx < b->d_prbIdx)
100  : ((a->d_refIdx != b->d_refIdx)
101  ? (a->d_refIdx < b->d_refIdx)
102  : ((a->d_weight != b->d_weight) ? (a->d_weight > b->d_weight)
103  : (a->d_idx < b->d_idx))));
104  };
105 };
106 
107 const int O3_DUMMY_COST = 100000;
108 const int O3_LARGE_NEGATIVE_WEIGHT = -1e9;
110 const unsigned int O3_MAX_H_BINS = 20;
111 const unsigned int O3_MAX_SDM_ITERATIONS = 100;
112 const unsigned int O3_MAX_SDM_THRESHOLD_ITER = 3;
113 const double O3_RANDOM_TRANS_COEFF = 5.0;
114 const double O3_THRESHOLD_DIFF_DISTANCE = 0.1;
115 const double O3_SDM_THRESHOLD_START = 0.7;
116 const double O3_SDM_THRESHOLD_STEP = 0.3;
117 const double O3_CHARGE_WEIGHT = 10.0;
118 const double O3_CRIPPEN_WEIGHT = 10.0;
119 const double O3_RMSD_THRESHOLD = 1.0e-04;
120 const double O3_SCORE_THRESHOLD = 0.01;
121 const double O3_SCORING_FUNCTION_ALPHA = 5.0;
122 const double O3_SCORING_FUNCTION_BETA = 0.5;
123 const double O3_CHARGE_COEFF = 5.0;
124 const double O3_CRIPPEN_COEFF = 1.0;
125 const int O3_MAX_WEIGHT_COEFF = 5;
126 enum {
128  O3_ACCURACY_MASK = (1 << 0 | 1 << 1),
129  O3_LOCAL_ONLY = (1 << 2)
130 };
131 
133  public:
134  MolHistogram(const ROMol &mol, const double *dmat, bool cleanupDmat = false);
136  inline int get(const unsigned int y, const unsigned int x) const {
137  PRECONDITION(y < d_h.shape()[0], "Invalid index on MolHistogram");
138  PRECONDITION(x < d_h.shape()[1], "Invalid index on MolHistogram");
139  return d_h[y][x];
140  }
141 
142  private:
143  boost::multi_array<int, 2> d_h;
144 };
145 
146 class LAP {
147  public:
148  LAP(unsigned int dim)
149  : d_rowSol(dim),
150  d_colSol(dim),
151  d_free(dim),
152  d_colList(dim),
153  d_matches(dim),
154  d_d(dim),
155  d_v(dim),
156  d_pred(dim),
157  d_cost(boost::extents[dim][dim]){};
158  ~LAP(){};
159  int getCost(const unsigned int i, const unsigned int j) {
160  PRECONDITION(i < d_cost.shape()[0], "Invalid index on LAP.cost");
161  PRECONDITION(j < d_cost.shape()[1], "Invalid index on LAP.cost");
162  return d_cost[i][j];
163  }
164  int getRowSol(const unsigned int i) {
165  PRECONDITION(i < d_rowSol.size(), "Invalid index on LAP.rowSol");
166  return d_rowSol[i];
167  }
168  void computeMinCostPath(const int dim);
169  void computeCostMatrix(const ROMol &prbMol, const MolHistogram &prbHist,
170  const ROMol &refMol, const MolHistogram &refHist,
171  O3AConstraintVect *o3aConstraintVect,
172  int (*costFunc)(const unsigned int, const unsigned int,
173  double, void *),
174  void *data, const unsigned int n_bins = O3_MAX_H_BINS);
175 
176  private:
177  std::vector<int> d_rowSol;
178  std::vector<int> d_colSol;
179  std::vector<int> d_free;
180  std::vector<int> d_colList;
181  std::vector<int> d_matches;
182  std::vector<int> d_d;
183  std::vector<int> d_v;
184  std::vector<int> d_pred;
185  boost::multi_array<int, 2> d_cost;
186 };
187 
188 class SDM {
189  public:
190  // constructor
191  SDM(const Conformer *prbConf = NULL, const Conformer *refConf = NULL,
192  O3AConstraintVect *o3aConstraintVect = NULL)
193  : d_prbConf(prbConf),
194  d_refConf(refConf),
195  d_o3aConstraintVect(o3aConstraintVect){};
196  // copy constructor
197  SDM(const SDM &other)
198  : d_prbConf(other.d_prbConf),
199  d_refConf(other.d_refConf),
200  d_o3aConstraintVect(other.d_o3aConstraintVect),
201  d_SDMPtrVect(other.d_SDMPtrVect.size()) {
202  for (unsigned int i = 0; i < d_SDMPtrVect.size(); ++i) {
203  d_SDMPtrVect[i] = boost::shared_ptr<SDMElement>(new SDMElement());
204  memcpy(d_SDMPtrVect[i].get(), other.d_SDMPtrVect[i].get(),
205  sizeof(SDMElement));
206  }
207  };
208  // assignment operator
209  SDM &operator=(const SDM &other) {
210  d_prbConf = other.d_prbConf;
211  d_refConf = other.d_refConf;
212  d_o3aConstraintVect = other.d_o3aConstraintVect;
213  d_SDMPtrVect.resize(other.d_SDMPtrVect.size());
214  for (unsigned int i = 0; i < d_SDMPtrVect.size(); ++i) {
215  d_SDMPtrVect[i] = boost::shared_ptr<SDMElement>(new SDMElement());
216  memcpy(d_SDMPtrVect[i].get(), other.d_SDMPtrVect[i].get(),
217  sizeof(SDMElement));
218  }
219 
220  return *this;
221  };
222  // destructor
223  ~SDM(){};
224  void fillFromDist(double threshold,
225  const boost::dynamic_bitset<> &refHvyAtoms,
226  const boost::dynamic_bitset<> &prbHvyAtoms);
227  void fillFromLAP(LAP &lap);
228  double scoreAlignment(double (*scoringFunc)(const unsigned int,
229  const unsigned int, void *),
230  void *data);
231  void prepareMatchWeightsVect(RDKit::MatchVectType &matchVect,
232  RDNumeric::DoubleVector &weights,
233  double (*weightFunc)(const unsigned int,
234  const unsigned int, void *),
235  void *data);
236  unsigned int size() { return d_SDMPtrVect.size(); }
237 
238  private:
239  typedef struct SDMElement {
240  unsigned int idx[2];
241  int score;
242  int cost;
243  double sqDist;
244  O3AConstraint *o3aConstraint;
245  } SDMElement;
246  const Conformer *d_prbConf;
247  const Conformer *d_refConf;
248  O3AConstraintVect *d_o3aConstraintVect;
249  std::vector<boost::shared_ptr<SDMElement> > d_SDMPtrVect;
250  static bool compareSDMScore(boost::shared_ptr<SDMElement> a,
251  boost::shared_ptr<SDMElement> b) {
252  return ((a->score != b->score)
253  ? (a->score < b->score)
254  : ((a->cost != b->cost)
255  ? (a->cost < b->cost)
256  : ((a->idx[0] != b->idx[0]) ? (a->idx[0] < b->idx[0])
257  : (a->idx[1] < b->idx[1]))));
258  };
259  static bool compareSDMDist(boost::shared_ptr<SDMElement> a,
260  boost::shared_ptr<SDMElement> b) {
261  double aWeight = (a->o3aConstraint ? a->o3aConstraint->getWeight() : 0.0);
262  double bWeight = (b->o3aConstraint ? b->o3aConstraint->getWeight() : 0.0);
263  return ((aWeight != bWeight)
264  ? (aWeight > bWeight)
265  : ((a->sqDist != b->sqDist)
266  ? (a->sqDist < b->sqDist)
267  : ((a->idx[0] != b->idx[0]) ? (a->idx[0] < b->idx[0])
268  : (a->idx[1] < b->idx[1]))));
269  };
270 };
271 
272 class O3A {
273  public:
274  //! pre-defined atom typing schemes
275  typedef enum { MMFF94 = 0, CRIPPEN } AtomTypeScheme;
276  O3A(ROMol &prbMol, const ROMol &refMol, void *prbProp, void *refProp,
277  AtomTypeScheme atomTypes = MMFF94, const int prbCid = -1,
278  const int refCid = -1, const bool reflect = false,
279  const unsigned int maxIters = 50, unsigned int options = 0,
280  const MatchVectType *constraintMap = NULL,
281  const RDNumeric::DoubleVector *constraintWeights = NULL,
282  LAP *extLAP = NULL, MolHistogram *extPrbHist = NULL,
283  MolHistogram *extRefHist = NULL);
284  O3A(int (*costFunc)(const unsigned int, const unsigned int, double, void *),
285  double (*weightFunc)(const unsigned int, const unsigned int, void *),
286  double (*scoringFunc)(const unsigned int, const unsigned int, void *),
287  void *data, ROMol &prbMol, const ROMol &refMol, const int prbCid,
288  const int refCid, boost::dynamic_bitset<> *prbHvyAtoms = NULL,
289  boost::dynamic_bitset<> *refHvyAtoms = NULL, const bool reflect = false,
290  const unsigned int maxIters = 50, unsigned int options = 0,
291  O3AConstraintVect *o3aConstraintVect = NULL, ROMol *extWorkPrbMol = NULL,
292  LAP *extLAP = NULL, MolHistogram *extPrbHist = NULL,
293  MolHistogram *extRefHist = NULL);
294  ~O3A() {
295  if (d_o3aMatchVect) {
296  delete d_o3aMatchVect;
297  }
298  if (d_o3aWeights) {
299  delete d_o3aWeights;
300  }
301  };
302  double align();
303  double trans(RDGeom::Transform3D &trans);
304  double score() { return d_o3aScore; };
305  const RDKit::MatchVectType *matches() { return d_o3aMatchVect; };
306  const RDNumeric::DoubleVector *weights() { return d_o3aWeights; };
307 
308  private:
309  ROMol *d_prbMol;
310  const ROMol *d_refMol;
311  int d_prbCid;
312  int d_refCid;
313  bool d_reflect;
314  unsigned int d_maxIters;
315  const RDKit::MatchVectType *d_o3aMatchVect;
316  const RDNumeric::DoubleVector *d_o3aWeights;
317  double d_o3aScore;
318 };
319 
320 void randomTransform(ROMol &mol, const int cid = -1, const int seed = -1);
321 const RDGeom::POINT3D_VECT *reflect(const Conformer &conf);
322 int o3aMMFFCostFunc(const unsigned int prbIdx, const unsigned int refIdx,
323  double hSum, void *data);
324 double o3aMMFFWeightFunc(const unsigned int prbIdx, const unsigned int refIdx,
325  void *data);
326 double o3aMMFFScoringFunc(const unsigned int prbIdx, const unsigned int refIdx,
327  void *data);
328 int o3aCrippenCostFunc(const unsigned int prbIdx, const unsigned int refIdx,
329  double hSum, void *data);
330 double o3aCrippenWeightFunc(const unsigned int prbIdx,
331  const unsigned int refIdx, void *data);
332 double o3aCrippenScoringFunc(const unsigned int prbIdx,
333  const unsigned int refIdx, void *data);
334 
336  ROMol &prbMol, const ROMol &refMol, void *prbProp, void *refProp,
337  std::vector<boost::shared_ptr<O3A> > &res, int numThreads = 1,
338  O3A::AtomTypeScheme atomTypes = O3A::MMFF94, const int refCid = -1,
339  const bool reflect = false, const unsigned int maxIters = 50,
340  unsigned int options = 0, const MatchVectType *constraintMap = NULL,
341  const RDNumeric::DoubleVector *constraintWeights = NULL);
342 }
343 }
344 #endif
const double O3_SDM_THRESHOLD_START
std::vector< boost::shared_ptr< O3AConstraint > >::size_type size()
SDM(const SDM &other)
Definition: RDLog.h:20
const double O3_THRESHOLD_DIFF_DISTANCE
const double O3_SCORE_THRESHOLD
std::vector< std::pair< int, int > > MatchVectType
used to return matches from substructure searching, The format is (queryAtomIdx, molAtomIdx) ...
const double O3_CHARGE_WEIGHT
O3AConstraint * operator[](unsigned int i)
Defines the primary molecule class ROMol as well as associated typedefs.
const RDGeom::POINT3D_VECT * reflect(const Conformer &conf)
void append(unsigned int prbIdx, unsigned int refIdx, double weight)
double o3aCrippenScoringFunc(const unsigned int prbIdx, const unsigned int refIdx, void *data)
std::vector< Point3D > POINT3D_VECT
Definition: point.h:507
int o3aMMFFCostFunc(const unsigned int prbIdx, const unsigned int refIdx, double hSum, void *data)
SDM & operator=(const SDM &other)
ROMol is a molecule class that is intended to have a fixed topology.
Definition: ROMol.h:106
void getO3AForProbeConfs(ROMol &prbMol, const ROMol &refMol, void *prbProp, void *refProp, std::vector< boost::shared_ptr< O3A > > &res, int numThreads=1, O3A::AtomTypeScheme atomTypes=O3A::MMFF94, const int refCid=-1, const bool reflect=false, const unsigned int maxIters=50, unsigned int options=0, const MatchVectType *constraintMap=NULL, const RDNumeric::DoubleVector *constraintWeights=NULL)
int o3aCrippenCostFunc(const unsigned int prbIdx, const unsigned int refIdx, double hSum, void *data)
const double O3_RANDOM_TRANS_COEFF
void randomTransform(ROMol &mol, const int cid=-1, const int seed=-1)
const double O3_CHARGE_COEFF
const unsigned int O3_MAX_SDM_ITERATIONS
const unsigned int O3_MAX_H_BINS
LAP(unsigned int dim)
const RDKit::MatchVectType * matches()
const double O3_CRIPPEN_WEIGHT
const unsigned int O3_MAX_SDM_THRESHOLD_ITER
const RDNumeric::DoubleVector * weights()
const double O3_SCORING_FUNCTION_BETA
int getRowSol(const unsigned int i)
Includes a bunch of functionality for handling Atom and Bond queries.
Definition: Atom.h:29
const double O3_SDM_THRESHOLD_STEP
const double O3_RMSD_THRESHOLD
bool isDoubleZero(const double x)
AtomTypeScheme
pre-defined atom typing schemes
const int O3_MAX_WEIGHT_COEFF
SDM(const Conformer *prbConf=NULL, const Conformer *refConf=NULL, O3AConstraintVect *o3aConstraintVect=NULL)
The class for representing 2D or 3D conformation of a molecule.
Definition: Conformer.h:41
const int O3_DEFAULT_CONSTRAINT_WEIGHT
#define PRECONDITION(expr, mess)
Definition: Invariant.h:107
const int O3_LARGE_NEGATIVE_WEIGHT
struct RDKit::MolAlign::O3AFuncData O3AFuncData
double o3aMMFFWeightFunc(const unsigned int prbIdx, const unsigned int refIdx, void *data)
const double O3_SCORING_FUNCTION_ALPHA
const double O3_CRIPPEN_COEFF
double o3aCrippenWeightFunc(const unsigned int prbIdx, const unsigned int refIdx, void *data)
A class to represent vectors of numbers.
Definition: Vector.h:28
double o3aMMFFScoringFunc(const unsigned int prbIdx, const unsigned int refIdx, void *data)
int getCost(const unsigned int i, const unsigned int j)