RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
O3AAlignMolecules.h
Go to the documentation of this file.
1//
2// Copyright (C) 2013-2014 Paolo Tosco
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#include <RDGeneral/export.h>
11#ifndef _RD_O3AALIGNMOLECULES_H_
12#define _RD_O3AALIGNMOLECULES_H_
13
14#include <RDGeneral/Invariant.h>
16#include <Geometry/point.h>
17#include <Numerics/Vector.h>
18#include <GraphMol/ROMol.h>
19#include <GraphMol/Conformer.h>
22#include <vector>
23#include <cmath>
24#include <boost/shared_ptr.hpp>
25#include <boost/multi_array.hpp>
26#include <boost/dynamic_bitset.hpp>
27
28namespace RDKit {
29namespace MolAlign {
39inline bool isDoubleZero(const double x) {
40 return ((x < 1.0e-10) && (x > -1.0e-10));
41};
42
43class O3AConstraintVect;
44
45//! A class to define alignment constraints. Each constraint
46//! is defined by a pair of atom indexes (one for the probe,
47//! one for the reference) and a weight. Constraints can
48//! can be added via the O3AConstraintVect class.
50 friend class O3AConstraintVect;
51
52 public:
53 double getPrbIdx() { return d_prbIdx; }
54 double getRefIdx() { return d_refIdx; }
55 double getWeight() { return d_weight; }
56
57 private:
58 unsigned int d_idx;
59 unsigned int d_prbIdx;
60 unsigned int d_refIdx;
61 double d_weight;
62};
63
64//! A class to store a vector of alignment constraints. Each constraint
65//! is defined by an O3AConstraint object. Each time the append()
66//! method is invoked, the vector is sorted to make lookup faster.
67//! Hence, constraints are not necessarily stored in the same order
68//! they were appended.
70 public:
72 ~O3AConstraintVect() = default;
73 void append(unsigned int prbIdx, unsigned int refIdx, double weight) {
74 O3AConstraint *o3aConstraint = new O3AConstraint();
75 o3aConstraint->d_idx = d_count;
76 o3aConstraint->d_prbIdx = prbIdx;
77 o3aConstraint->d_refIdx = refIdx;
78 o3aConstraint->d_weight = weight;
79 d_o3aConstraintVect.emplace_back(o3aConstraint);
80 std::sort(d_o3aConstraintVect.begin(), d_o3aConstraintVect.end(),
81 d_compareO3AConstraint);
82 ++d_count;
83 }
84 std::vector<boost::shared_ptr<O3AConstraint>>::size_type size() {
85 return d_o3aConstraintVect.size();
86 }
87 O3AConstraint *operator[](unsigned int i) {
88 return d_o3aConstraintVect[i].get();
89 }
90
91 private:
92 unsigned int d_count{0};
93 std::vector<boost::shared_ptr<O3AConstraint>> d_o3aConstraintVect;
94 static bool d_compareO3AConstraint(boost::shared_ptr<O3AConstraint> a,
95 boost::shared_ptr<O3AConstraint> b) {
96 return (
97 (a->d_prbIdx != b->d_prbIdx)
98 ? (a->d_prbIdx < b->d_prbIdx)
99 : ((a->d_refIdx != b->d_refIdx)
100 ? (a->d_refIdx < b->d_refIdx)
101 : ((a->d_weight != b->d_weight) ? (a->d_weight > b->d_weight)
102 : (a->d_idx < b->d_idx))));
103 }
104};
105
106const int O3_DUMMY_COST = 100000;
109const unsigned int O3_MAX_H_BINS = 20;
110const unsigned int O3_MAX_SDM_ITERATIONS = 100;
111const unsigned int O3_MAX_SDM_THRESHOLD_ITER = 3;
112const double O3_RANDOM_TRANS_COEFF = 5.0;
113const double O3_THRESHOLD_DIFF_DISTANCE = 0.1;
114const double O3_SDM_THRESHOLD_START = 0.7;
115const double O3_SDM_THRESHOLD_STEP = 0.3;
116const double O3_CHARGE_WEIGHT = 10.0;
117const double O3_CRIPPEN_WEIGHT = 10.0;
118const double O3_RMSD_THRESHOLD = 1.0e-04;
119const double O3_SCORE_THRESHOLD = 0.01;
120const double O3_SCORING_FUNCTION_ALPHA = 5.0;
121const double O3_SCORING_FUNCTION_BETA = 0.5;
122const double O3_CHARGE_COEFF = 5.0;
123const double O3_CRIPPEN_COEFF = 1.0;
125enum {
127 O3_ACCURACY_MASK = (1 << 0 | 1 << 1),
128 O3_LOCAL_ONLY = (1 << 2)
130
132 public:
133 MolHistogram(const ROMol &mol, const double *dmat, bool cleanupDmat = false);
134 ~MolHistogram() = default;
135 inline int get(const unsigned int y, const unsigned int x) const {
136 PRECONDITION(y < d_h.shape()[0], "Invalid index on MolHistogram");
137 PRECONDITION(x < d_h.shape()[1], "Invalid index on MolHistogram");
138 return d_h[y][x];
139 }
140
141 private:
142 boost::multi_array<int, 2> d_h;
143};
144
146 public:
147 LAP(unsigned int dim)
148 : d_rowSol(dim),
149 d_colSol(dim),
150 d_free(dim),
151 d_colList(dim),
152 d_matches(dim),
153 d_d(dim),
154 d_v(dim),
155 d_pred(dim),
156 d_cost(boost::extents[dim][dim]) {}
157 ~LAP() = default;
158 int getCost(const unsigned int i, const unsigned int j) {
159 PRECONDITION(i < d_cost.shape()[0], "Invalid index on LAP.cost");
160 PRECONDITION(j < d_cost.shape()[1], "Invalid index on LAP.cost");
161 return d_cost[i][j];
162 }
163 int getRowSol(const unsigned int i) {
164 PRECONDITION(i < d_rowSol.size(), "Invalid index on LAP.rowSol");
165 return d_rowSol[i];
166 }
167 void computeMinCostPath(const int dim);
168 void computeCostMatrix(const ROMol &prbMol, const MolHistogram &prbHist,
169 const ROMol &refMol, const MolHistogram &refHist,
170 O3AConstraintVect *o3aConstraintVect,
171 int (*costFunc)(const unsigned int, const unsigned int,
172 double, void *),
173 void *data, const unsigned int n_bins = O3_MAX_H_BINS);
174
175 private:
176 std::vector<int> d_rowSol;
177 std::vector<int> d_colSol;
178 std::vector<int> d_free;
179 std::vector<int> d_colList;
180 std::vector<int> d_matches;
181 std::vector<int> d_d;
182 std::vector<int> d_v;
183 std::vector<int> d_pred;
184 boost::multi_array<int, 2> d_cost;
185};
186
188 public:
189 // constructor
190 SDM(const Conformer *prbConf = nullptr, const Conformer *refConf = nullptr,
191 O3AConstraintVect *o3aConstraintVect = nullptr)
192 : d_prbConf(prbConf),
193 d_refConf(refConf),
194 d_o3aConstraintVect(o3aConstraintVect) {}
195 // copy constructor
196 SDM(const SDM &other)
197 : d_prbConf(other.d_prbConf),
198 d_refConf(other.d_refConf),
199 d_o3aConstraintVect(other.d_o3aConstraintVect),
200 d_SDMPtrVect(other.d_SDMPtrVect.size()) {
201 for (unsigned int i = 0; i < d_SDMPtrVect.size(); ++i) {
202 d_SDMPtrVect[i] = boost::shared_ptr<SDMElement>(new SDMElement());
203 memcpy(d_SDMPtrVect[i].get(), other.d_SDMPtrVect[i].get(),
204 sizeof(SDMElement));
205 }
206 }
207 // assignment operator
208 SDM &operator=(const SDM &other) {
209 if (this == &other) {
210 return *this;
211 }
212 d_prbConf = other.d_prbConf;
213 d_refConf = other.d_refConf;
214 d_o3aConstraintVect = other.d_o3aConstraintVect;
215 d_SDMPtrVect.resize(other.d_SDMPtrVect.size());
216 for (unsigned int i = 0; i < d_SDMPtrVect.size(); ++i) {
217 d_SDMPtrVect[i] = boost::shared_ptr<SDMElement>(new SDMElement());
218 memcpy(d_SDMPtrVect[i].get(), other.d_SDMPtrVect[i].get(),
219 sizeof(SDMElement));
220 }
221
222 return *this;
223 }
224 // destructor
225 ~SDM() = default;
226 void fillFromDist(double threshold,
227 const boost::dynamic_bitset<> &refHvyAtoms,
228 const boost::dynamic_bitset<> &prbHvyAtoms);
229 void fillFromLAP(LAP &lap);
230 double scoreAlignment(double (*scoringFunc)(const unsigned int,
231 const unsigned int, void *),
232 void *data);
235 double (*weightFunc)(const unsigned int,
236 const unsigned int, void *),
237 void *data);
238 unsigned int size() { return d_SDMPtrVect.size(); }
239
240 private:
241 typedef struct SDMElement {
242 unsigned int idx[2];
243 int score;
244 int cost;
245 double sqDist;
246 O3AConstraint *o3aConstraint;
247 } SDMElement;
248 const Conformer *d_prbConf;
249 const Conformer *d_refConf;
250 O3AConstraintVect *d_o3aConstraintVect;
251 std::vector<boost::shared_ptr<SDMElement>> d_SDMPtrVect;
252 static bool compareSDMScore(boost::shared_ptr<SDMElement> a,
253 boost::shared_ptr<SDMElement> b) {
254 return ((a->score != b->score)
255 ? (a->score < b->score)
256 : ((a->cost != b->cost)
257 ? (a->cost < b->cost)
258 : ((a->idx[0] != b->idx[0]) ? (a->idx[0] < b->idx[0])
259 : (a->idx[1] < b->idx[1]))));
260 }
261 static bool compareSDMDist(boost::shared_ptr<SDMElement> a,
262 boost::shared_ptr<SDMElement> b) {
263 double aWeight = (a->o3aConstraint ? a->o3aConstraint->getWeight() : 0.0);
264 double bWeight = (b->o3aConstraint ? b->o3aConstraint->getWeight() : 0.0);
265 return ((aWeight != bWeight)
266 ? (aWeight > bWeight)
267 : ((a->sqDist != b->sqDist)
268 ? (a->sqDist < b->sqDist)
269 : ((a->idx[0] != b->idx[0]) ? (a->idx[0] < b->idx[0])
270 : (a->idx[1] < b->idx[1]))));
271 }
272};
273
275 public:
276 //! pre-defined atom typing schemes
277 typedef enum { MMFF94 = 0, CRIPPEN } AtomTypeScheme;
278 O3A(ROMol &prbMol, const ROMol &refMol, void *prbProp, void *refProp,
279 AtomTypeScheme atomTypes = MMFF94, const int prbCid = -1,
280 const int refCid = -1, const bool reflect = false,
281 const unsigned int maxIters = 50, unsigned int options = 0,
282 const MatchVectType *constraintMap = nullptr,
283 const RDNumeric::DoubleVector *constraintWeights = nullptr,
284 LAP *extLAP = nullptr, MolHistogram *extPrbHist = nullptr,
285 MolHistogram *extRefHist = nullptr);
286 O3A(int (*costFunc)(const unsigned int, const unsigned int, double, void *),
287 double (*weightFunc)(const unsigned int, const unsigned int, void *),
288 double (*scoringFunc)(const unsigned int, const unsigned int, void *),
289 void *data, ROMol &prbMol, const ROMol &refMol, const int prbCid,
290 const int refCid, const boost::dynamic_bitset<> &prbHvyAtoms,
291 const boost::dynamic_bitset<> &refHvyAtoms, const bool reflect = false,
292 const unsigned int maxIters = 50, unsigned int options = 0,
293 O3AConstraintVect *o3aConstraintVect = nullptr,
294 ROMol *extWorkPrbMol = nullptr, LAP *extLAP = nullptr,
295 MolHistogram *extPrbHist = nullptr, MolHistogram *extRefHist = nullptr);
297 if (d_o3aMatchVect) {
298 delete d_o3aMatchVect;
299 }
300 if (d_o3aWeights) {
301 delete d_o3aWeights;
302 }
303 }
304 double align();
306 double score() { return d_o3aScore; }
307 const RDKit::MatchVectType *matches() { return d_o3aMatchVect; }
308 const RDNumeric::DoubleVector *weights() { return d_o3aWeights; }
309
310 private:
311 ROMol *d_prbMol;
312 const ROMol *d_refMol;
313 int d_prbCid;
314 int d_refCid;
315 bool d_reflect;
316 unsigned int d_maxIters;
317 const RDKit::MatchVectType *d_o3aMatchVect;
318 const RDNumeric::DoubleVector *d_o3aWeights;
319 double d_o3aScore;
320};
321
323 const int seed = -1);
325 const Conformer &conf);
327 const unsigned int refIdx,
328 double hSum, void *data);
330 const unsigned int refIdx,
331 void *data);
333 const unsigned int refIdx,
334 void *data);
336 const unsigned int refIdx,
337 double hSum, void *data);
339 const unsigned int refIdx,
340 void *data);
342 const unsigned int refIdx,
343 void *data);
344
346 ROMol &prbMol, const ROMol &refMol, void *prbProp, void *refProp,
347 std::vector<boost::shared_ptr<O3A>> &res, int numThreads = 1,
349 const bool reflect = false, const unsigned int maxIters = 50,
350 unsigned int options = 0, const MatchVectType *constraintMap = nullptr,
352} // namespace MolAlign
353} // namespace RDKit
354#endif
#define PRECONDITION(expr, mess)
Definition Invariant.h:109
Defines the primary molecule class ROMol as well as associated typedefs.
The class for representing 2D or 3D conformation of a molecule.
Definition Conformer.h:46
LAP(unsigned int dim)
void computeMinCostPath(const int dim)
int getRowSol(const unsigned int i)
int getCost(const unsigned int i, const unsigned int j)
void computeCostMatrix(const ROMol &prbMol, const MolHistogram &prbHist, const ROMol &refMol, const MolHistogram &refHist, O3AConstraintVect *o3aConstraintVect, int(*costFunc)(const unsigned int, const unsigned int, double, void *), void *data, const unsigned int n_bins=O3_MAX_H_BINS)
int get(const unsigned int y, const unsigned int x) const
MolHistogram(const ROMol &mol, const double *dmat, bool cleanupDmat=false)
std::vector< boost::shared_ptr< O3AConstraint > >::size_type size()
O3AConstraint * operator[](unsigned int i)
void append(unsigned int prbIdx, unsigned int refIdx, double weight)
const RDNumeric::DoubleVector * weights()
double trans(RDGeom::Transform3D &trans)
O3A(int(*costFunc)(const unsigned int, const unsigned int, double, void *), double(*weightFunc)(const unsigned int, const unsigned int, void *), double(*scoringFunc)(const unsigned int, const unsigned int, void *), void *data, ROMol &prbMol, const ROMol &refMol, const int prbCid, const int refCid, const boost::dynamic_bitset<> &prbHvyAtoms, const boost::dynamic_bitset<> &refHvyAtoms, const bool reflect=false, const unsigned int maxIters=50, unsigned int options=0, O3AConstraintVect *o3aConstraintVect=nullptr, ROMol *extWorkPrbMol=nullptr, LAP *extLAP=nullptr, MolHistogram *extPrbHist=nullptr, MolHistogram *extRefHist=nullptr)
O3A(ROMol &prbMol, const ROMol &refMol, void *prbProp, void *refProp, AtomTypeScheme atomTypes=MMFF94, const int prbCid=-1, const int refCid=-1, const bool reflect=false, const unsigned int maxIters=50, unsigned int options=0, const MatchVectType *constraintMap=nullptr, const RDNumeric::DoubleVector *constraintWeights=nullptr, LAP *extLAP=nullptr, MolHistogram *extPrbHist=nullptr, MolHistogram *extRefHist=nullptr)
AtomTypeScheme
pre-defined atom typing schemes
const RDKit::MatchVectType * matches()
SDM & operator=(const SDM &other)
void fillFromLAP(LAP &lap)
void fillFromDist(double threshold, const boost::dynamic_bitset<> &refHvyAtoms, const boost::dynamic_bitset<> &prbHvyAtoms)
double scoreAlignment(double(*scoringFunc)(const unsigned int, const unsigned int, void *), void *data)
void prepareMatchWeightsVect(RDKit::MatchVectType &matchVect, RDNumeric::DoubleVector &weights, double(*weightFunc)(const unsigned int, const unsigned int, void *), void *data)
SDM(const SDM &other)
SDM(const Conformer *prbConf=nullptr, const Conformer *refConf=nullptr, O3AConstraintVect *o3aConstraintVect=nullptr)
A class to represent vectors of numbers.
Definition Vector.h:29
#define RDKIT_MOLALIGN_EXPORT
Definition export.h:273
std::vector< Point3D > POINT3D_VECT
Definition point.h:546
RDKIT_MOLALIGN_EXPORT void getO3AForProbeConfs(ROMol &prbMol, const ROMol &refMol, void *prbProp, void *refProp, std::vector< boost::shared_ptr< O3A > > &res, int numThreads=1, O3A::AtomTypeScheme atomTypes=O3A::MMFF94, const int refCid=-1, const bool reflect=false, const unsigned int maxIters=50, unsigned int options=0, const MatchVectType *constraintMap=nullptr, const RDNumeric::DoubleVector *constraintWeights=nullptr)
const int O3_MAX_WEIGHT_COEFF
const int O3_LARGE_NEGATIVE_WEIGHT
const int O3_DEFAULT_CONSTRAINT_WEIGHT
RDKIT_MOLALIGN_EXPORT double o3aCrippenWeightFunc(const unsigned int prbIdx, const unsigned int refIdx, void *data)
const double O3_SDM_THRESHOLD_START
const double O3_RMSD_THRESHOLD
const double O3_CRIPPEN_WEIGHT
const double O3_CHARGE_COEFF
const double O3_RANDOM_TRANS_COEFF
const unsigned int O3_MAX_SDM_ITERATIONS
const double O3_SDM_THRESHOLD_STEP
const double O3_CRIPPEN_COEFF
RDKIT_MOLALIGN_EXPORT int o3aMMFFCostFunc(const unsigned int prbIdx, const unsigned int refIdx, double hSum, void *data)
const unsigned int O3_MAX_SDM_THRESHOLD_ITER
const double O3_SCORING_FUNCTION_BETA
RDKIT_MOLALIGN_EXPORT void randomTransform(ROMol &mol, const int cid=-1, const int seed=-1)
const unsigned int O3_MAX_H_BINS
const double O3_SCORING_FUNCTION_ALPHA
bool isDoubleZero(const double x)
const double O3_THRESHOLD_DIFF_DISTANCE
RDKIT_MOLALIGN_EXPORT int o3aCrippenCostFunc(const unsigned int prbIdx, const unsigned int refIdx, double hSum, void *data)
RDKIT_MOLALIGN_EXPORT double o3aCrippenScoringFunc(const unsigned int prbIdx, const unsigned int refIdx, void *data)
RDKIT_MOLALIGN_EXPORT double o3aMMFFScoringFunc(const unsigned int prbIdx, const unsigned int refIdx, void *data)
const double O3_CHARGE_WEIGHT
const double O3_SCORE_THRESHOLD
RDKIT_MOLALIGN_EXPORT const RDGeom::POINT3D_VECT * reflect(const Conformer &conf)
RDKIT_MOLALIGN_EXPORT double o3aMMFFWeightFunc(const unsigned int prbIdx, const unsigned int refIdx, void *data)
Std stuff.
std::vector< std::pair< int, int > > MatchVectType
used to return matches from substructure searching, The format is (queryAtomIdx, molAtomIdx)
bool rdvalue_is(const RDValue_cast_t)
Definition RDLog.h:25