RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
RGroupScore.h
Go to the documentation of this file.
1//
2// Copyright (C) 2017 Novartis Institutes for BioMedical Research
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#ifndef RGROUP_SCORE_H
11#define RGROUP_SCORE_H
12
13#include "RGroupMatch.h"
14#include <vector>
15#include <deque>
16#include <set>
17namespace RDKit {
18
19//! iterate through all possible permutations of the rgroups
21 std::vector<size_t> permutation;
22 std::vector<size_t> sizes;
23 std::deque<size_t> bases;
26 CartesianProduct(const std::vector<size_t> &inputSizes)
27 : permutation(inputSizes.size(), 0),
31 for (unsigned long size : sizes) {
32 bases.push_front(maxPermutations);
33 maxPermutations *= size; // may overflow....
34 }
35 }
36
37 bool next() {
39 if (permutationCount == 1) {
40 return true;
41 }
42
43 return increment(0);
44 }
45
46 size_t value(const std::vector<size_t> &p) const { // returns the sum of bases[i] * p[i] over every position i of p
47 size_t v = 0;
48 for (size_t i = 0; i < p.size(); ++i) {
49 v += bases[i] * p[i]; // NOTE(review): bases[i] is unchecked; presumably p.size() <= bases.size() - confirm
50 }
51 return v;
52 }
53
54 size_t value() { return value(permutation); } // same computation, applied to the permutation member
55
58 return false;
59 }
60
65 return increment(rowToIncrement + 1);
66 }
67 return true;
68 }
69};
70
72 public:
74 RGroupScorer(const std::vector<std::vector<size_t>> &permutations,
75 double score);
76 //! score the passed permutation of matches
77 double matchScore(const std::vector<size_t> &permutation,
78 const std::vector<std::vector<RGroupMatch>> &matches,
79 const std::set<int> &labels);
80 //! set the passed permutation and score as the best one
81 void setBestPermutation(const std::vector<size_t> &permutation, double score);
82 //! return the best permutation found so far
83 const std::vector<size_t> &getBestPermutation() const {
84 return d_saved.permutation;
85 }
86 //! called when process() starts to initialize State
88 //! store the passed tied permutation for subsequent processing
89 void pushTieToStore(const std::vector<size_t> &permutation);
90 //! find the best permutation across the tied ones that were stored
91 void breakTies(const std::vector<std::vector<RGroupMatch>> &matches,
92 const std::set<int> &labels,
93 const std::unique_ptr<CartesianProduct> &iterator,
94 const std::chrono::steady_clock::time_point &t0,
95 double timeout);
96 //! clear all stored tied permutations
98 //! number of stored tied permutations
99 size_t tieStoreSize() const { return d_store.size(); }
100 //! return the best score found so far
101 double getBestScore() const { return d_bestScore; }
102
103 private:
104 void restoreInitialState() { d_current = d_initial; } // overwrite the current State with a copy of the initial State
105 struct RLabelData {
106 int numRGroups = 0;
107 std::vector<std::map<std::string, unsigned int>> matchSetVect;
108 std::map<std::set<int>, size_t> linkerMatchSet;
109 };
110 // The State structure stores the state of the RGroupScorer.
111 // This allows more efficient scoring of permutations, in that
112 // the scores of pruned permutations, which are effectively frozen,
113 // are cached in the State rather than being recomputed on the fly,
114 // while only permutations in the last chunk are actually scored.
115 struct State {
116 // Compute the criteria used to pick the best permutation
117 // among the tied ones.
118 void computeTieBreakingCriteria(
119 const std::vector<std::vector<RGroupMatch>> &matches,
120 const std::vector<int> &orderedLabels, std::vector<int> &heavyCounts) {
121 // heavyCounts is indexed in step with orderedLabels (entry i belongs to
122 // the i-th label, so it is expected to hold one entry per label); an entry
123 // is incremented each time a molecule bears a non-hydrogen R-group there
124 PRECONDITION(permutation.size() <= matches.size(),
125 "permutation.size() should be <= matches.size()");
126 size_t offset = matches.size() - permutation.size(); // permutation[m] selects a match in row m + offset of matches
127 // numMatchedUserRGroups counts the total number of user-labelled
128 // R-groups filled in this permutation. We want to maximize this number
129 size_t i = 0;
130 for (int label : orderedLabels) {
131 for (size_t m = 0; m < permutation.size(); ++m) { // for each molecule
132 // Negative labels are assigned to R-groups that were found along
133 // the way (when onlyMatchAtRGroups=false) rather than being
134 // user-specified. For each molecule, check whether we add an R-group at
135 // this negative label; if we do, count it once. This tells us how many
136 // different negative labels we have filled: we prefer permutations
137 // which fill fewer, as it means we have added fewer groups at different
138 // positions
139 const auto &match = matches[m + offset][permutation[m]];
140 auto rg = match.rgroups.find(label);
141 if (rg != match.rgroups.end() && !rg->second->is_hydrogen) {
142 if (label < 0 && heavyCounts.at(i) == 0) {
143 ++numAddedRGroups;
144 } else if (label > 0) {
145 ++numMatchedUserRGroups;
146 }
147 ++heavyCounts[i];
148 }
149 }
150 ++i;
151 }
152 }
153
154 int N = 0;
155 int numAddedRGroups = 0;
156 int numMatchedUserRGroups = 0;
157 std::map<int, int> heavyCountPerLabel;
158 std::map<int, RLabelData> labelDataMap;
159 std::vector<size_t> permutation;
160 };
161 double d_bestScore = 0.0;
162 // the current State
163 State d_current;
164 // the initial state when process() is called
165 State d_initial;
166 // the best State found so far
167 State d_saved;
168 // the States associated to each tied permutation
169 std::deque<State> d_store;
170};
171
172} // namespace RDKit
173#endif
#define PRECONDITION(expr, mess)
Definition Invariant.h:109
void pushTieToStore(const std::vector< size_t > &permutation)
store the passed tied permutation for subsequent processing
void startProcessing()
called when process() starts to initialize State
void setBestPermutation(const std::vector< size_t > &permutation, double score)
set the passed permutation and score as the best one
void clearTieStore()
clear all stored tied permutations
const std::vector< size_t > & getBestPermutation() const
return the best permutation found so far
Definition RGroupScore.h:83
void breakTies(const std::vector< std::vector< RGroupMatch > > &matches, const std::set< int > &labels, const std::unique_ptr< CartesianProduct > &iterator, const std::chrono::steady_clock::time_point &t0, double timeout)
find the best permutation across the tied ones that were stored
double matchScore(const std::vector< size_t > &permutation, const std::vector< std::vector< RGroupMatch > > &matches, const std::set< int > &labels)
score the passed permutation of matches
size_t tieStoreSize() const
number of stored tied permutations
Definition RGroupScore.h:99
double getBestScore() const
return the best score found so far
RGroupScorer(const std::vector< std::vector< size_t > > &permutations, double score)
#define RDKIT_RGROUPDECOMPOSITION_EXPORT
Definition export.h:441
Std stuff.
bool rdvalue_is(const RDValue_cast_t)
iterate through all possible permutations of the rgroups
Definition RGroupScore.h:20
std::vector< size_t > sizes
Definition RGroupScore.h:22
std::deque< size_t > bases
Definition RGroupScore.h:23
size_t value(const std::vector< size_t > &p) const
Definition RGroupScore.h:46
CartesianProduct(const std::vector< size_t > &inputSizes)
Definition RGroupScore.h:26
bool increment(size_t rowToIncrement)
Definition RGroupScore.h:56
std::vector< size_t > permutation
Definition RGroupScore.h:21