RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
RascalResult.h
Go to the documentation of this file.
1//
2// Copyright (C) David Cosgrove 2023
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9
10// A class to hold the results of a RASCAL MCES determination
11// between 2 molecules. Contains the bonds and atoms that
12// correspond between the molecules, and also a SMARTS pattern
13// defining the MCES.
14//
15#include <RDGeneral/export.h>
16
17#ifndef RASCALRESULT_H
18#define RASCALRESULT_H
19
20#include <vector>
21
22#include <GraphMol/ROMol.h>
23
24namespace RDKit {
25
26namespace RascalMCES {
27
29 public:
30 RascalResult(const RDKit::ROMol &mol1, const RDKit::ROMol &mol2,
31 const std::vector<std::vector<int>> &adjMatrix1,
32 const std::vector<std::vector<int>> &adjMatrix2,
33 const std::vector<unsigned int> &clique,
34 const std::vector<std::pair<int, int>> &vtx_pairs, bool timedOut,
35 bool swapped, double tier1Sim, double tier2Sim,
36 bool ringMatchesRingOnly, bool singleLargestFrag, int minFragSep,
37 bool exactConnectionsMatch = false,
38 const std::string &equivalentAtoms = "",
39 bool ignoreBondOrders = false);
40 // For when the tier[12]Sim didn't hit the threshold, but it
41 // might be of interest what the estimates of similarity were.
42 RascalResult(double tier1Sim, double tier2Sim);
43
45
46 RascalResult(RascalResult &&other) = default;
47
48 ~RascalResult() = default;
49
51
52 RascalResult &operator=(RascalResult &&other) = default;
53
54 // Cut the result down to the single largest fragment. This is
55 // irrecoverably destructive.
57 void largestFragsOnly(unsigned int numFrags = 2);
58 void trimSmallFrags(unsigned int minFragSize = 3);
59
60 std::vector<std::pair<int, int>> getBondMatches() const {
61 return d_bondMatches;
62 }
63
64 std::vector<std::pair<int, int>> getAtomMatches() const {
65 return d_atomMatches;
66 }
67
68 // The following 5 functions are used in resultCompare to rank
69 // 2 MCES of the same size for the same pair of molecules.
70 // returns the number of contiguous fragments in the MCES.
71 int getNumFrags() const;
72
73 // returns how many bonds in the clique don't match
74 // cyclic/non-cyclic i.e. count as a match in the MCES but
75 // are ring bonds in one of the molecules and not in the other.
77
78 // returns a score for how well the atoms in the clique from mol1 match the
79 // atoms for the clique in mol2. Currently, the atom scores are the
80 // difference in H count for matching atoms, and summed for the molecule. It's
81 // so that, for example, an OH in mol1 that could match an OH or OMe matches
82 // the OH for preference.
83 int getAtomMatchScore() const;
84
85 // returns a score for the maximum difference in through-bond distance for
86 // pairs of matching atoms in the 2 molecules. An MCES where 2 atoms
87 // are far apart in one molecule and the corresponding atoms are close
88 // together in the other will get a high score by this measure.
90
91 // returns the number of atoms in the largest contiguous fragment
92 // in the MCES.
93 unsigned int getLargestFragSize() const;
94
95 std::string getSmarts() const;
96 const std::shared_ptr<ROMol> getMcesMol() const;
97 bool getTimedOut() const { return d_timedOut; };
98
99 double getTier1Sim() const { return d_tier1Sim; }
100 double getTier2Sim() const { return d_tier2Sim; }
101 double getSimilarity() const;
102
103 private:
104 std::shared_ptr<ROMol> d_mol1;
105 std::shared_ptr<ROMol> d_mol2;
106 mutable std::shared_ptr<ROMol> d_mcesMol;
107 std::vector<std::pair<int, int>> d_bondMatches;
108 std::vector<std::pair<int, int>> d_atomMatches;
109
110 mutable std::string d_smarts;
111 bool d_timedOut{false};
112 double d_tier1Sim;
113 double d_tier2Sim;
114 bool d_ringMatchesRingOnly{false};
115 int d_maxFragSep{-1};
116 bool d_exactConnectionsMatch{false};
117 std::string d_equivalentAtoms{""};
118 bool d_ignoreBondOrders{false};
119
120 // These are used for sorting the results.
121 mutable int d_numFrags{-1};
122 mutable int d_ringNonRingBondScore{-1};
123 mutable int d_atomMatchScore{-1};
124 mutable int d_maxDeltaAtomAtomDist{-1};
125 mutable int d_largestFragSize{-1};
126
127 // Assuming the frags are all part of the original MCES, just cut it
128 // down to what's in the frags.
129 void rebuildFromFrags(const std::vector<boost::shared_ptr<ROMol>> &frags);
130
131 std::string createSmartsString() const;
132
133 void matchCliqueAtoms(const std::vector<std::vector<int>> &mol1_adj_matrix);
134
135 // If the clique involves a fragment that is more than d_maxFragSep from
136 // any other frag in either molecule, discard the smaller frag.
137 void applyMaxFragSep();
138
139 // Make the fragments for either mol1 or mol2. If molNum is not 1 or 2,
140 // returns nullptr.
141 RDKit::ROMol *makeMolFrags(int molNum) const;
142
143 int calcRingNonRingScore() const;
144
145 int calcAtomMatchScore() const;
146
147 int calcLargestFragSize() const;
148
149 // If there are multiple fragments, can be helpful as a tie-breaker. It's the
150 // maximum difference between through-bond distances between matching atoms in
151 // the 2 molecules.
152 int calcMaxDeltaAtomAtomDistScore() const;
153};
154
155} // namespace RascalMCES
156} // namespace RDKit
157
158#endif // RASCALRESULT_H
Defines the primary molecule class ROMol as well as associated typedefs.
std::string getSmarts() const
std::vector< std::pair< int, int > > getAtomMatches() const
RascalResult(double tier1Sim, double tier2Sim)
void trimSmallFrags(unsigned int minFragSize=3)
unsigned int getLargestFragSize() const
std::vector< std::pair< int, int > > getBondMatches() const
RascalResult & operator=(const RascalResult &other)
RascalResult & operator=(RascalResult &&other)=default
const std::shared_ptr< ROMol > getMcesMol() const
RascalResult(RascalResult &&other)=default
void largestFragsOnly(unsigned int numFrags=2)
RascalResult(const RDKit::ROMol &mol1, const RDKit::ROMol &mol2, const std::vector< std::vector< int > > &adjMatrix1, const std::vector< std::vector< int > > &adjMatrix2, const std::vector< unsigned int > &clique, const std::vector< std::pair< int, int > > &vtx_pairs, bool timedOut, bool swapped, double tier1Sim, double tier2Sim, bool ringMatchesRingOnly, bool singleLargestFrag, int minFragSep, bool exactConnectionsMatch=false, const std::string &equivalentAtoms="", bool ignoreBondOrders=false)
RascalResult(const RascalResult &other)
#define RDKIT_RASCALMCES_EXPORT
Definition export.h:449
Std stuff.