RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
SubstructMatch.h
Go to the documentation of this file.
1//
2// Copyright (C) 2001-2020 Greg Landrum and Rational Discovery LLC
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#include <RDGeneral/export.h>
11#ifndef RD_SUBSTRUCTMATCH_H
12#define RD_SUBSTRUCTMATCH_H
13
14// std bits
15#include <vector>
16
17#include <unordered_set>
18#include <functional>
19#include <unordered_map>
20#include <cstdint>
21#include <string>
22
23#include <boost/dynamic_bitset.hpp>
24#if BOOST_VERSION >= 107100
25#define RDK_INTERNAL_BITSET_HAS_HASH
26#endif
27
29
30namespace RDKit {
31class ROMol;
32class Atom;
33class Bond;
34class ResonanceMolSupplier;
35class MolBundle;
36
37//! \brief Used to return matches from substructure searching.
38//! The format is (queryAtomIdx, molAtomIdx).
39typedef std::vector<std::pair<int, int>> MatchVectType;
40
42 bool useChirality = false; //!< Use chirality in determining whether or not
43 //!< atoms/bonds match
44 bool useEnhancedStereo = false; //!< Use enhanced stereochemistry in
45 //!< determining whether atoms/bonds match
46 bool aromaticMatchesConjugated = false; //!< Aromatic and conjugated bonds
47 //!< match each other
48 bool useQueryQueryMatches = false; //!< Consider query-query matches, not
49 //!< just simple matches
50 bool useGenericMatchers = false; //!< Looks for generic atoms in the query
51 //!< and uses them as part of the matching
52 bool recursionPossible = true; //!< Allow recursive queries
53 bool uniquify = true; //!< uniquify (by atom index) match results
54 unsigned int maxMatches = 1000; //!< maximum number of matches to return
55 int numThreads = 1; //!< number of threads to use when multi-threading
56 //!< is possible. 0 selects the number of
57 //!< concurrent threads supported by the hardware;
58 //!< negative values are added to the number of
59 //!< concurrent threads supported by the hardware
60 std::vector<std::string> atomProperties; //!< atom properties that must be
61 //!< equivalent in order to match
62 std::vector<std::string> bondProperties; //!< bond properties that must be
63 //!< equivalent in order to match
64 std::function<bool(const ROMol &mol,
65 const std::vector<unsigned int> &match)>
66 extraFinalCheck; //!< a function to be called at the end to validate a
67 //!< match
68 unsigned int maxRecursiveMatches =
69 1000; //!< maximum number of matches that the recursive substructure
70 //!< matching should return
71 bool specifiedStereoQueryMatchesUnspecified =
72 false; //!< If set, query atoms and bonds with specified stereochemistry
73 //!< will match atoms and bonds with unspecified stereochemistry
75};
76
78 SubstructMatchParameters &params, const std::string &json);
80 const SubstructMatchParameters &params);
81
82//! Find a substructure match for a query in a molecule
83/*!
84 \param mol The ROMol to be searched
85 \param query The query ROMol
86 \param matchParams Parameters controlling the matching
87
88 \return The matches, if any
89
90*/
92 const ROMol &mol, const ROMol &query,
94
95//! Find all substructure matches for a query in a ResonanceMolSupplier object
96/*!
97 \param resMolSuppl The ResonanceMolSupplier object to be searched
98 \param query The query ROMol
99 \param matchParams Parameters controlling the matching
100
101 \return The matches, if any
102
103*/
107
109 const MolBundle &bundle, const ROMol &query,
112 const ROMol &mol, const MolBundle &query,
115 const MolBundle &bundle, const MolBundle &query,
117
118//! Find a substructure match for a query
119/*!
120 \param mol The object to be searched
121 \param query The query
122 \param matchVect Used to return the match
123 (pre-existing contents will be deleted)
124 \param recursionPossible flags whether or not recursive matches are allowed
125 \param useChirality use atomic CIP codes as part of the comparison
126 \param useQueryQueryMatches if set, the contents of atom and bond queries
127 will be used as part of the matching
128
129 \return whether or not a match was found
130
131*/
132template <typename T1, typename T2>
133bool SubstructMatch(T1 &mol, const T2 &query, MatchVectType &matchVect,
134 bool recursionPossible = true, bool useChirality = false,
135 bool useQueryQueryMatches = false) {
// NOTE(review): listing line 136 is absent from this extract; `params` is
// used below but its declaration is not visible here - confirm against the
// original header.
137 params.recursionPossible = recursionPossible;
138 params.useChirality = useChirality;
139 params.useQueryQueryMatches = useQueryQueryMatches;
140 params.maxMatches = 1;  // only the first match is copied out below
// Delegate to the parameter-object overload, which returns a vector of matches.
141 std::vector<MatchVectType> matchVects = SubstructMatch(mol, query, params);
142 if (matchVects.size()) {
143 matchVect = matchVects.front();
144 } else {
145 matchVect.clear();  // nothing found: discard any pre-existing contents
146 }
// The result is derived from the copied match itself, so an empty first
// match is reported as "not found".
147 return matchVect.size() != 0;
148};
149
150//! Find all substructure matches for a query
151/*!
152 \param mol The object to be searched
153 \param query The query
154 \param matchVect Used to return the matches
155 (pre-existing contents will be deleted)
156 \param uniquify Toggles uniquification (by atom index) of the results
157 \param recursionPossible flags whether or not recursive matches are allowed
158 \param useChirality use atomic CIP codes as part of the comparison
159 \param useQueryQueryMatches if set, the contents of atom and bond queries
160 will be used as part of the matching
161 \param maxMatches The maximum number of matches that will be returned.
162 In high-symmetry cases with medium-sized molecules, it is very
163 easy to end up with a combinatorial explosion in the number of
164 possible matches. This argument prevents that from having
165 unintended consequences.
168
169 \return the number of matches found
170
171*/
172template <typename T1, typename T2>
173unsigned int SubstructMatch(T1 &mol, const T2 &query,
174 std::vector<MatchVectType> &matchVect,
175 bool uniquify = true, bool recursionPossible = true,
176 bool useChirality = false,
177 bool useQueryQueryMatches = false,
178 unsigned int maxMatches = 1000,
179 int numThreads = 1) {
// NOTE(review): listing line 180 is absent from this extract; `params` is
// used below but its declaration is not visible here - confirm against the
// original header.
// Copy each flag argument into the corresponding field of `params`.
181 params.uniquify = uniquify;
182 params.recursionPossible = recursionPossible;
183 params.useChirality = useChirality;
184 params.useQueryQueryMatches = useQueryQueryMatches;
185 params.maxMatches = maxMatches;
186 params.numThreads = numThreads;
// Delegate to the parameter-object overload; the assignment replaces any
// pre-existing contents of matchVect.
187 matchVect = SubstructMatch(mol, query, params);
// The match count is narrowed from the container's size type to unsigned int.
188 return static_cast<unsigned int>(matchVect.size());
189};
190
191// ----------------------------------------------
192//
193// find one match in ResonanceMolSupplier object
194//
195template <>
197 const ROMol &query, MatchVectType &matchVect,
198 bool recursionPossible, bool useChirality,
199 bool useQueryQueryMatches) {
201 params.recursionPossible = recursionPossible;
202 params.useChirality = useChirality;
203 params.useQueryQueryMatches = useQueryQueryMatches;
204 params.maxMatches = 1;
205 std::vector<MatchVectType> matchVects =
206 SubstructMatch(resMolSupplier, query, params);
207 if (matchVects.size()) {
208 matchVect = matchVects.front();
209 } else {
210 matchVect.clear();
211 }
212 return matchVect.size() != 0;
213}
214
215template <>
217 const ROMol &query,
218 std::vector<MatchVectType> &matchVect,
219 bool uniquify, bool recursionPossible,
220 bool useChirality, bool useQueryQueryMatches,
221 unsigned int maxMatches, int numThreads) {
223 params.uniquify = uniquify;
224 params.recursionPossible = recursionPossible;
225 params.useChirality = useChirality;
226 params.useQueryQueryMatches = useQueryQueryMatches;
227 params.maxMatches = maxMatches;
228 params.numThreads = numThreads;
229 matchVect = SubstructMatch(resMolSupplier, query, params);
230 return static_cast<unsigned int>(matchVect.size());
231};
232
233//! Class used as a final step to confirm whether or not a given atom->atom
234//! mapping is a valid substructure match.
236 public:
237 MolMatchFinalCheckFunctor(const ROMol &query, const ROMol &mol,
238 const SubstructMatchParameters &ps);
239
240 bool operator()(const std::uint32_t q_c[], const std::uint32_t m_c[]);
241
242 private:
243 const ROMol &d_query;
244 const ROMol &d_mol;
245 const SubstructMatchParameters &d_params;
246 std::unordered_map<unsigned int, StereoGroup const *> d_molStereoGroups;
247#ifdef RDK_INTERNAL_BITSET_HAS_HASH
248 // Boost 1.71 added support for std::hash with dynamic_bitset.
249 using HashedStorageType = boost::dynamic_bitset<>;
250#else
251 // otherwise we use a less elegant solution
252 using HashedStorageType = std::string;
253#endif
254 std::unordered_set<HashedStorageType> matchesSeen;
255};
256
257} // namespace RDKit
258
259#endif
Defines the class StereoGroup which stores relationships between the absolute configurations of atoms...
MolBundle contains a collection of related ROMols.
Definition MolBundle.h:59
MolMatchFinalCheckFunctor(const ROMol &query, const ROMol &mol, const SubstructMatchParameters &ps)
bool operator()(const std::uint32_t q_c[], const std::uint32_t m_c[])
#define RDKIT_SUBSTRUCTMATCH_EXPORT
Definition export.h:537
Std stuff.
std::vector< std::pair< int, int > > MatchVectType
used to return matches from substructure searching, The format is (queryAtomIdx, molAtomIdx)
bool rdvalue_is(const RDValue_cast_t)
RDKIT_SUBSTRUCTMATCH_EXPORT std::vector< MatchVectType > SubstructMatch(const ROMol &mol, const ROMol &query, const SubstructMatchParameters &params=SubstructMatchParameters())
Find a substructure match for a query in a molecule.
RDKIT_SUBSTRUCTMATCH_EXPORT void updateSubstructMatchParamsFromJSON(SubstructMatchParameters &params, const std::string &json)
RDKIT_SUBSTRUCTMATCH_EXPORT std::string substructMatchParamsToJSON(const SubstructMatchParameters &params)
unsigned int maxMatches
maximum number of matches to return
bool uniquify
uniquify (by atom index) match results
std::vector< std::string > atomProperties
std::vector< std::string > bondProperties
bool recursionPossible
Allow recursive queries.
std::function< bool(const ROMol &mol, const std::vector< unsigned int > &match)> extraFinalCheck