RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
SynthonSpaceSearcher.h
Go to the documentation of this file.
1//
2// Copyright (C) David Cosgrove 2024.
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10
11// This file declares an abstract base class for searching a synthon
12// space. Concrete derived classes include SynthonSpaceSubstructureSearcher
13// and SynthonSpaceFingerprintSearcher.
14
15#ifndef SYNTHONSPACESEARCHER_H
16#define SYNTHONSPACESEARCHER_H
17
18#include <chrono>
19#include <random>
20
21#include <RDGeneral/export.h>
25#include <boost/spirit/home/support/common_terminals.hpp>
26
27using Clock = std::chrono::steady_clock;
28using TimePoint = std::chrono::time_point<Clock>;
29
30namespace RDKit {
31class ROMol;
32
33namespace SynthonSpaceSearch {
34
35// Abstract base class for searching the SynthonSpace.
37 public:
40 const SynthonSpaceSearchParams &params,
46
47 virtual ~SynthonSpaceSearcher() = default;
48
50
// Read-only accessors. Each one returns the reference held in the
// corresponding d_ member, so nothing is copied. Note that getSpace()
// returns a non-const SynthonSpace reference even though it is a const
// member function.
51 SynthonSpace &getSpace() const { return d_space; }
52 const ROMol &getQuery() const { return d_query; }
53 const SynthonSpaceSearchParams &getParams() const { return d_params; }
54
55 // Do the search of this fragSet against the SynthonSet in the
56 // appropriate way, for example by substructure or fingerprint
57 // similarity.
// Pure virtual, so every concrete searcher must implement it.
//   fragSet  - the fragment molecules to search with (presumably one
//              fragmentation of the query - confirm against the caller).
//   reaction - the SynthonSet to search against.
// Returns the hit sets found; ownership passes to the caller through
// the unique_ptrs.
58 virtual std::vector<std::unique_ptr<SynthonSpaceHitSet>> searchFragSet(
59 const std::vector<std::unique_ptr<ROMol>> &fragSet,
60 const SynthonSet &reaction) const = 0;
61
62 // Make the hit, constructed from a specific combination of
63 // synthons in the hitset, and verify that it matches the
64 // query in the appropriate way. There'll be 1 entry in synthNums
65 // for each synthon list in the hitset. Returns an empty pointer
66 // if the hit isn't accepted for whatever reason.
67 std::unique_ptr<ROMol> buildAndVerifyHit(
69 const std::vector<size_t> &synthNums) const;
70
71 private:
72 std::unique_ptr<std::mt19937> d_randGen;
73
74 const ROMol &d_query;
75 const SynthonSpaceSearchParams &d_params;
76 SynthonSpace &d_space;
77
78 // Some of the search methods might need extra setup of the fragment
79 // sets. The FingerprintSearcher, for example, needs fingerprints
80 // for all the fragments. The SubstructureSearcher needs connector
81 // regions and information about them.
// The default implementation is an empty body, so a derived class only
// needs to override this hook when it has setup work to do. fragSets is
// taken by non-const reference, so an override is able to modify it.
82 virtual void extraSearchSetup(
83 [[maybe_unused]] std::vector<std::vector<std::unique_ptr<ROMol>>>
84 &fragSets) {}
85
// Non-virtual search routine; only the declaration is in this header.
//   fragSets - the fragment sets to search with, by non-const reference.
//   endTime  - pointer to a time point; presumably the deadline for the
//              search, with nullptr meaning no time limit - TODO confirm
//              in the implementation.
//   timedOut - output flag; presumably set if the deadline passed before
//              the search finished - TODO confirm.
//   totHits  - output count; presumably the number of hits the returned
//              hit sets could produce - TODO confirm.
// Returns the hit sets, owned by the caller through the unique_ptrs.
86 std::vector<std::unique_ptr<SynthonSpaceHitSet>> doTheSearch(
87 std::vector<std::vector<std::unique_ptr<ROMol>>> &fragSets,
88 const TimePoint *endTime, bool &timedOut, std::uint64_t &totHits);
89
90 // Some of the search methods (fingerprints, for example) can do a quick
91 // check on whether this set of synthons can match the query without having to
92 // build the full molecule from the synthons. They will override this
93 // function, which by default passes everything.
94 virtual bool quickVerify(
96 [[maybe_unused]] const std::vector<size_t> &synthNums) const {
97 return true;
98 }
99 // Checks that the given molecule is definitely a hit according to
100 // the derived class' criteria.
// Pure virtual, so every concrete searcher must implement it.
//   mol - the candidate molecule to test.
// Returns true if the molecule is accepted as a hit.
101 virtual bool verifyHit(const ROMol &mol) const = 0;
102
103 // Build the molecules from the synthons identified in hitsets.
104 // Checks that all the results produced match the
105 // query. Duplicates by name are not returned,
106 // but duplicate SMILES from different reactions will be.
107 // Hitsets will be re-ordered on exit.
//   hitsets  - the hit sets to build from; non-const because the vector
//              is re-ordered, as noted above.
//   endTime  - pointer to a time point; presumably the deadline, with
//              nullptr meaning no time limit - TODO confirm.
//   timedOut - output flag; presumably set if the deadline was reached -
//              TODO confirm.
//   results  - output vector; presumably receives the built molecules -
//              TODO confirm whether it is appended to or replaced.
108 void buildHits(std::vector<std::unique_ptr<SynthonSpaceHitSet>> &hitsets,
109 const TimePoint *endTime, bool &timedOut,
110 std::vector<std::unique_ptr<ROMol>> &results) const;
// Takes the same parameter list as buildHits, except that hitsets is a
// const reference here, so this function cannot re-order it.
// Presumably builds every possible hit from the hit sets rather than a
// subset - TODO confirm in the implementation.
111 void buildAllHits(
112 const std::vector<std::unique_ptr<SynthonSpaceHitSet>> &hitsets,
113 const TimePoint *endTime, bool &timedOut,
114 std::vector<std::unique_ptr<ROMol>> &results) const;
//   toTry   - pairs of (hit set pointer, vector of size_t); presumably
//             the size_t values are the synthon numbers to combine from
//             that hit set, in the form buildAndVerifyHit accepts -
//             TODO confirm.
//   endTime - pointer to a time point; presumably the deadline - TODO
//             confirm. Unlike buildHits there is no timedOut output
//             parameter here.
//   results - output vector; presumably receives the molecules that were
//             built and accepted - TODO confirm.
115 void makeHitsFromToTry(
116 const std::vector<
117 std::pair<const SynthonSpaceHitSet *, std::vector<size_t>>> &toTry,
118 const TimePoint *endTime,
119 std::vector<std::unique_ptr<ROMol>> &results) const;
// Takes the same parameter types as makeHitsFromToTry, except that toTry
// is a non-const reference here, so this function is able to modify it
// (for example to sort or de-duplicate it - TODO confirm what the
// implementation actually does).
120 void processToTrySet(
121 std::vector<std::pair<const SynthonSpaceHitSet *, std::vector<size_t>>>
122 &toTry,
123 const TimePoint *endTime,
124 std::vector<std::unique_ptr<ROMol>> &results) const;
125
126 // get the subset of synthons for the given reaction to use for this
127 // enumeration.
//   synthonsToUse - a vector of bitsets; presumably one bitset per
//                   synthon list with a set bit selecting that synthon -
//                   TODO confirm.
//   reaction_id   - identifies the reaction whose synthons are wanted.
// Returns raw ROMol pointers grouped in an outer vector; presumably these
// are non-owning views of synthons held by the SynthonSpace - TODO confirm
// ownership before storing them.
128 std::vector<std::vector<ROMol *>> getSynthonsToUse(
129 const std::vector<boost::dynamic_bitset<>> &synthonsToUse,
130 const std::string &reaction_id) const;
131};
132
// NOTE(review): everything between "#if 0" and "#endif" is removed by the
// preprocessor and never compiled. It holds older signatures for
// buildHits, buildAllHits and getSynthonsToUse (with extra totHits and
// resultsNames parameters), a buildRandomHits declaration, and a stray
// closing "};". It looks like a candidate for deletion - confirm nothing
// still refers to it.
133#if 0
134 // Build the molecules from the synthons identified in hitsets.
135 // Checks that all the results produced match the
136 // query. totHits is the maximum number of hits that are possible from
137 // the hitsets, including duplicates. Duplicates by name are not returned,
138 // but duplicate SMILES from different reactions will be. Hitsets will
139 // be re-ordered on exit.
140 void buildHits(std::vector<std::unique_ptr<SynthonSpaceHitSet>> &hitsets,
141 size_t totHits, const TimePoint *endTime, bool &timedOut,
142 std::vector<std::unique_ptr<ROMol>> &results) const;
143 void buildAllHits(
144 const std::vector<std::unique_ptr<SynthonSpaceHitSet>> &hitsets,
145 std::set<std::string> &resultsNames, const TimePoint *endTime,
146 bool &timedOut, std::vector<std::unique_ptr<ROMol>> &results) const;
147 void buildRandomHits(
148 const std::vector<std::unique_ptr<SynthonSpaceHitSet>> &hitsets,
149 size_t totHits, std::set<std::string> &resultsNames,
150 const TimePoint *endTime, bool &timedOut,
151 std::vector<std::unique_ptr<ROMol>> &results) const;
152 // get the subset of synthons for the given reaction to use for this
153 // enumeration.
154 std::vector<std::vector<ROMol *>> getSynthonsToUse(
155 const std::vector<boost::dynamic_bitset<>> &synthonsToUse,
156 const std::string &reaction_id) const;
157};
158#endif
159
160} // namespace SynthonSpaceSearch
161} // namespace RDKit
162#endif // SYNTHONSPACESEARCHER_H
std::chrono::steady_clock Clock
std::chrono::time_point< Clock > TimePoint
contains a class for searching combinatorial libraries in Synthon format such as Enamine REAL.
SynthonSpaceSearcher(const ROMol &query, const SynthonSpaceSearchParams &params, SynthonSpace &space)
SynthonSpaceSearcher(SynthonSpaceSearcher &&other)=delete
SynthonSpaceSearcher(const SynthonSpaceSearcher &other)=delete
virtual std::vector< std::unique_ptr< SynthonSpaceHitSet > > searchFragSet(const std::vector< std::unique_ptr< ROMol > > &fragSet, const SynthonSet &reaction) const =0
SynthonSpaceSearcher & operator=(const SynthonSpaceSearcher &other)=delete
const SynthonSpaceSearchParams & getParams() const
std::unique_ptr< ROMol > buildAndVerifyHit(const SynthonSpaceHitSet *hitset, const std::vector< size_t > &synthNums) const
SynthonSpaceSearcher & operator=(SynthonSpaceSearcher &&other)=delete
Std stuff.
bool rdvalue_is(const RDValue_cast_t)