RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
SynthonSpaceSearch_details.h
Go to the documentation of this file.
1//
2// Copyright (C) David Cosgrove 2024.
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10
11#ifndef RDKIT_SYNTHONSPACESEARCHDETAILS_H
12#define RDKIT_SYNTHONSPACESEARCHDETAILS_H
13
14#include <chrono>
15#include <vector>
16
18#include <RDGeneral/export.h>
20
21using Clock = std::chrono::steady_clock;
22using TimePoint = std::chrono::time_point<Clock>;
23
24namespace RDKit {
25class ROMol;
26namespace SynthonSpaceSearch::details {
27
29
30// Find all combinations of M things selected from N.
31RDKIT_SYNTHONSPACESEARCH_EXPORT std::vector<std::vector<unsigned int>>
32combMFromN(unsigned int m, unsigned int n);
33// Find all permutations of M things selected from N.
34RDKIT_SYNTHONSPACESEARCH_EXPORT std::vector<std::vector<unsigned int>>
35permMFromN(unsigned int m, unsigned int n);
36
37// Split the molecule into fragments. maxNumFrags gives the maximum number
38// of fragments to be produced in each set. There will be a vector of vectors of
39// molecules. Each inner vector contains the fragments from a split molecule.
40// The maxNumFrags will be constrained to the maximum number of synthons in
41// the search space as there's no point making more fragments than that.
42// Any complex query atoms will be stripped out of the fragments and replaced
43// by a simple atom query.
44RDKIT_SYNTHONSPACESEARCH_EXPORT std::vector<std::vector<std::unique_ptr<ROMol>>>
45splitMolecule(const ROMol &query, unsigned int maxNumFrags,
46 const std::uint64_t maxNumFragSets, const TimePoint *endTime,
47 const int numThreads, bool &timedOut);
48// Counts the number of [1*], [2*]...[4*] in the molecule.
50
51// Return a bitset for each fragment giving the connector patterns
52RDKIT_SYNTHONSPACESEARCH_EXPORT std::vector<boost::dynamic_bitset<>>
53getConnectorPatterns(const std::vector<std::unique_ptr<ROMol>> &fragSet);
54
55// Return a bitset giving the different connector types in this
56// molecule.
58 const std::vector<std::unique_ptr<ROMol>> &fragSet);
59
60// Gets the permutations of connector numbers and the atoms they should
61// be applied to in the molFrags.
62// E.g. if the reaction has 3 connectors, 1, 2 and 3 and the fragged mol has
63// 2, return all permutations of 2 from 3. It's ok if the fragged mol doesn't
64// have all the connections in the reaction, although this may well result in
65// a lot of hits.
67std::vector<std::vector<std::vector<std::pair<Atom *, unsigned int>>>>
68getConnectorPermutations(const std::vector<std::unique_ptr<ROMol>> &molFrags,
69 const boost::dynamic_bitset<> &fragConns,
70 const boost::dynamic_bitset<> &reactionConns);
71
72// As above, but just returns the bitsets for the connector permutations,
73// not the molecules.
75std::vector<std::vector<boost::dynamic_bitset<>>> getConnectorPermutations(
76 const std::vector<boost::dynamic_bitset<>> &fragConnPatts,
77 const boost::dynamic_bitset<> &reactionConns);
78
79// If all bits in one of the bitsets are unset, it means that nothing matched
80// that synthon. If at least one of the bitsets has a set bit, all products
81// incorporating the synthon with no bits set must match the query so
82// should be used because the query matches products that don't incorporate
83// anything from 1 of the synthon lists. Therefore those bits will all be
84// set on exit. An example is the synthons
85// [1*]Nc1c([2*])cccc1 and [1*]=CC=C[2*] with the query c1ccccc1.
87 std::vector<boost::dynamic_bitset<>> &bitSets);
88
90 const std::vector<boost::dynamic_bitset<>> &bitSets,
91 std::vector<std::vector<size_t>> &outVecs);
92
93// class to step through all combinations of lists of different sizes.
94// returns (0,0,0), (0,0,1), (0,1,0) etc.
96 explicit Stepper(const std::vector<size_t> &sizes) : d_sizes(sizes) {
97 d_currState = std::vector<size_t>(sizes.size(), 0);
98 }
99 void step() {
100 // Don't do anything if we're at the end, but expect an infinite
101 // loop if the user isn't wise to this.
102 if (d_currState[0] == d_sizes[0]) {
103 return;
104 }
105 std::int64_t i = static_cast<std::int64_t>(d_currState.size()) - 1;
106 while (i >= 0) {
107 ++d_currState[i];
108 if (d_currState[0] == d_sizes[0]) {
109 return;
110 }
111 if (d_currState[i] == d_sizes[i]) {
112 d_currState[i] = 0;
113 } else {
114 break;
115 }
116 --i;
117 }
118 }
// Current index into each list. All zeros after construction;
// d_currState[0] == d_sizes[0] is the end-of-sequence state tested by step().
119 std::vector<size_t> d_currState;
// Copy of the sizes passed to the constructor, one per list.
120 std::vector<size_t> d_sizes;
121};
122
123// Return a molecule containing the portions of the molecule starting at
124// each dummy atom and going out up to 3 bonds. There may be more than
125// 1 fragment if there are dummy atoms more than 3 bonds apart, and there
126// may be fragments with more than 1 dummy atom if their dummy atoms fall
127// within 3 bonds of each other. E.g. the molecule [1*]CN(C[2*])Cc1ccccc1
128// will give [1*]CN(C)C[1*]. The 2 dummy atoms are 4 bonds apart, but the
129// fragments overlap. All dummy atoms are given isotope 1 whatever they had
130// before.
132 const ROMol &mol);
133
134// Take any query atoms out of the molecule, replacing them with the
135// nearest thing possible. Probably this will just be the atomic
136// number. It doesn't change dummy atoms that have an isotope as
137// these will be connectors, or anything with an AtomType query as
138// they are uncontroversial. Returns true if it did something,
139// false if the molecule was left unchanged.
141
142// Put together a product name in the Enamine style, which uses
143// a semicolon as a separator and has the reagent names followed
144// by the reaction name.
146 const std::string &reactionId, const std::vector<std::string> &fragIds);
149 const std::vector<size_t> &fragNums);
150// Zip the fragments together to make a molecule. Assumes the connection
151// points are marked by isotope numbers on dummy atoms.
153 const std::vector<const ROMol *> &synthons);
154
155// Make a map that has all the fragments with the same SMILES
156// in a vector keyed by that SMILES.
157RDKIT_SYNTHONSPACESEARCH_EXPORT std::map<std::string, std::vector<ROMol *>>
158mapFragsBySmiles(std::vector<std::vector<std::unique_ptr<ROMol>>> &fragSets,
159 bool &cancelled);
160
161} // namespace SynthonSpaceSearch::details
162} // namespace RDKit
163
164#endif // RDKIT_SYNTHONSPACESEARCHDETAILS_H
std::chrono::steady_clock Clock
std::chrono::time_point< Clock > TimePoint
RWMol is a molecule class that is intended to be edited.
Definition RWMol.h:32
#define RDKIT_SYNTHONSPACESEARCH_EXPORT
Definition export.h:545
RDKIT_SYNTHONSPACESEARCH_EXPORT boost::dynamic_bitset getConnectorPattern(const std::vector< std::unique_ptr< ROMol > > &fragSet)
RDKIT_SYNTHONSPACESEARCH_EXPORT std::vector< std::vector< std::vector< std::pair< Atom *, unsigned int > > > > getConnectorPermutations(const std::vector< std::unique_ptr< ROMol > > &molFrags, const boost::dynamic_bitset<> &fragConns, const boost::dynamic_bitset<> &reactionConns)
RDKIT_SYNTHONSPACESEARCH_EXPORT bool removeQueryAtoms(RWMol &mol)
RDKIT_SYNTHONSPACESEARCH_EXPORT void bitSetsToVectors(const std::vector< boost::dynamic_bitset<> > &bitSets, std::vector< std::vector< size_t > > &outVecs)
RDKIT_SYNTHONSPACESEARCH_EXPORT bool checkTimeOut(const TimePoint *endTime)
RDKIT_SYNTHONSPACESEARCH_EXPORT int countConnections(const ROMol &mol)
RDKIT_SYNTHONSPACESEARCH_EXPORT std::vector< std::vector< unsigned int > > permMFromN(unsigned int m, unsigned int n)
RDKIT_SYNTHONSPACESEARCH_EXPORT std::vector< boost::dynamic_bitset<> > getConnectorPatterns(const std::vector< std::unique_ptr< ROMol > > &fragSet)
RDKIT_SYNTHONSPACESEARCH_EXPORT std::unique_ptr< ROMol > buildConnRegion(const ROMol &mol)
RDKIT_SYNTHONSPACESEARCH_EXPORT std::vector< std::vector< unsigned int > > combMFromN(unsigned int m, unsigned int n)
RDKIT_SYNTHONSPACESEARCH_EXPORT std::vector< std::vector< std::unique_ptr< ROMol > > > splitMolecule(const ROMol &query, unsigned int maxNumFrags, const std::uint64_t maxNumFragSets, const TimePoint *endTime, const int numThreads, bool &timedOut)
RDKIT_SYNTHONSPACESEARCH_EXPORT std::map< std::string, std::vector< ROMol * > > mapFragsBySmiles(std::vector< std::vector< std::unique_ptr< ROMol > > > &fragSets, bool &cancelled)
RDKIT_SYNTHONSPACESEARCH_EXPORT std::unique_ptr< ROMol > buildProduct(const std::vector< const ROMol * > &synthons)
RDKIT_SYNTHONSPACESEARCH_EXPORT std::string buildProductName(const std::string &reactionId, const std::vector< std::string > &fragIds)
RDKIT_SYNTHONSPACESEARCH_EXPORT void expandBitSet(std::vector< boost::dynamic_bitset<> > &bitSets)
Std stuff.
bool rdvalue_is(const RDValue_cast_t)