RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
SynthonSet.h
Go to the documentation of this file.
1//
2// Copyright (C) David Cosgrove 2024.
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10
11#ifndef RDKIT_SYNTHONSET_H
12#define RDKIT_SYNTHONSET_H
13
14#include <iosfwd>
15#include <string>
16#include <vector>
17
18#include <boost/dynamic_bitset.hpp>
19
20#include <RDGeneral/export.h>
24
25namespace RDKit {
26class ROMol;
27
28namespace SynthonSpaceSearch {
29class Synthon;
30class SynthonSpace;
31struct SynthonSpaceSearchParams;
32
33// This class holds pointers to all the synthons for a particular
34// reaction. The synthons themselves are in a pool in the
35// SynthonSpace.
37 public:
// Construction: a SynthonSet is either default-constructed or built from
// an identifier string. Copy construction and move construction are both
// explicitly deleted.
38 SynthonSet() = default;
// Copies id into d_id; explicit, so a std::string is never implicitly
// converted to a SynthonSet.
39 explicit SynthonSet(const std::string &id) : d_id(id) {}
40 SynthonSet(const SynthonSet &rhs) = delete;
41 SynthonSet(SynthonSet &&rhs) = delete;
42
// Returns a const reference to the identifier held in d_id.
43 const std::string &getId() const { return d_id; }
// Returns a const reference to d_synthons: one inner vector per synthon
// set, each entry pairing a synthon ID string with a pointer to the
// Synthon.
44 const std::vector<std::vector<std::pair<std::string, Synthon *>>> &
45 getSynthons() const {
46 return d_synthons;
47 }
// Returns a const reference to d_connectors, the bitset recording which
// connectors are present.
48 const boost::dynamic_bitset<> &getConnectors() const { return d_connectors; }
// Returns a const reference to d_synthConnPatts, which holds one connector
// bit pattern per synthon set.
49 const std::vector<boost::dynamic_bitset<>> &getSynthonConnectorPatterns()
50 const {
51 return d_synthConnPatts;
52 }
// Accessors that are only declared here; their definitions are not in this
// header.
// NOTE(review): the reference-returning ones presumably expose the
// like-named private members (d_connectorRegions, d_connRegSmis,
// d_connRegFPs, d_addFP, d_subtractFP, d_numConnectors) - confirm against
// the implementation file.
53 const std::vector<std::shared_ptr<ROMol>> &getConnectorRegions() const;
54 const std::vector<std::string> &getConnectorRegionSmiles() const;
55 const std::vector<std::unique_ptr<ExplicitBitVect>> &getConnRegFPs() const;
56 const std::unique_ptr<ExplicitBitVect> &getAddFP() const;
57 const std::unique_ptr<ExplicitBitVect> &getSubtractFP() const;
58 const std::vector<int> &getNumConnectors() const;
// NOTE(review): presumably the number of products obtainable by taking one
// synthon from each synthon set - confirm.
59 std::uint64_t getNumProducts() const;
// NOTE(review): presumably reports whether fingerprints have been built for
// the synthons in this set - confirm.
60 bool hasFingerprints() const;
62
63 // Writes to/reads from a binary stream.
// readFromDBStream additionally takes the owning SynthonSpace and a
// version number.
// NOTE(review): presumably space supplies the pooled Synthon objects that
// the stored entries refer to, and version selects the on-disk layout -
// confirm against the implementation.
64 void writeToDBStream(std::ostream &os) const;
65 void readFromDBStream(std::istream &is, const SynthonSpace &space,
66 std::uint32_t version);
67 // Write the enumerated molecules to the stream in SMILES format.
68 void enumerateToStream(std::ostream &os) const;
69
70 // This stores the pointer to the Synthon, but doesn't manage
71 // it and should never delete it.
// NOTE(review): from the parameter names, synthonSetNum presumably selects
// which synthon set (outer index of d_synthons) receives the synthon, and
// synthonId is the ID stored alongside the pointer - confirm.
72 void addSynthon(int synthonSetNum, Synthon *newSynthon,
73 const std::string &synthonId);
74
75 // Sometimes the synthon sets are numbered from 1 in the text file,
76 // in which case there'll be an empty set 0.
78
79 // The bonds in the synthons may not be the same as in the products, and
80 // this is a problem for aromatic ring creation in particular. Such as:
81 // [1*]=CC=C[2*] and [1*]Nc1c([2*])cccc1 giving c1ccc2ncccc2c1. So
82 // make versions of the synthons that reflect this, stored as searchMol
83 // in each synthon.
85
86 // Build the connector regions and their fingerprints. Only used when
87 // creating a SynthonSpace from a text file.
89
90 // Scan through the connectors ([1*], [2*] etc.) in the synthons
91 // and set bits in d_connectors accordingly. Also removes any empty
92 // reagent sets, which might be because the synthon numbers start from
93 // 1 rather than 0. Only used when creating a SynthonSpace from a text
94 // file.
96
100 unsigned int numBits);
101
102 // Return the molecules for synthons for which the bits are true.
103 // Obviously requires that reqSynths is the same dimensions as
104 // d_synthons.
// That is, reqSynths needs one bitset per synthon set, with one bit per
// synthon in that set. The result is returned by value as a vector of
// vectors of raw ROMol pointers.
// NOTE(review): the returned pointers are presumably non-owning - confirm.
105 std::vector<std::vector<ROMol *>> getSynthons(
106 const std::vector<boost::dynamic_bitset<>> &reqSynths) const;
107
// Build the name of a product, and the product molecule itself, from a
// choice of synthons. buildProduct hands ownership of the new molecule to
// the caller via std::unique_ptr.
// NOTE(review): synthNums presumably holds, for each synthon set, the index
// of the synthon to use - confirm against the implementation.
108 std::string buildProductName(const std::vector<size_t> &synthNums) const;
109 std::unique_ptr<ROMol> buildProduct(
110 const std::vector<size_t> &synthNums) const;
111
112 private:
// Identifier of this synthon set; initialised by the id-taking constructor
// and returned by getId().
113 std::string d_id;
114 // The lists of synthons. A product of the reaction is created by
115 // combining 1 synthon from each of the outer vectors. The actual
116 // Synthon objects are held in the SynthonSpace which manages all
117 // the memory. In different reactions/SynthonSets the same Synthon
118 // can have different IDs, so we need to keep the ID here rather
119 // than in the Synthon, whose primary key is its SMILES string.
120 std::vector<std::vector<std::pair<std::string, Synthon *>>> d_synthons;
121 // MAX_CONNECTOR_NUM+1 bits showing which connectors are present in all the
122 // synthon sets.
123 boost::dynamic_bitset<> d_connectors;
124 // And the connector patterns for each synthon set. If synthon set 0
125 // has connectors 1 and 3, then d_synthConnPatts[0] will have bits
126 // 1 and 3 set.
127 std::vector<boost::dynamic_bitset<>> d_synthConnPatts;
128
129 // The connector regions of a molecule are the pieces of up to 3 bonds from
130 // a connector atom into the molecule. We keep a vector of all the ones
131 // present in the synthons in the set, plus a fingerprint for each.
132 // If a query fragment doesn't have a connector region in common with
133 // any of the synthons it can be assumed that the fragment won't have
134 // a match in this SynthonSet.
135 std::vector<std::shared_ptr<ROMol>> d_connectorRegions;
// NOTE(review): presumably the SMILES strings of the connector regions in
// d_connectorRegions - confirm.
136 std::vector<std::string> d_connRegSmis;
137 // The fingerprints of the connector regions.
138 std::vector<std::unique_ptr<ExplicitBitVect>> d_connRegFPs;
139
140 // When doing an approximate FP similarity by ORing together
141 // the synthonFPs, adding d_addFP and subtracting d_subtractFP
142 // accounts (a bit) for the joins and the dummy atoms
143 // respectively.
144 std::unique_ptr<ExplicitBitVect> d_addFP;
145 std::unique_ptr<ExplicitBitVect> d_subtractFP;
146
147 // The number of connectors in the synthons in each synthon set.
148 std::vector<int> d_numConnectors;
149};
150
151} // namespace SynthonSpaceSearch
152
153} // namespace RDKit
154
155#endif // RDKIT_SYNTHONSET_H
class that generates same fingerprint style for different output formats
const std::unique_ptr< ExplicitBitVect > & getSubtractFP() const
const std::vector< std::vector< std::pair< std::string, Synthon * > > > & getSynthons() const
Definition SynthonSet.h:45
const std::unique_ptr< ExplicitBitVect > & getAddFP() const
const std::vector< std::unique_ptr< ExplicitBitVect > > & getConnRegFPs() const
SynthonSet(SynthonSet &&rhs)=delete
void enumerateToStream(std::ostream &os) const
void addSynthon(int synthonSetNum, Synthon *newSynthon, const std::string &synthonId)
std::unique_ptr< ROMol > buildProduct(const std::vector< size_t > &synthNums) const
void writeToDBStream(std::ostream &os) const
void buildAddAndSubtractFPs(const FingerprintGenerator< std::uint64_t > &fpGen, unsigned int numBits)
void readFromDBStream(std::istream &is, const SynthonSpace &space, std::uint32_t version)
void buildSynthonFingerprints(const FingerprintGenerator< std::uint64_t > &fpGen)
const std::vector< std::shared_ptr< ROMol > > & getConnectorRegions() const
SynthonSet(const std::string &id)
Definition SynthonSet.h:39
std::string buildProductName(const std::vector< size_t > &synthNums) const
const std::vector< int > & getNumConnectors() const
std::uint64_t getNumProducts() const
std::vector< std::vector< ROMol * > > getSynthons(const std::vector< boost::dynamic_bitset<> > &reqSynths) const
const std::vector< boost::dynamic_bitset<> > & getSynthonConnectorPatterns() const
Definition SynthonSet.h:49
const std::vector< std::string > & getConnectorRegionSmiles() const
const boost::dynamic_bitset & getConnectors() const
Definition SynthonSet.h:48
SynthonSet(const SynthonSet &rhs)=delete
const std::string & getId() const
Definition SynthonSet.h:43
#define RDKIT_SYNTHONSPACESEARCH_EXPORT
Definition export.h:545
Std stuff.