RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
SynthonSet.h
Go to the documentation of this file.
1//
2// Copyright (C) David Cosgrove 2024.
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10
11#ifndef RDKIT_SYNTHONSET_H
12#define RDKIT_SYNTHONSET_H
13
14#include <iosfwd>
15#include <string>
16#include <vector>
17
18#include <boost/dynamic_bitset.hpp>
19
20#include <RDGeneral/export.h>
24
25namespace RDKit {
26class ROMol;
27
28namespace SynthonSpaceSearch {
29class Synthon;
30
31// This class holds all the synthons for a particular reaction.
33 public:
34 SynthonSet() = default;
35 explicit SynthonSet(const std::string &id) : d_id(id) {}
36 SynthonSet(const SynthonSet &rhs) = delete;
37 SynthonSet(SynthonSet &&rhs) = delete;
38
39 const std::string &getId() const { return d_id; }
40 const std::vector<std::vector<std::unique_ptr<Synthon>>> &getSynthons()
41 const {
42 return d_synthons;
43 }
44 const boost::dynamic_bitset<> &getConnectors() const { return d_connectors; }
45 const std::vector<boost::dynamic_bitset<>> &getSynthonConnectorPatterns()
46 const {
47 return d_synthConnPatts;
48 }
49 const std::vector<std::shared_ptr<ROMol>> &getConnectorRegions() const;
50
51 const std::unique_ptr<ExplicitBitVect> &getConnRegFP() const;
52 const std::vector<int> &getNumConnectors() const;
53 bool hasFingerprints() const;
54 const std::vector<std::vector<std::unique_ptr<ExplicitBitVect>>> &
56
57 // Writes to/reads from a binary stream.
58 void writeToDBStream(std::ostream &os) const;
59 void readFromDBStream(std::istream &is, std::uint32_t version);
60 // Write the enumerated molecules to the stream in SMILES format.
61 void enumerateToStream(std::ostream &os) const;
62
63 // SynthonSet takes ownership of newSynthon and manages its lifetime.
64 void addSynthon(int synthonSetNum, std::unique_ptr<Synthon> newSynthon);
65
66 // Sometimes the synthon sets are numbered from 1 in the text file,
67 // in which case there'll be an empty set 0.
69
70 // The bonds in the synthons may not be the same as in the products, and
71 // this is a problem for aromatic ring creation in particular. For example:
72 // [1*]=CC=C[2*] and [1*]Nc1c([2*])cccc1 giving c1ccc2ncccc2c1. So
73 // transfer the types of bonds from the products to the synthons.
75
76 // Build the connector regions and their fingerprints. Only used when
77 // creating a SynthonSpace from a text file.
79
80 // Scan through the connectors ([1*], [2*] etc.) in the synthons
81 // and set bits in d_connectors accordingly. Also removes any empty
82 // synthon sets, which can arise because the synthon numbers start from
83 // 1 rather than 0. Only used when creating a SynthonSpace from a text
84 // file.
86
89
90 // Return the molecules for synthons for which the bits are true.
91 // Requires that reqSynths has the same dimensions as
92 // d_synthons.
93 std::vector<std::vector<ROMol *>> getSynthons(
94 const std::vector<boost::dynamic_bitset<>> &reqSynths) const;
95
96 std::string buildProductName(const std::vector<size_t> &synthNums) const;
97 std::unique_ptr<ROMol> buildProduct(
98 const std::vector<size_t> &synthNums) const;
99
100 private:
101 std::string d_id;
102 // The lists of synthons. A product of the reaction is created by
103 // combining 1 synthon from each of the outer vectors.
104 std::vector<std::vector<std::unique_ptr<Synthon>>> d_synthons;
105 // 4 bits showing which connectors are present in all the
106 // synthon sets.
107 boost::dynamic_bitset<> d_connectors;
108 // and the connector patterns for each synthon set.
109 std::vector<boost::dynamic_bitset<>> d_synthConnPatts;
110
111 // The connector regions of a molecule are the pieces of up to 3 bonds from
112 // a connector atom into the molecule. We keep a vector of all the ones
113 // present in the synthons in the set, plus a single fingerprint into which
114 // all of their fingerprints are folded. If a query fragment doesn't have a
115 // connector region in common with any of the synthons it can be assumed that
116 // the fragment won't have a match in this SynthonSet.
117 std::vector<std::shared_ptr<ROMol>> d_connectorRegions;
118 // The fingerprint of the connector regions. Fingerprints for all
119 // connector regions are folded into the same fingerprint.
120 std::unique_ptr<ExplicitBitVect> d_connRegFP;
121 // The number of connectors in the synthons in each synthon set.
122 std::vector<int> d_numConnectors;
123
124 // The fingerprints for the synthons for use with a fingerprint similarity
125 // search. They are not properties of the Synthons because they are not
126 // generated directly from them, as explained in buildSynthonFingerprints.
127 std::vector<std::vector<std::unique_ptr<ExplicitBitVect>>> d_synthonFPs;
128
129 // Tag each atom and bond in each synthon with its index and the synthon
130 // set number it came from.
131 void tagSynthonAtomsAndBonds() const;
132};
133
134} // namespace SynthonSpaceSearch
135
136} // namespace RDKit
137
138#endif // RDKIT_SYNTHONSET_H
Class that generates the same fingerprint style for different output formats
const std::vector< std::vector< std::unique_ptr< Synthon > > > & getSynthons() const
Definition SynthonSet.h:40
SynthonSet(SynthonSet &&rhs)=delete
void enumerateToStream(std::ostream &os) const
const std::unique_ptr< ExplicitBitVect > & getConnRegFP() const
void readFromDBStream(std::istream &is, std::uint32_t version)
const std::vector< std::vector< std::unique_ptr< ExplicitBitVect > > > & getSynthonFPs() const
std::unique_ptr< ROMol > buildProduct(const std::vector< size_t > &synthNums) const
void writeToDBStream(std::ostream &os) const
void addSynthon(int synthonSetNum, std::unique_ptr< Synthon > newSynthon)
void buildSynthonFingerprints(const FingerprintGenerator< std::uint64_t > &fpGen)
const std::vector< std::shared_ptr< ROMol > > & getConnectorRegions() const
SynthonSet(const std::string &id)
Definition SynthonSet.h:35
std::string buildProductName(const std::vector< size_t > &synthNums) const
const std::vector< int > & getNumConnectors() const
std::vector< std::vector< ROMol * > > getSynthons(const std::vector< boost::dynamic_bitset<> > &reqSynths) const
const std::vector< boost::dynamic_bitset<> > & getSynthonConnectorPatterns() const
Definition SynthonSet.h:45
const boost::dynamic_bitset & getConnectors() const
Definition SynthonSet.h:44
SynthonSet(const SynthonSet &rhs)=delete
const std::string & getId() const
Definition SynthonSet.h:39
#define RDKIT_SYNTHONSPACESEARCH_EXPORT
Definition export.h:545
Std stuff.