RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
ScaffoldNetwork.h
Go to the documentation of this file.
//
// Copyright (C) 2019 Greg Landrum and T5 Informatics GmbH
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
10#include <RDGeneral/export.h>
11#ifndef RD_SCAFFOLDNETWORK_H
12#define RD_SCAFFOLDNETWORK_H
13
14#include <vector>
15#include <map>
16#include <string>
17#include <sstream>
18#include <memory>
19#include <iostream>
20
21#ifdef RDK_USE_BOOST_SERIALIZATION
22#include <RDGeneral/Invariant.h>
24#include <boost/archive/text_oarchive.hpp>
25#include <boost/archive/text_iarchive.hpp>
26#include <boost/serialization/vector.hpp>
27#include <boost/serialization/shared_ptr.hpp>
28#include <boost/serialization/version.hpp>
30#endif
31
32namespace RDKit {
33class ROMol;
34class ChemicalReaction;
35
36namespace ScaffoldNetwork {
37
39 bool includeGenericScaffolds =
40 true; ///< include scaffolds with all atoms replaced by dummies
41 bool includeGenericBondScaffolds =
42 false; ///< include scaffolds with all bonds replaced by single bonds
43 bool includeScaffoldsWithoutAttachments =
44 true; ///< remove attachment points from scaffolds and include the result
45 bool includeScaffoldsWithAttachments =
46 true; ///< Include the version of the scaffold with attachment points
47 bool includeNames =
48 false; ///< Include molecules names of the input molecules
49 bool keepOnlyFirstFragment =
50 true; ///< keep only the first fragment from the bond breaking rule
51 bool pruneBeforeFragmenting =
52 true; ///< Do a pruning/flattening step before starting fragmenting
53 bool flattenIsotopes = true; ///< remove isotopes when flattening
54 bool flattenChirality =
55 true; ///< remove chirality and bond stereo when flattening
56 bool flattenKeepLargest =
57 true; ///< keep only the largest fragment when doing flattening
58 bool collectMolCounts = true; ///< keep track of the number of molecules each
59 ///< scaffold was reached from
60
61 std::vector<std::shared_ptr<ChemicalReaction>>
62 bondBreakersRxns; ///< the reaction(s) used to fragment. Should expect a
63 ///< single reactant and produce two products
65 : ScaffoldNetworkParams{{"[!#0;R:1]-!@[!#0:2]>>[*:1]-[#0].[#0]-[*:2]"}} {}
66 ScaffoldNetworkParams(const std::vector<std::string> &bondBreakersSmarts);
67};
68
//! The kind of transformation that links two nodes of a scaffold network.
//! The numeric values are explicit because NetworkEdge writes the enum
//! directly to its serialization archive.
enum class EdgeType {
  Fragment = 1,  ///< molecule -> fragment
  Generic = 2,   ///< molecule -> generic molecule (all atoms are dummies)
  GenericBond = 3,  ///< molecule -> generic bond molecule (all bonds single)
  RemoveAttachment = 4,  ///< molecule -> molecule with no attachment points
  Initialize = 5         ///< molecule -> flattened molecule
};
76
78 size_t beginIdx;
79 size_t endIdx;
81 NetworkEdge() : beginIdx(0), endIdx(0), type(EdgeType::Initialize) {}
82 NetworkEdge(size_t bi, size_t ei, EdgeType typ)
83 : beginIdx(bi), endIdx(ei), type(typ) {}
85 return (beginIdx == o.beginIdx) && (endIdx == o.endIdx) && (type == o.type);
86 }
88 return (beginIdx != o.beginIdx) || (endIdx != o.endIdx) || (type != o.type);
89 }
90#ifdef RDK_USE_BOOST_SERIALIZATION
91 private:
92 friend class boost::serialization::access;
93 template <class Archive>
94 void serialize(Archive &ar, const unsigned int version) {
95 RDUNUSED_PARAM(version);
96 ar & beginIdx;
97 ar & endIdx;
98 ar & type;
99 }
100#endif
101};
102
104 std::vector<std::string> nodes; ///< SMILES for the scaffolds
105 std::vector<unsigned>
106 counts; ///< number of times each scaffold was encountered
107 std::vector<unsigned>
108 molCounts; ///< number of molecules each scaffold was found in
109 std::vector<NetworkEdge> edges; ///< edges in the network
111#ifdef RDK_USE_BOOST_SERIALIZATION
112 ScaffoldNetwork(const std::string &pkl) {
113 std::stringstream iss(pkl);
114 boost::archive::text_iarchive ia(iss);
115 ia >> *this;
116 }
117
118 private:
119 friend class boost::serialization::access;
120 template <class Archive>
121 void serialize(Archive &ar, const unsigned int version) {
122 RDUNUSED_PARAM(version);
123 ar & nodes;
124 ar & counts;
125 if (version > 0) {
126 ar & molCounts;
127 }
128 ar & edges;
129 }
130#endif
131};
132
133//! update an existing ScaffoldNetwork using a set of molecules
134template <typename T>
136 const ScaffoldNetworkParams &params);
137
138//! create a new ScaffoldNetwork for a set of molecules
139template <typename T>
141 const ScaffoldNetworkParams &params) {
143 updateScaffoldNetwork(mols, res, params);
144 return res;
145}
146//! allows nodes to output nicely as strings
147inline std::ostream &operator<<(std::ostream &ostr,
149 switch (e) {
151 ostr << "Fragment";
152 break;
154 ostr << "Generic";
155 break;
157 ostr << "GenericBond";
158 break;
160 ostr << "RemoveAttachment";
161 break;
163 ostr << "Initialize";
164 break;
165 default:
166 ostr << "UNKNOWN";
167 break;
168 }
169 return ostr;
170}
171//! allows edges to output nicely as strings
172inline std::ostream &operator<<(std::ostream &ostr,
174 ostr << "NetworkEdge( " << e.beginIdx << "->" << e.endIdx
175 << ", type:" << e.type << " )";
176 return ostr;
177}
178
179//! returns parameters for constructing scaffold networks using BRICS
180//! fragmentation
182
183} // namespace ScaffoldNetwork
184} // namespace RDKit
185
#ifdef RDK_USE_BOOST_SERIALIZATION
namespace boost {
namespace serialization {
//! Class version for serialized ScaffoldNetwork objects; currently 1.
//! ScaffoldNetwork::serialize() only archives molCounts when the version
//! is greater than 0.
template <>
struct version<RDKit::ScaffoldNetwork::ScaffoldNetwork> {
  BOOST_STATIC_CONSTANT(int, value = 1);
};
}  // namespace serialization
}  // namespace boost
#endif
196
197#endif
RDKIT_GRAPHMOL_EXPORT std::ostream & operator<<(std::ostream &target, const RDKit::Atom &at)
allows Atom objects to be dumped to streams
#define RDUNUSED_PARAM(x)
Definition Invariant.h:196
#define RDKIT_SCAFFOLDNETWORK_EXPORT
Definition export.h:481
ScaffoldNetwork createScaffoldNetwork(const T &mols, const ScaffoldNetworkParams &params)
create a new ScaffoldNetwork for a set of molecules
void updateScaffoldNetwork(const T &mols, ScaffoldNetwork &network, const ScaffoldNetworkParams &params)
update an existing ScaffoldNetwork using a set of molecules
@ Initialize
molecule -> flattened molecule
@ Fragment
molecule -> fragment
@ Generic
molecule -> generic molecule (all atoms are dummies)
@ RemoveAttachment
molecule -> molecule with no attachment points
@ GenericBond
molecule -> generic bond molecule (all bonds single)
RDKIT_SCAFFOLDNETWORK_EXPORT ScaffoldNetworkParams getBRICSNetworkParams()
Std stuff.
bool rdvalue_is(const RDValue_cast_t)
Definition RDLog.h:25
bool operator==(const RDKit::ScaffoldNetwork::NetworkEdge &o) const
NetworkEdge(size_t bi, size_t ei, EdgeType typ)
bool operator!=(const RDKit::ScaffoldNetwork::NetworkEdge &o) const
std::vector< std::shared_ptr< ChemicalReaction > > bondBreakersRxns
ScaffoldNetworkParams(const std::vector< std::string > &bondBreakersSmarts)
std::vector< NetworkEdge > edges
edges in the network
std::vector< unsigned > molCounts
number of molecules each scaffold was found in
std::vector< std::string > nodes
SMILES for the scaffolds.
std::vector< unsigned > counts
number of times each scaffold was encountered