RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
Pipeline.h
Go to the documentation of this file.
1//
2// Copyright (C) 2023 Novartis Biomedical Research
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#ifndef RD_MOLSTANDARDIZE_PIPELINE_H
11#define RD_MOLSTANDARDIZE_PIPELINE_H
12#include <RDGeneral/export.h>
13#include <GraphMol/RWMol.h>
14#include <memory>
15#include <string>
16#include <utility>
17#include <vector>
18
19namespace RDKit {
20
21namespace MolStandardize {
22
24 // parsing
25 bool strictParsing{false};
26
27 // validation
28 bool reportAllFailures{true};
29 bool allowEmptyMolecules{false};
30 bool allowEnhancedStereo{false};
31 bool allowAromaticBondType{false};
32 bool allowDativeBondType{false};
33 double is2DZeroThreshold{1e-3};
34 double atomClashLimit{0.03};
35 double minMedianBondLength{1e-3};
36 double bondLengthLimit{100.};
37 bool allowLongBondsInRings{true};
38 bool allowAtomBondClashExemption{true};
39
40 // cleanup/standardization
41 // metal disconnector options
42 std::string metalNof{"[Li,Na,K,Rb,Cs,Fr]~[#7,#8,F]"};
43 std::string metalNon{};
44 // normalizer options
45 std::string normalizerData{
46 "// Name\tSMIRKS\n"
47 "Nitro to N+(O-)=O\t[N,P,As,Sb;X3:1](=[O,S,Se,Te:2])=[O,S,Se,Te:3]>>[*+1:1]([*-1:2])=[*:3]\n"
48 "Sulfone to S(=O)(=O)\t[S+2:1]([O-:2])([O-:3])>>[S+0:1](=[O-0:2])(=[O-0:3])\n"
49 "Pyridine oxide to n+O-\t[nH0+0:1]=[OH0+0:2]>>[n+:1][O-:2]\n"
50 "Azide to N=N+=N-\t[*:1][N:2]=[N:3]#[N:4]>>[*:1][N:2]=[N+:3]=[N-:4]\n"
51 "Diazo/azo to =N+=N-\t[*:1]=[N:2]#[N:3]>>[*:1]=[N+:2]=[N-:3]\n"
52 // Note: the sulfoxide transformation included by default in the
53 // Normalizer configuration was removed.
54 // Note: the transformation below was ported from STRUCHK and is not part
55 // of the default Normalizer configuration.
56 "[SH](=O)(=O) to S(=O)O\t[c,C,N,O,F,Cl,Br,I:1][SH+0:2](=[O:3])=[O:4]>>[*:1][*:2]([*:3])=[*:4]\n"
57 // Note: the two transformations below replace the default Phosphate
58 // normalization in order to ensure that, if an O is available, the double
59 // bond is placed between P and O
60 "Phosphate to P(O-)=O\t[O-:1][P+;D4:2][O,S,Se,Te;-1:3]>>[O+0:1]=[P+0;D5:2][*-1:3]\n"
61 "Generalized phosphate to P(X-)=Y\t[S,Se,Te;-1:1][P+;D4:2][S,Se,Te;-1:3]>>[*+0:1]=[P+0;D5:2][*-1:3]\n"
62 "C/S+N to C/S=N+\t[C,S&!$([S+]-[O-]);X3+1:1]([NX3:2])[NX3!H0:3]>>[*+0:1]([N:2])=[N+:3]\n"
63 "P+N to P=N+\t[P;X4+1:1]([NX3:2])[NX3!H0:3]>>[*+0:1]([N:2])=[N+:3]\n"
64 "Recombine 1,3-separated charges\t[N,P,As,Sb,O,S,Se,Te;-1:1]-[A+0:2]=[N,P,As,Sb,O,S,Se,Te;+1:3]>>[*-0:1]=[*:2]-[*+0:3]\n"
65 "Recombine 1,3-separated charges\t[n,o,p,s;-1:1]:[a:2]=[N,O,P,S;+1:3]>>[*-0:1]:[*:2]-[*+0:3]\n"
66 "Recombine 1,3-separated charges\t[N,O,P,S;-1:1]-[a+0:2]:[n,o,p,s;+1:3]>>[*-0:1]=[*:2]:[*+0:3]\n"
67 "Recombine 1,5-separated charges\t[N,P,As,Sb,O,S,Se,Te;-1:1]-[A+0:2]=[A:3]-[A:4]=[N,P,As,Sb,O,S,Se,Te;+1:5]>>[*-0:1]=[*:2]-[*:3]=[*:4]-[*+0:5]\n"
68 "Recombine 1,5-separated charges\t[n,o,p,s;-1:1]:[a:2]:[a:3]:[c:4]=[N,O,P,S;+1:5]>>[*-0:1]:[*:2]:[*:3]:[c:4]-[*+0:5]\n"
69 "Recombine 1,5-separated charges\t[N,O,P,S;-1:1]-[c:2]:[a:3]:[a:4]:[n,o,p,s;+1:5]>>[*-0:1]=[c:2]:[*:3]:[*:4]:[*+0:5]\n"
70 // Note: four transformations were added to the normalization of aliphatic
71 // conjugated cations in order to favor the positioning of new double bonds
72 // within rings.
73 "Normalize 1,3 conjugated cation\t[N;+0!H0:1]@-[A:2]=[N!$(*~[N,O,P,S;-1]),O;+1H0:3]>>[*+1:1]=[*:2]-[*+0:3]\n"
74 "Normalize 1,5 conjugated cation\t[N;+0!H0:1]@-[A:2]=[A:3]@-[A:4]=[N!$(*~[N,O,P,S;-1]),O;+1H0:5]>>[*+1:1]=[*:2]-[*:3]=[*:4]-[*+0:5]\n"
75 "Normalize 1,3 conjugated cation\t[N,O!$(*N);+0!H0:1]-[A:2]=[N!$(*~[N,O,P,S;-1]),O;+1H0:3]>>[*+1:1]=[*:2]-[*+0:3]\n"
76 "Normalize 1,3 conjugated cation\t[n;+0!H0:1]:[c:2]=[N!$(*~[N,O,P,S;-1]),O;+1H0:3]>>[*+1:1]:[*:2]-[*+0:3]\n"
77 "Normalize 1,5 conjugated cation\t[N;+0!H0:1]@-[A:2]=[A:3]-[A:4]=[N!$(*~[N,O,P,S;-1]),O;+1H0:5]>>[*+1:1]=[*:2]-[*:3]=[*:4]-[*+0:5]\n"
78 "Normalize 1,5 conjugated cation\t[N,O!$(*N);+0!H0:1]-[A:2]=[A:3]@-[A:4]=[N!$(*~[N,O,P,S;-1]),O;+1H0:5]>>[*+1:1]=[*:2]-[*:3]=[*:4]-[*+0:5]\n"
79 "Normalize 1,5 conjugated cation\t[N,O!$(*N);+0!H0:1]-[A:2]=[A:3]-[A:4]=[N!$(*~[N,O,P,S;-1]),O;+1H0:5]>>[*+1:1]=[*:2]-[*:3]=[*:4]-[*+0:5]\n"
80 "Normalize 1,5 conjugated cation\t[n;+0!H0:1]:[a:2]:[a:3]:[c:4]=[N!$(*~[N,O,P,S;-1]),O;+1H0:5]>>[n+1:1]:[*:2]:[*:3]:[*:4]-[*+0:5]\n"
81 "Charge normalization\t[F,Cl,Br,I,At;-1:1]=[O:2]>>[*-0:1][O-:2]\n"
82 "Charge recombination\t[N,P,As,Sb;-1:1]=[C+;v3:2]>>[*+0:1]#[C+0:2]\n"};
83 unsigned int normalizerMaxRestarts{200};
84 double scaledMedianBondLength{1.};
85
86 // serialization
87 bool outputV2000{false};
88};
89
92 INPUT_ERROR = (1 << 0),
110 OUTPUT_ERROR = (1 << 12),
116 FRAGMENTS_REMOVED = (1 << 25),
120};
121
// Identifies a stage of the pipeline. The underlying type is std::uint32_t
// and NOT_STARTED has the value 0.
// NOTE(review): the remaining enumerators (source lines 124-133) are not
// visible in this listing.
122enum class RDKIT_MOLSTANDARDIZE_EXPORT PipelineStage : std::uint32_t {
123 NOT_STARTED = 0,
134};
135
137 PipelineStatus status;
138 std::string detail;
139};
140
// Sequence of PipelineLogEntry records, stored in a std::vector.
141using PipelineLog = std::vector<PipelineLogEntry>;
142
144 PipelineStatus status;
145 std::uint32_t stage;
147 std::string inputMolData;
148 std::string outputMolData;
149 std::string parentMolData;
150
// Declaration only; the implementation is not part of this header.
// NOTE(review): presumably records `info` together with `newStatus` in this
// result - confirm against the implementation.
151 void append(PipelineStatus newStatus, const std::string &info);
152};
153
// Pair of two RWMOL_SPTR molecule pointers.
// NOTE(review): presumably the (output, parent) molecules - confirm against
// the functions that produce and consume this pair.
154using RWMOL_SPTR_PAIR = std::pair<RWMOL_SPTR, RWMOL_SPTR>;
155
156namespace Operations {
158 RWMOL_SPTR mol, PipelineResult &result, const PipelineOptions &options);
161 const PipelineOptions &options);
163 RWMOL_SPTR mol, PipelineResult &result, const PipelineOptions &options);
165 RWMOL_SPTR mol, PipelineResult &result, const PipelineOptions &options);
167 RWMOL_SPTR mol, PipelineResult &result, const PipelineOptions &options);
169 RWMOL_SPTR mol, PipelineResult &result, const PipelineOptions &options);
171 RWMOL_SPTR mol, PipelineResult &result, const PipelineOptions &options);
172
175 const PipelineOptions &options);
178 const PipelineOptions &options);
179
// Pointer-to-function type with the signature of parse.
180using ParseOperation = decltype(&parse);
// Pointer-to-function type with the signature of serialize.
181using SerializeOperation = decltype(&serialize);
// Pointer-to-function type with the signature of makeParent.
183using ParentOperation = decltype(&makeParent);
// List of (std::uint32_t, Operation) pairs. In the step tables of this
// header the integer is a PipelineStage value cast to uint32_t.
184using PipelineVector = std::vector<std::pair<std::uint32_t, Operation>>;
185
187 // input sanitization and cleanup
188 {static_cast<uint32_t>(PipelineStage::PREPARE_FOR_VALIDATION),
190 // validate the structure
191 {static_cast<uint32_t>(PipelineStage::VALIDATION), &validate}};
192
194 {static_cast<uint32_t>(PipelineStage::PREPARE_FOR_STANDARDIZATION),
196 {static_cast<uint32_t>(PipelineStage::STANDARDIZATION), &standardize},
197 {static_cast<uint32_t>(PipelineStage::REAPPLY_WEDGING), &reapplyWedging},
198 {static_cast<uint32_t>(PipelineStage::CLEANUP_2D), &cleanup2D}};
199} // namespace Operations
200
202 private:
// Options used by this pipeline instance; default-constructed unless a
// PipelineOptions object is passed to the constructor.
203 PipelineOptions options;
// Replaceable operations and step tables. Each one is initialized from the
// same-named entity in namespace Operations.
204 Operations::ParseOperation parse = Operations::parse;
205 Operations::SerializeOperation serialize = Operations::serialize;
206 Operations::PipelineVector validationSteps = Operations::validationSteps;
207 Operations::PipelineVector standardizationSteps =
208 Operations::standardizationSteps;
209 Operations::ParentOperation makeParent = Operations::makeParent;
210
211 public:
// Creates a pipeline with default-constructed options and the default
// operations from namespace Operations.
212 Pipeline() = default;
// Creates a pipeline that stores a copy of the options `o`. Explicit, so a
// PipelineOptions object is never implicitly converted into a Pipeline.
213 explicit Pipeline(const PipelineOptions &o) : options(o) {}
214 ~Pipeline() = default;
215
// Processes the input `molblock` and returns a PipelineResult.
// Declared const; the implementation is not part of this header.
216 PipelineResult run(const std::string &molblock) const;
217
219 validationSteps = steps;
220 }
222 standardizationSteps = steps;
223 }
// Replaces the function pointer stored in makeParent with `op`.
224 void setMakeParent(Operations::ParentOperation op) { makeParent = op; }
// Replaces the function pointer stored in parse with `op`.
225 void setParse(Operations::ParseOperation op) { parse = op; }
// Replaces the function pointer stored in serialize with `op`.
226 void setSerialize(Operations::SerializeOperation op) { serialize = op; }
227
228 private:
229};
230
231} // namespace MolStandardize
232} // namespace RDKit
233
234#endif
PREPARE_FOR_STANDARDIZATION_ERROR
Definition Pipeline.h:102
FRAGMENTS_REMOVED
Definition Pipeline.h:116
FRAGMENT_STANDARDIZATION_ERROR
Definition Pipeline.h:105
NORMALIZER_STANDARDIZATION_ERROR
Definition Pipeline.h:104
FEATURES_VALIDATION_ERROR
Definition Pipeline.h:94
LAYOUT2D_VALIDATION_ERROR
Definition Pipeline.h:97
PREPARE_FOR_VALIDATION_ERROR
Definition Pipeline.h:93
NORMALIZATION_APPLIED
Definition Pipeline.h:115
METALS_DISCONNECTED
Definition Pipeline.h:114
STANDARDIZATION_ERROR
Definition Pipeline.h:107
STEREO_VALIDATION_ERROR
Definition Pipeline.h:98
OUTPUT_ERROR
Definition Pipeline.h:110
INPUT_ERROR
Definition Pipeline.h:92
VALIDATION_ERROR
Definition Pipeline.h:99
CHARGE_STANDARDIZATION_ERROR
Definition Pipeline.h:106
PROTONATION_CHANGED
Definition Pipeline.h:117
IS2D_VALIDATION_ERROR
Definition Pipeline.h:96
BASIC_VALIDATION_ERROR
Definition Pipeline.h:95
PIPELINE_ERROR
Definition Pipeline.h:111
METAL_STANDARDIZATION_ERROR
Definition Pipeline.h:103
NO_EVENT
Definition Pipeline.h:91
Defines the editable molecule class RWMol.
void setValidationSteps(const Operations::PipelineVector &steps)
Definition Pipeline.h:218
void setSerialize(Operations::SerializeOperation op)
Definition Pipeline.h:226
PipelineResult run(const std::string &molblock) const
void setStandardizationSteps(const Operations::PipelineVector &steps)
Definition Pipeline.h:221
void setParse(Operations::ParseOperation op)
Definition Pipeline.h:225
void setMakeParent(Operations::ParentOperation op)
Definition Pipeline.h:224
Pipeline(const PipelineOptions &o)
Definition Pipeline.h:213
#define RDKIT_MOLSTANDARDIZE_EXPORT
Definition export.h:345
RDKIT_MOLSTANDARDIZE_EXPORT RWMOL_SPTR reapplyWedging(RWMOL_SPTR mol, PipelineResult &result, const PipelineOptions &options)
RDKIT_MOLSTANDARDIZE_EXPORT RWMOL_SPTR validate(RWMOL_SPTR mol, PipelineResult &result, const PipelineOptions &options)
RDKIT_MOLSTANDARDIZE_EXPORT RWMOL_SPTR cleanup2D(RWMOL_SPTR mol, PipelineResult &result, const PipelineOptions &options)
decltype(&serialize) SerializeOperation
Definition Pipeline.h:181
const PipelineVector standardizationSteps
Definition Pipeline.h:193
RDKIT_MOLSTANDARDIZE_EXPORT RWMOL_SPTR standardize(RWMOL_SPTR mol, PipelineResult &result, const PipelineOptions &options)
RDKIT_MOLSTANDARDIZE_EXPORT RWMOL_SPTR parse(const std::string &molblock, PipelineResult &result, const PipelineOptions &options)
RDKIT_MOLSTANDARDIZE_EXPORT RWMOL_SPTR prepareForValidation(RWMOL_SPTR mol, PipelineResult &result, const PipelineOptions &options)
const PipelineVector validationSteps
Definition Pipeline.h:186
RDKIT_MOLSTANDARDIZE_EXPORT RWMOL_SPTR prepareForStandardization(RWMOL_SPTR mol, PipelineResult &result, const PipelineOptions &options)
RDKIT_MOLSTANDARDIZE_EXPORT void serialize(RWMOL_SPTR_PAIR output, PipelineResult &result, const PipelineOptions &options)
decltype(&makeParent) ParentOperation
Definition Pipeline.h:183
RDKIT_MOLSTANDARDIZE_EXPORT RWMOL_SPTR_PAIR makeParent(RWMOL_SPTR mol, PipelineResult &result, const PipelineOptions &options)
std::vector< std::pair< std::uint32_t, Operation > > PipelineVector
Definition Pipeline.h:184
decltype(&prepareForValidation) Operation
Definition Pipeline.h:182
decltype(&parse) ParseOperation
Definition Pipeline.h:180
enum RDKIT_MOLSTANDARDIZE_EXPORT COMPLETED
Definition Pipeline.h:134
enum RDKIT_MOLSTANDARDIZE_EXPORT PipelineStage
Definition Pipeline.h:122
enum RDKIT_MOLSTANDARDIZE_EXPORT PREPARE_FOR_STANDARDIZATION
Definition Pipeline.h:127
enum RDKIT_MOLSTANDARDIZE_EXPORT CLEANUP_2D
Definition Pipeline.h:130
enum RDKIT_MOLSTANDARDIZE_EXPORT MAKE_PARENT
Definition Pipeline.h:131
enum RDKIT_MOLSTANDARDIZE_EXPORT STANDARDIZATION
Definition Pipeline.h:128
enum RDKIT_MOLSTANDARDIZE_EXPORT PARSING_INPUT
Definition Pipeline.h:124
std::pair< RWMOL_SPTR, RWMOL_SPTR > RWMOL_SPTR_PAIR
Definition Pipeline.h:154
enum RDKIT_MOLSTANDARDIZE_EXPORT VALIDATION
Definition Pipeline.h:126
enum RDKIT_MOLSTANDARDIZE_EXPORT PREPARE_FOR_VALIDATION
Definition Pipeline.h:125
enum RDKIT_MOLSTANDARDIZE_EXPORT REAPPLY_WEDGING
Definition Pipeline.h:129
enum RDKIT_MOLSTANDARDIZE_EXPORT SERIALIZING_OUTPUT
Definition Pipeline.h:132
std::vector< PipelineLogEntry > PipelineLog
Definition Pipeline.h:141
Std stuff.
bool rdvalue_is(const RDValue_cast_t)
boost::shared_ptr< RWMol > RWMOL_SPTR
Definition RWMol.h:222
void append(PipelineStatus newStatus, const std::string &info)