RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
StructChecker.h
Go to the documentation of this file.
1//
2// Copyright (C) 2016 Novartis Institutes for BioMedical Research
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10
11/*! \file StructChecker.h
12
13\brief Contains the public API of the StructChecker
14
15\b Note that this should be considered beta and that the API may change in
16future
17releases.
18
19*/
20#include <RDGeneral/export.h>
21#pragma once
22#ifndef RD_STRUCTCHECKER_H_Oct2016
23#define RD_STRUCTCHECKER_H_Oct2016
24
25#include <string>
26#include <vector>
27#include "../RDKitBase.h"
28
29namespace RDKit {
30namespace StructureCheck {
31
32// Flags for the return values of the StructureChecker
33
34// TypeDefs for translating augmented atom pairs
35static const int ANY_CHARGE = 8;
43
44enum AABondType { // MDL CTFile bond types plus extensions
45 BT_NONE = 0, // means REMOVE Bond
46 SINGLE = 1,
47 DOUBLE = 2,
48 TRIPLE = 3,
54 ALL_BOND_TYPES = 0xF
55};
56
58 TP_NONE = 0, // Don't care
59 RING = 1, // Ring
60 CHAIN = 2 // Chain
61};
62
64 std::string AtomSymbol;
65 int Charge;
67 unsigned SubstitutionCount; // substitution count 0 = don't care
70 : Charge(ANY_CHARGE),
71 Radical(ANY_RADICAL),
72 SubstitutionCount(0),
73 BondType(ANY_BOND) {}
74};
75
77 std::string AtomSymbol;
78 std::string ShortName;
79 int Charge;
82 std::vector<Ligand> Ligands;
83
85 : Charge(ANY_CHARGE), Radical(ANY_RADICAL), Topology(TP_NONE) {}
86
// Builds an augmented atom from an explicit symbol, short name, charge,
// radical and topology. Ligands is not in the initializer list, so it is
// default-constructed (an empty vector).
87 AugmentedAtom(const std::string &symbol, const std::string &name, int charge,
88 RadicalType radical, AATopology topology)
89 : AtomSymbol(symbol),
90 ShortName(name),
91 Charge(charge),
92 Radical(radical),
93 Topology(topology) {}
94};
95
97 std::string AtomSymbol;
98 double LocalInc;
99 double AlphaInc;
100 double BetaInc;
101 double MultInc;
102
103 // Used for logging
108};
109
112 double Cond;
113 // Used for logging
115};
116//-------------
117
118//! Structure Check Options
119/// Holds all the user options for the StructureChecking.
120/// Can be initialized from factory functions, perhaps serialized
127 unsigned MaxMolSize;
135
136 // Internal data for struchk
137 std::vector<std::pair<AugmentedAtom, AugmentedAtom>> AugmentedAtomPairs;
138 std::vector<AugmentedAtom> AcidicAtoms;
139 std::vector<AugmentedAtom> GoodAtoms;
140 std::vector<ROMOL_SPTR> Patterns;
141 std::vector<ROMOL_SPTR> RotatePatterns;
142 std::vector<ROMOL_SPTR> StereoPatterns;
143 std::vector<ROMOL_SPTR> FromTautomer;
144 std::vector<ROMOL_SPTR> ToTautomer;
145
146 double Elneg0; // elneg_table[0].value;
147 std::map<unsigned, double> ElnegTable; // AtomicNumber -> elneg value
148 std::vector<IncEntry> AtomAcidity; // atom_acidity_table[]
149 std::vector<IncEntry> ChargeIncTable;
150 // std::map AtomSymbol(or AtomicNumber) -> IncEntry
151 /* [ReadTransformation() ]
152 * The alpha, beta coefficients of the transformation function used
153 * to stretch the preliminary pKa values to the actual predictions.
154 * The function is pKa = 7 + (pKa'-7)*beta + ((pKa'-7)*alpha)^3.
155 */
156
157 double Alpha, Beta;
158 std::vector<PathEntry> AlphaPathTable, BetaPathTable;
159
160 public:
162
// Overwrites *this with a default-constructed StructCheckerOptions, discarding
// every current setting and every loaded table.
163 void clear() { *this = StructCheckerOptions(); }
164
165 bool loadAugmentedAtomTranslations(const std::string &path);
167 const std::vector<std::pair<AugmentedAtom, AugmentedAtom>> &aaPairs);
168
169 bool loadAcidicAugmentedAtoms(const std::string &path);
170 void setAcidicAugmentedAtoms(const std::vector<AugmentedAtom> &acidicAtoms);
171
172 bool loadGoodAugmentedAtoms(const std::string &path);
// NOTE(review): presumably replaces the GoodAtoms list with `goodAtoms` --
// confirm against the implementation, which is not visible in this header.
173 void setGoodAugmentedAtoms(const std::vector<AugmentedAtom> &goodAtoms);
174
175 bool loadPatterns(const std::string &path); // file with clean patterns
177 const std::vector<std::string> &smarts); // can throw RDKit exceptions
178 void setPatterns(const std::vector<ROMOL_SPTR> &p);
179
181 const std::string &path); // file with rotate patterns
183 const std::vector<std::string> &smarts); // can throw RDKit exceptions
184 void setRotatePatterns(const std::vector<ROMOL_SPTR> &p);
185
187 const std::string &path); // file with stereo patterns
189 const std::vector<std::string> &smarts); // can throw RDKit exceptions
190 void setStereoPatterns(const std::vector<ROMOL_SPTR> &p);
191
192 bool loadTautomerData(const std::string &path); // file path
193 void parseTautomerData(const std::vector<std::string> &smartsFrom,
194 const std::vector<std::string> &smartsTo);
195 void setTautomerData(const std::vector<ROMOL_SPTR> &from,
196 const std::vector<ROMOL_SPTR> &to);
197 bool loadChargeDataTables(const std::string &path); // file path
198};
199
202
205 const std::string &augmentedAtomTranslationsFile = "",
206 // ?? AcidicAtoms;
207 // ?? GoodAtoms;
208 const std::string &patternFile = "", // file with clean patterns
209 const std::string &rotatePatternFile = "", // file with rotate patterns
210 const std::string &stereoPatternFile = "", // file with stereo patterns
211 const std::string &tautomerFile = "");
212
213//! \brief Class for performing structure validation and cleanup
214/*! \b NOTE: This class should be considered beta. The API may change in future
215releases.
216
217Examples of Usage
218
219\code
220 StructChecker chk;
221 unsigned flags = chk.checkMolStructure( mol ); // use defaults
222\endcode
223
224or
225
226\code
227 StructureCheck::StructCheckerOptions options; // use defaults
228 // To use external data
229 StructureCheck::loadOptionsFromFiles(options, file1, file2);
230 StructChecker chk(options);
231
232 for( mol in mols ) {
233 unsigned flags = chk.checkMolStructure( mol );
234 if (0!=(flags & StructureCheck::StructChecker::BAD_SET)) {
235 // write to error file
236 } else if (0!=(flags & StructureCheck::StructChecker::TRANSFORMED_SET))
237{
238 // input molecule was transformed
239 } else { // flag == NO_CHANGE
240 // no change
241 }
242 }
243\endcode
244*/
246 public:
// Bit flags. Apart from NO_CHANGE (0) and the two masks at the end, every
// enumerator occupies its own bit, so several of them can be OR-ed together
// into a single unsigned value and tested individually with '&'.
247 typedef enum StructureFlags {
248 NO_CHANGE = 0,
249 BAD_MOLECULE = 0x0001,
250 ALIAS_CONVERSION_FAILED = 0x0002,
251 STEREO_ERROR = 0x0004,
252 STEREO_FORCED_BAD = 0x0008,
253 ATOM_CLASH = 0x0010,
254 ATOM_CHECK_FAILED = 0x0020,
255 SIZE_CHECK_FAILED = 0x0040,
256 // reserved error = 0x0080,
257 TRANSFORMED = 0x0100,
258 FRAGMENTS_FOUND = 0x0200,
259 EITHER_WARNING = 0x0400,
260 DUBIOUS_STEREO_REMOVED = 0x0800,
261 RECHARGED = 0x1000,
262 STEREO_TRANSFORMED = 0x2000,
263 TEMPLATE_TRANSFORMED = 0x4000,
264 TAUTOMER_TRANSFORMED = 0x8000,
265 // masks: BAD_SET is the OR of the seven flags 0x0001..0x0040 (== 0x007F);
// the reserved bit 0x0080 is not part of it.
266 BAD_SET = (BAD_MOLECULE | ALIAS_CONVERSION_FAILED | STEREO_ERROR |
267 STEREO_FORCED_BAD | ATOM_CLASH | ATOM_CHECK_FAILED |
268 SIZE_CHECK_FAILED),
269
// TRANSFORMED_SET is the OR of the eight flags 0x0100..0x8000 (== 0xFF00).
270 TRANSFORMED_SET = (TRANSFORMED | FRAGMENTS_FOUND | EITHER_WARNING |
271 DUBIOUS_STEREO_REMOVED | STEREO_TRANSFORMED |
272 TEMPLATE_TRANSFORMED | TAUTOMER_TRANSFORMED | RECHARGED),
273 } StructureFlags;
274 // attributes:
275 private:
276 StructCheckerOptions Options;
277
278 public:
// Default constructor: the Options member is default-constructed.
279 inline StructChecker() {}
// Constructs a checker that holds its own copy of `options`.
280 inline StructChecker(const StructCheckerOptions &options)
281 : Options(options) {}
282
// Returns a const reference to the options held by this checker (no copy).
283 const StructCheckerOptions &GetOptions() const { return Options; }
// Replaces the held options with a copy of `options`.
284 void SetOptions(const StructCheckerOptions &options) { Options = options; }
285
286 // Checks and, if needed, fixes the molecule structure and returns a set of
287 // StructureFlags
288 // that describes what has been done
289 unsigned checkMolStructure(RWMol &mol) const;
290
291 // instance-independent helper methods:
292 // Converts structure property flags to a comma-separated string
293 static std::string StructureFlagsToString(unsigned flags);
294 // Converts a comma-separated string to a StructureFlags unsigned integer
295 static unsigned StringToStructureFlags(const std::string &str);
296 // internal implementation:
297 private:
298};
299} // namespace StructureCheck
300} // namespace RDKit
301#endif
pulls in the core RDKit functionality
RWMol is a molecule class that is intended to be edited.
Definition RWMol.h:32
Class for performing structure validation and cleanup.
StructChecker(const StructCheckerOptions &options)
static std::string StructureFlagsToString(unsigned flags)
static unsigned StringToStructureFlags(const std::string &str)
void SetOptions(const StructCheckerOptions &options)
const StructCheckerOptions & GetOptions() const
unsigned checkMolStructure(RWMol &mol) const
#define RDKIT_STRUCTCHECKER_EXPORT
Definition export.h:489
static const char * symbol[119]
Definition mf.h:257
RDKIT_STRUCTCHECKER_EXPORT bool loadOptionsFromFiles(StructCheckerOptions &op, const std::string &augmentedAtomTranslationsFile="", const std::string &patternFile="", const std::string &rotatePatternFile="", const std::string &stereoPatternFile="", const std::string &tautomerFile="")
RDKIT_STRUCTCHECKER_EXPORT bool parseOptionsJSON(const std::string &json, StructCheckerOptions &op)
static const int ANY_CHARGE
Std stuff.
bool rdvalue_is(const RDValue_cast_t)
AugmentedAtom(const std::string &symbol, const std::string &name, int charge, RadicalType radical, AATopology topology)
std::vector< AugmentedAtom > GoodAtoms
void setRotatePatterns(const std::vector< ROMOL_SPTR > &p)
bool loadAcidicAugmentedAtoms(const std::string &path)
bool loadRotatePatterns(const std::string &path)
bool loadChargeDataTables(const std::string &path)
bool loadGoodAugmentedAtoms(const std::string &path)
std::map< unsigned, double > ElnegTable
void setGoodAugmentedAtoms(const std::vector< AugmentedAtom > &acidicAtoms)
bool loadAugmentedAtomTranslations(const std::string &path)
void setAugmentedAtomTranslations(const std::vector< std::pair< AugmentedAtom, AugmentedAtom > > &aaPairs)
void parseStereoPatterns(const std::vector< std::string > &smarts)
std::vector< AugmentedAtom > AcidicAtoms
void parseTautomerData(const std::vector< std::string > &smartsFrom, const std::vector< std::string > &smartsTo)
void parsePatterns(const std::vector< std::string > &smarts)
bool loadStereoPatterns(const std::string &path)
void setPatterns(const std::vector< ROMOL_SPTR > &p)
void setStereoPatterns(const std::vector< ROMOL_SPTR > &p)
void setTautomerData(const std::vector< ROMOL_SPTR > &from, const std::vector< ROMOL_SPTR > &to)
void setAcidicAugmentedAtoms(const std::vector< AugmentedAtom > &acidicAtoms)
void parseRotatePatterns(const std::vector< std::string > &smarts)
std::vector< std::pair< AugmentedAtom, AugmentedAtom > > AugmentedAtomPairs
bool loadPatterns(const std::string &path)
bool loadTautomerData(const std::string &path)