RDKit
Open-source cheminformatics and machine learning.
StructChecker.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2016 Novartis Institutes for BioMedical Research
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 
11 /*! \file StructChecker.h
12 
13 \brief Contains the public API of the StructChecker
14 
15 \b Note that this should be considered beta and that the API may change in
16 future
17 releases.
18 
19 */
20 #pragma once
21 #ifndef RD_STRUCTCHECKER_H_Oct2016
22 #define RD_STRUCTCHECKER_H_Oct2016
23 
24 #include <string>
25 #include <vector>
26 #include "../RDKitBase.h"
27 
28 namespace RDKit {
29 namespace StructureCheck {
30 
31 // Flags for the return values of the StructureChecker
32 
33 // TypeDefs for translating augmented atom pairs
34 static const int ANY_CHARGE = 8;
36  RT_NONE = 0,
37  SINGLET = 1,
38  DOUBLET = 2,
39  TRIPLET = 3,
40  ANY_RADICAL = 0xFF
41 };
42 
43 enum AABondType { // MDL CTFile bond types plus extensions
44  BT_NONE = 0, // means REMOVE Bond
45  SINGLE = 1,
46  DOUBLE = 2,
47  TRIPLE = 3,
48  AROMATIC = 4,
52  ANY_BOND = 8,
54 };
55 
56 enum AATopology { // topology requirement carried by AugmentedAtom (its Topology member)
57  TP_NONE = 0, // Don't care (value used by AugmentedAtom's default initializer)
58  RING = 1, // Ring
59  CHAIN = 2 // Chain
60 };
61 
62 struct Ligand {
63  std::string AtomSymbol;
64  int Charge;
66  unsigned SubstitutionCount; // substitution count 0 = don't care
69  : Charge(ANY_CHARGE),
70  Radical(ANY_RADICAL),
71  SubstitutionCount(0),
72  BondType(ANY_BOND) {}
73 };
74 
75 struct AugmentedAtom {
76  std::string AtomSymbol;
77  std::string ShortName;
78  int Charge;
81  std::vector<Ligand> Ligands;
82 
84  : Charge(ANY_CHARGE), Radical(ANY_RADICAL), Topology(TP_NONE) {}
85 
86  AugmentedAtom(const std::string &symbol, const std::string &name, int charge,
87  RadicalType radical, AATopology topology)
88  : AtomSymbol(symbol),
89  ShortName(name),
90  Charge(charge),
91  Radical(radical),
92  Topology(topology) {}
93 };
94 
95 struct IncEntry {
96  std::string AtomSymbol;
97  double LocalInc;
98  double AlphaInc;
99  double BetaInc;
100  double MultInc;
101 
102  // Used for logging
107 };
108 
109 struct PathEntry {
111  double Cond;
112  // Used for logging
114 };
115 //-------------
116 
117 //! Structure Check Options
118 // Holds all the user options for the StructureChecking.
119 // Can be initialized from factory functions, perhaps serialized
121  double AcidityLimit;
126  unsigned MaxMolSize;
133  bool Verbose;
134 
135  // Internal data for struchk
136  std::vector<std::pair<AugmentedAtom, AugmentedAtom> > AugmentedAtomPairs;
137  std::vector<AugmentedAtom> AcidicAtoms;
138  std::vector<AugmentedAtom> GoodAtoms;
139  std::vector<ROMOL_SPTR> Patterns;
140  std::vector<ROMOL_SPTR> RotatePatterns;
141  std::vector<ROMOL_SPTR> StereoPatterns;
142  std::vector<ROMOL_SPTR> FromTautomer;
143  std::vector<ROMOL_SPTR> ToTautomer;
144 
145  double Elneg0; // elneg_table[0].value;
146  std::map<unsigned, double> ElnegTable; // AtomicNumber -> elneg
147  std::vector<IncEntry> AtomAcidity; // atom_acidity_table[]
148  std::vector<IncEntry> ChargeIncTable;
149  // std::map AtomSymbol(or AtomicNumber) -> IncEntry
150  /* [ReadTransformation() ]
151  * The alpha, beta coefficients of the transformation function used
152  * to stretch the preliminary pKa values to the actual predictions.
153  * The function is pKa = 7 + (pKa'-7)*beta + ((pKa'-7)*alpha)^3.
154  */
155 
156  double Alpha, Beta;
157  std::vector<PathEntry> AlphaPathTable, BetaPathTable;
158 
159  public:
161 
162  void clear() { *this = StructCheckerOptions(); } // resets all fields by assigning a default-constructed instance
163 
164  bool loadAugmentedAtomTranslations(const std::string &path);
165  void setAugmentedAtomTranslations(
166  const std::vector<std::pair<AugmentedAtom, AugmentedAtom> > &aaPairs);
167 
168  bool loadAcidicAugmentedAtoms(const std::string &path);
169  void setAcidicAugmentedAtoms(const std::vector<AugmentedAtom> &acidicAtoms);
170 
171  bool loadGoodAugmentedAtoms(const std::string &path);
172  void setGoodAugmentedAtoms(const std::vector<AugmentedAtom> &acidicAtoms); // NOTE(review): parameter is named acidicAtoms, same as in setAcidicAugmentedAtoms; probably meant "goodAtoms" - confirm
173 
174  bool loadPatterns(const std::string &path); // file with clean patterns
175  void parsePatterns(
176  const std::vector<std::string> &smarts); // can throw RDKit exceptions
177  void setPatterns(const std::vector<ROMOL_SPTR> &p);
178 
179  bool loadRotatePatterns(
180  const std::string &path); // file with rotate patterns
181  void parseRotatePatterns(
182  const std::vector<std::string> &smarts); // can throw RDKit exceptions
183  void setRotatePatterns(const std::vector<ROMOL_SPTR> &p);
184 
185  bool loadStereoPatterns(
186  const std::string &path); // file with stereo patterns
187  void parseStereoPatterns(
188  const std::vector<std::string> &smarts); // can throw RDKit exceptions
189  void setStereoPatterns(const std::vector<ROMOL_SPTR> &p);
190 
191  bool loadTautomerData(const std::string &path); // file path
192  void parseTautomerData(const std::vector<std::string> &smartsFrom,
193  const std::vector<std::string> &smartsTo);
194  void setTautomerData(const std::vector<ROMOL_SPTR> &from,
195  const std::vector<ROMOL_SPTR> &to);
196  bool loadChargeDataTables(const std::string &path); // file path
197 };
198 
199 bool parseOptionsJSON(const std::string &json, StructCheckerOptions &op); // presumably fills op from the JSON text; NOTE(review): meaning of the bool result is not stated in this header - confirm
200 
203  const std::string &augmentedAtomTranslationsFile = "",
204  // ?? AcidicAtoms;
205  // ?? GoodAtoms;
206  const std::string &patternFile = "", // file with clean patterns
207  const std::string &rotatePatternFile = "", // file with rotate patterns
208  const std::string &stereoPatternFile = "", // file with stereo patterns
209  const std::string &tautomerFile = "");
210 
211 //! \brief Class for performing structure validation and cleanup
212 /*! \b NOTE: This class should be considered beta. The API may change in future
213 releases.
214 
215 Examples of Usage
216 
217 \code
218  StructChecker chk;
219  unsigned flags = chk.checkMolStructure( mol ); // use defaults
220 \endcode
221 
222 or
223 
224 \code
225  StructureCheck::StructCheckerOptions options; // use defaults
226  // To use external data
227  StructureCheck::loadOptionsFromFiles(options, file1, file2);
228  StructChecker chk(options);
229 
230  for( mol in mols ) {
231  unsigned flags = chk.checkMolStructure( mol );
232  if (0!=(flags & StructureCheck::StructChecker::BAD_SET)) {
233  // write to error file
234  } else if (0!=(flags & StructureCheck::StructChecker::TRANSFORMED_SET))
235 {
236  // input molecule was transformed
237  } else { // flag == NO_CHANGE
238  // no change
239  }
240  }
241 \endcode
242 */
244  public:
245  typedef enum StructureFlags { // each flag is a distinct bit, so one unsigned value can carry several
246  NO_CHANGE = 0, // no bit set
247  BAD_MOLECULE = 0x0001, // 0x0001-0x0040: bits collected in BAD_SET
248  ALIAS_CONVERSION_FAILED = 0x0002,
249  STEREO_ERROR = 0x0004,
250  STEREO_FORCED_BAD = 0x0008,
251  ATOM_CLASH = 0x0010,
252  ATOM_CHECK_FAILED = 0x0020,
253  SIZE_CHECK_FAILED = 0x0040,
254  // reserved error = 0x0080,
255  TRANSFORMED = 0x0100, // 0x0100-0x8000: bits collected in TRANSFORMED_SET
256  FRAGMENTS_FOUND = 0x0200,
257  EITHER_WARNING = 0x0400,
258  DUBIOUS_STEREO_REMOVED = 0x0800,
259  RECHARGED = 0x1000,
260  STEREO_TRANSFORMED = 0x2000,
261  TEMPLATE_TRANSFORMED = 0x4000,
262  TAUTOMER_TRANSFORMED = 0x8000,
263  // mask: OR of the seven low bits above (evaluates to 0x007F)
264  BAD_SET = (BAD_MOLECULE | ALIAS_CONVERSION_FAILED | STEREO_ERROR |
265  STEREO_FORCED_BAD | ATOM_CLASH | ATOM_CHECK_FAILED |
266  SIZE_CHECK_FAILED),
267  // mask: OR of the eight high bits above (evaluates to 0xFF00)
268  TRANSFORMED_SET = (TRANSFORMED | FRAGMENTS_FOUND | EITHER_WARNING |
269  DUBIOUS_STEREO_REMOVED | STEREO_TRANSFORMED |
270  TEMPLATE_TRANSFORMED | TAUTOMER_TRANSFORMED | RECHARGED),
271  } StructureFlags;
272  // attributes:
273  private:
274  StructCheckerOptions Options;
275 
276  public:
277  inline StructChecker() {} // leaves Options default-constructed
278  inline StructChecker(const StructCheckerOptions &options) // stores a copy of the caller's options
279  : Options(options) {}
280 
281  const StructCheckerOptions &GetOptions() const { return Options; } // read-only reference to the stored options
282  void SetOptions(const StructCheckerOptions &options) { Options = options; } // replaces the stored options with a copy
283 
284  // Checks the molecule structure, fixes it if needed, and returns a set of
285  // StructureFlags
286  // that describes what has been done. mol is taken by non-const reference.
287  unsigned checkMolStructure(RWMol &mol) const;
288 
289  // instance-independent helper methods:
290  // Converts structure property flags to a comma-separated string
291  static std::string StructureFlagsToString(unsigned flags);
292  // Converts a comma-separated string to a StructureFlags unsigned integer
293  static unsigned StringToStructureFlags(const std::string &str);
294  // internal implementation:
295  private:
296 };
297 }
298 }
299 #endif
Class for performing structure validation and cleanup.
bool loadOptionsFromFiles(StructCheckerOptions &op, const std::string &augmentedAtomTranslationsFile="", const std::string &patternFile="", const std::string &rotatePatternFile="", const std::string &stereoPatternFile="", const std::string &tautomerFile="")
const StructCheckerOptions & GetOptions() const
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:30
bool parseOptionsJSON(const std::string &json, StructCheckerOptions &op)
std::vector< std::pair< AugmentedAtom, AugmentedAtom > > AugmentedAtomPairs
AugmentedAtom(const std::string &symbol, const std::string &name, int charge, RadicalType radical, AATopology topology)
Definition: StructChecker.h:86
StructChecker(const StructCheckerOptions &options)
std::vector< AugmentedAtom > GoodAtoms
static const int ANY_CHARGE
Definition: StructChecker.h:34
Includes a bunch of functionality for handling Atom and Bond queries.
Definition: Atom.h:29
std::vector< ROMOL_SPTR > StereoPatterns
std::map< unsigned, double > ElnegTable
void SetOptions(const StructCheckerOptions &options)
std::vector< ROMOL_SPTR > RotatePatterns
std::vector< AugmentedAtom > AcidicAtoms