RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
SmilesParse.h
Go to the documentation of this file.
1//
2// Copyright (C) 2001-2021 Greg Landrum and other RDKit contributors
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#include <RDGeneral/export.h>
11#ifndef RD_SMILESPARSE_H
12#define RD_SMILESPARSE_H
13
15#include <string>
16#include <exception>
17#include <map>
18#include <memory>
19
20namespace RDKit {
21class RWMol;
22class Atom;
23class Bond;
24
25namespace SmilesParse {
//! Exception type that carries a human-readable message.
/*!
  The message is stored by value inside the exception, so the pointer
  returned by what() stays valid for as long as the exception object does.
 */
class RDKIT_SMILESPARSE_EXPORT SmilesParseException : public std::exception {
 public:
  //! construct from a C string; a null pointer is stored as an empty message
  /*! (building a std::string directly from a null pointer is undefined) */
  SmilesParseException(const char *msg) : _msg(msg ? msg : "") {}
  //! construct from a std::string
  /*! taken by const reference so the text is copied once, not twice */
  SmilesParseException(const std::string &msg) : _msg(msg) {}
  //! \return the stored message as a null-terminated string
  const char *what() const noexcept override { return _msg.c_str(); }
  ~SmilesParseException() noexcept override = default;

 private:
  std::string _msg;  //!< the text returned by what()
};
36
37} // namespace SmilesParse
38
39namespace v2 {
40namespace SmilesParse {
42
//! Options for the v2 SMILES parser; every field has a default value.
struct RDKIT_SMILESPARSE_EXPORT SmilesParserParams {
  //! sanitize the molecule after building it
  bool sanitize{true};
  //! recognize and parse CXSMILES
  bool allowCXSMILES{true};
  //! throw an exception if the CXSMILES parsing fails
  bool strictCXSMILES{true};
  //! parse (and set) the molecule name as well
  bool parseName{true};
  //! remove Hs after constructing the molecule
  bool removeHs{true};
  //! skip the final cleanup stage
  bool skipCleanup{false};
  //! enable debugging in the SMILES parser
  bool debugParse{false};
  //! allows SMILES "macros"
  std::map<std::string, std::string> replacements;
};
55
//! Options for the v2 SMARTS parser; every field has a default value.
struct RDKIT_SMILESPARSE_EXPORT SmartsParserParams {
  //! recognize and parse CXSMILES extensions
  bool allowCXSMILES{true};
  //! throw an exception if the CXSMILES parsing fails
  bool strictCXSMILES{true};
  //! parse (and set) the molecule name as well
  bool parseName{true};
  //! toggles merging H atoms in the SMARTS into neighboring atoms
  bool mergeHs{false};
  //! skip the final cleanup stage
  bool skipCleanup{false};
  //! enable debugging in the SMARTS parser
  bool debugParse{false};
  //! allows SMARTS "macros"
  std::map<std::string, std::string> replacements;
};
68
69RDKIT_SMILESPARSE_EXPORT std::unique_ptr<RDKit::RWMol> MolFromSmiles(
70 const std::string &smi,
71 const SmilesParserParams &params = SmilesParserParams());
72RDKIT_SMILESPARSE_EXPORT std::unique_ptr<RDKit::RWMol> MolFromSmarts(
73 const std::string &sma,
74 const SmartsParserParams &params = SmartsParserParams());
75
76RDKIT_SMILESPARSE_EXPORT std::unique_ptr<RDKit::Atom> AtomFromSmiles(
77 const std::string &smi);
78RDKIT_SMILESPARSE_EXPORT std::unique_ptr<RDKit::Bond> BondFromSmiles(
79 const std::string &smi);
80
81RDKIT_SMILESPARSE_EXPORT std::unique_ptr<RDKit::Atom> AtomFromSmarts(
82 const std::string &sma);
83RDKIT_SMILESPARSE_EXPORT std::unique_ptr<RDKit::Bond> BondFromSmarts(
84 const std::string &sma);
85
86} // namespace SmilesParse
87} // namespace v2
88
89inline namespace v1 {
91
//! Options for the v1 (legacy) SMILES parsing functions.
/*!
  \c replacements is a pointer that may be null; the v1 SmilesToMol()
  converter in this header copies the pointed-to map only when it is non-null.
 */
struct RDKIT_SMILESPARSE_EXPORT SmilesParserParams {
  int debugParse = 0;   /**< enable debugging in the SMILES parser*/
  bool sanitize = true; /**< sanitize the molecule after building it */
  std::map<std::string, std::string> *replacements =
      nullptr;               /**< allows SMILES "macros" */
  bool allowCXSMILES = true; /**< recognize and parse CXSMILES*/
  bool strictCXSMILES =
      true; /**< throw an exception if the CXSMILES parsing fails */
  bool parseName = true;    /**< parse (and set) the molecule name as well */
  bool removeHs = true;     /**< remove Hs after constructing the molecule */
  bool skipCleanup = false; /**< skip the final cleanup stage */
};
104
//! Options for the v1 (legacy) SMARTS parsing functions.
/*!
  \c replacements is a pointer that may be null; the v1 SmartsToMol()
  converter in this header copies the pointed-to map only when it is non-null.
 */
struct RDKIT_SMILESPARSE_EXPORT SmartsParserParams {
  int debugParse = 0; /**< enable debugging in the SMARTS parser*/
  std::map<std::string, std::string> *replacements =
      nullptr;               /**< allows SMARTS "macros" */
  bool allowCXSMILES = true; /**< recognize and parse CXSMILES extensions */
  bool strictCXSMILES =
      true; /**< throw an exception if the CXSMILES parsing fails */
  bool parseName = true; /**< parse (and set) the molecule name as well */
  bool mergeHs =
      false; /**< toggles merging H atoms in the SMARTS into neighboring atoms*/
  bool skipCleanup = false; /**< skip the final cleanup stage */
};
117
118inline RDKit::RWMol *SmilesToMol(const std::string &smi,
119 const SmilesParserParams &ps) {
120 RDKit::v2::SmilesParse::SmilesParserParams v2ps;
121 v2ps.debugParse = ps.debugParse;
122 v2ps.sanitize = ps.sanitize;
123
124 if (ps.replacements) {
125 v2ps.replacements = *ps.replacements;
126 }
127 v2ps.allowCXSMILES = ps.allowCXSMILES;
128 v2ps.strictCXSMILES = ps.strictCXSMILES;
129 v2ps.parseName = ps.parseName;
130 v2ps.removeHs = ps.removeHs;
131 v2ps.skipCleanup = ps.skipCleanup;
132 return RDKit::v2::SmilesParse::MolFromSmiles(smi, v2ps).release();
133}
134
135inline Atom *SmilesToAtom(const std::string &smi) {
136 auto res = RDKit::v2::SmilesParse::AtomFromSmiles(smi).release();
137 return res;
138}
139
140inline Bond *SmilesToBond(const std::string &smi) {
141 return RDKit::v2::SmilesParse::BondFromSmiles(smi).release();
142}
143
144//! Construct a molecule from a SMILES string
145/*!
146 \param smi the SMILES to convert
147 \param debugParse toggles verbose debugging information from the parser
148 \param sanitize toggles H removal and sanitization of the molecule
149 \param replacements a string->string map of replacement strings. See below
150 for more information about replacements.
151
152 \return a pointer to the new molecule; the caller is responsible for free'ing
153 this.
154
155 The optional replacements map can be used to do string substitution of
156 abbreviations
157 in the input SMILES. The set of substitutions is repeatedly looped through
158 until
159 the string no longer changes. It is the responsibility of the caller to make
160 sure
161 that substitutions results in legal and sensible SMILES.
162
163 Examples of substitutions:
164 \code
165 CC{Q}C with {"{Q}":"OCCO"} -> CCOCCOC
166 C{A}C{Q}C with {"{Q}":"OCCO", "{A}":"C1(CC1)"} -> CC1(CC1)COCCOC
167 C{A}C{Q}C with {"{Q}":"{X}CC{X}", "{A}":"C1CC1", "{X}":"N"} -> CC1CC1CNCCNC
168 \endcode
169
170 */
172 const std::string &smi, int debugParse = 0, bool sanitize = true,
173 std::map<std::string, std::string> *replacements = nullptr) {
174 RDKit::v2::SmilesParse::SmilesParserParams params;
175 params.debugParse = debugParse;
176 if (replacements) {
177 params.replacements = *replacements;
178 }
179 if (sanitize) {
180 params.sanitize = true;
181 params.removeHs = true;
182 } else {
183 params.sanitize = false;
184 params.removeHs = false;
185 }
186 return RDKit::v2::SmilesParse::MolFromSmiles(smi, params).release();
187};
188
189inline RWMol *SmartsToMol(const std::string &sma,
190 const SmartsParserParams &ps) {
191 RDKit::v2::SmilesParse::SmartsParserParams v2ps;
192 v2ps.debugParse = ps.debugParse;
193 if (ps.replacements) {
194 v2ps.replacements = *ps.replacements;
195 }
196 v2ps.allowCXSMILES = ps.allowCXSMILES;
197 v2ps.strictCXSMILES = ps.strictCXSMILES;
198 v2ps.parseName = ps.parseName;
199 v2ps.mergeHs = ps.mergeHs;
200 v2ps.skipCleanup = ps.skipCleanup;
201
202 return RDKit::v2::SmilesParse::MolFromSmarts(sma, v2ps).release();
203}
204
205//! Construct a molecule from a SMARTS string
206/*!
207 \param sma the SMARTS to convert
208 \param debugParse toggles verbose debugging information from the parser
209 \param mergeHs toggles merging H atoms in the SMARTS into neighboring
210 atoms
211 \param replacements a string->string map of replacement strings.
212 \see SmilesToMol for more information about replacements
213
214 \return a pointer to the new molecule; the caller is responsible for free'ing
215 this.
216 */
218 const std::string &sma, int debugParse = 0, bool mergeHs = false,
219 std::map<std::string, std::string> *replacements = nullptr) {
220 RDKit::v2::SmilesParse::SmartsParserParams ps;
221 ps.debugParse = debugParse;
222 ps.mergeHs = mergeHs;
223 if (replacements) {
224 ps.replacements = *replacements;
225 }
226 return RDKit::v2::SmilesParse::MolFromSmarts(sma, ps).release();
227};
228
229inline Atom *SmartsToAtom(const std::string &sma) {
230 return RDKit::v2::SmilesParse::AtomFromSmarts(sma).release();
231}
232inline Bond *SmartsToBond(const std::string &sma) {
233 return RDKit::v2::SmilesParse::BondFromSmarts(sma).release();
234}
235} // namespace v1
236
237inline std::unique_ptr<RDKit::RWMol> operator"" _smiles(const char *text,
238 size_t len) {
239 std::string smi(text, len);
240 try {
241 return v2::SmilesParse::MolFromSmiles(smi);
242 } catch (const RDKit::MolSanitizeException &) {
243 return nullptr;
244 }
245}
246inline std::unique_ptr<RDKit::RWMol> operator"" _smarts(const char *text,
247 size_t len) {
248 std::string smi(text, len);
249 return v2::SmilesParse::MolFromSmarts(smi);
250}
251
252} // namespace RDKit
253
254#endif
The class for representing atoms.
Definition Atom.h:75
class for representing a bond
Definition Bond.h:47
class for flagging sanitization errors
RWMol is a molecule class that is intended to be edited.
Definition RWMol.h:32
SmilesParseException(const std::string msg)
Definition SmilesParse.h:29
~SmilesParseException() noexcept override=default
const char * what() const noexcept override
Definition SmilesParse.h:30
#define RDKIT_SMILESPARSE_EXPORT
Definition export.h:497
Atom * SmilesToAtom(const std::string &smi)
Bond * SmilesToBond(const std::string &smi)
RWMol * SmartsToMol(const std::string &sma, const SmartsParserParams &ps)
Bond * SmartsToBond(const std::string &sma)
RDKit::RWMol * SmilesToMol(const std::string &smi, const SmilesParserParams &ps)
Atom * SmartsToAtom(const std::string &sma)
Std stuff.
std::map< std::string, std::string > * replacements
std::map< std::string, std::string > * replacements
Definition SmilesParse.h:95