RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
SmilesWrite.h
Go to the documentation of this file.
1//
2// Copyright (C) 2002-2021 Greg Landrum and other RDKit contributors
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#include <RDGeneral/export.h>
11#ifndef RD_SMILESWRITE_H_012020
12#define RD_SMILESWRITE_H_012020
13
14#include <string>
15#include <vector>
16#include <memory>
17#include <cstdint>
18#include <limits>
20
21#include <boost/shared_ptr.hpp>
22
23namespace RDKit {
24class Atom;
25class Bond;
26class ROMol;
27
28typedef std::vector<boost::shared_ptr<ROMol>> MOL_SPTR_VECT;
29
31 bool doIsomericSmiles =
32 true; /**< include stereochemistry and isotope information */
33 bool doKekule = false; /**< kekulize the molecule before generating the SMILES
34 and output single/double bonds. NOTE that the output
35 is not canonical and that this will throw an
36 exception if the molecule cannot be kekulized. */
37 bool canonical = true; /**< generate canonical SMILES */
38 bool cleanStereo = true; /**< clean up stereo */
39 bool allBondsExplicit = false; /**< include symbols for all bonds */
40 bool allHsExplicit = false; /**< provide hydrogen counts for every atom */
41 bool doRandom = false; /**< randomize the output order. The resulting SMILES
42 is not canonical and the value of the canonical
43 parameter will be ignored. */
44 int rootedAtAtom = -1; /**< make sure the SMILES starts at the specified
45 atom. The resulting SMILES is not canonical and
46 the value of the canonical parameter will be
47 ignored. */
48 bool includeDativeBonds =
49 true; /**< include the RDKit extension for dative bonds. Otherwise dative
50 bonds will be written as single bonds. */
51 bool ignoreAtomMapNumbers = false; /**< If true, ignores any atom map numbers
52 when canonicalizing the molecule */
53};
54
55namespace SmilesWrite {
56
// Bit flags: every named field below is a distinct power of two (1 << k).
// NOTE(review): presumably these select which CXSMILES extension sections are
// written when passed through a std::uint32_t `flags` argument - confirm
// against the callers.
58 CX_NONE = 0,  // no bits set
59 CX_ATOM_LABELS = 1 << 0,
60 CX_MOLFILE_VALUES = 1 << 1,
61 CX_COORDS = 1 << 2,
62 CX_RADICALS = 1 << 3,
63 CX_ATOM_PROPS = 1 << 4,
64 CX_LINKNODES = 1 << 5,
65 CX_ENHANCEDSTEREO = 1 << 6,
66 CX_SGROUPS = 1 << 7,
67 CX_POLYMER = 1 << 8,
68 CX_BOND_CFG = 1 << 9,
69 CX_BOND_ATROPISOMER = 1 << 10,
70 CX_COORDINATE_BONDS = 1 << 11,
71 CX_ALL = 0x7fffffff,  // bits 0 through 30 set, so every flag above is included
73);
74
75//! \brief returns the cxsmiles data for a molecule
77 const ROMol &mol, std::uint32_t flags = CXSmilesFields::CX_ALL);
78
79//! \brief returns the cxsmiles data for a vector of molecules
81 const std::vector<ROMol *> &mols, std::uint32_t flags);
82
83//! \brief returns true if the atom number is in the SMILES organic subset
85
86//! \brief returns the SMILES for an atom
87/*!
88 \param atom : the atom to work with
89 \param ps : the parameters controlling the SMILES generation
90*/
92 const SmilesWriteParams &ps);
93
94//! \brief returns the SMILES for an atom
95/*!
96 \param atom : the atom to work with
97 \param doKekule : we're doing kekulized smiles (e.g. don't use
98 lower case for the atom label)
99 \param bondIn : the bond we came into the atom on (unused)
100 \param allHsExplicit : if true, hydrogen counts will be provided for every
101 atom.
102 \param isomericSmiles : if true, isomeric SMILES will be generated
103*/
104inline std::string GetAtomSmiles(const Atom *atom, bool doKekule = false,
105 const Bond * = nullptr,  // unnamed parameter: the body has no way to refer to it
106 bool allHsExplicit = false,
107 bool isomericSmiles = true) {
108 // RDUNUSED_PARAM(bondIn);
// NOTE(review): `ps` is declared on a listing line that is missing from this
// extract (presumably a SmilesWriteParams, and presumably isomericSmiles is
// copied into it there as well) - confirm against the real header.
111 ps.doKekule = doKekule;
112 ps.allHsExplicit = allHsExplicit;
// Hand the populated parameter object to the two-argument overload.
113 return GetAtomSmiles(atom, ps);
114};
115
116//! \brief returns the SMILES for a bond
117/*!
118 \param bond : the bond to work with
119 \param ps : the parameters controlling the SMILES generation
120 \param atomToLeftIdx : the index of the atom preceding \c bond
121 in the SMILES
122*/
124 const SmilesWriteParams &ps,
125 int atomToLeftIdx = -1);
126//! \brief returns the SMILES for a bond
127/*!
128 \param bond : the bond to work with
129 \param atomToLeftIdx : the index of the atom preceding \c bond
130 in the SMILES
131 \param doKekule : we're doing kekulized smiles (e.g. write out
132 bond orders for aromatic bonds)
133 \param allBondsExplicit : if true, symbols will be included for all bonds.
134*/
135inline std::string GetBondSmiles(const Bond *bond, int atomToLeftIdx = -1,
136 bool doKekule = false,
137 bool allBondsExplicit = false) {
// NOTE(review): `ps` is declared on a listing line that is missing from this
// extract (presumably a SmilesWriteParams) - confirm against the real header.
139 ps.doKekule = doKekule;
140 ps.allBondsExplicit = allBondsExplicit;
// This signature has no isomeric flag, so doIsomericSmiles is always switched
// off here regardless of the caller's other arguments.
141 ps.doIsomericSmiles = false;
// Hand the populated parameter object to the (bond, params, atomToLeftIdx)
// overload.
142 return GetBondSmiles(bond, ps, atomToLeftIdx);
143};
144
145namespace detail {
147 const ROMol &mol, const SmilesWriteParams &params, bool doingCXSmiles);
148}
149
150} // namespace SmilesWrite
151
152//! \brief returns canonical SMILES for a molecule
154 const ROMol &mol, const SmilesWriteParams &params);
155
156//! \brief returns SMILES for a molecule, canonical by default
157/*!
158 \param mol : the molecule in question.
159 \param doIsomericSmiles : include stereochemistry and isotope information
160 in the SMILES
161
162 \param doKekule : do Kekule smiles (i.e. don't use aromatic bonds) NOTE that
163 this will throw an exception if the molecule cannot be kekulized.
164
165 \param rootedAtAtom : make sure the SMILES starts at the specified atom.
166 The resulting SMILES is not, of course, canonical.
167 \param canonical : if false, no attempt will be made to canonicalize the
168 SMILES
169 \param allBondsExplicit : if true, symbols will be included for all bonds.
170 \param allHsExplicit : if true, hydrogen counts will be provided for every
171 atom.
172 \param doRandom : if true, the first atom in the SMILES string will be
173 selected at random and the SMILES string will not be canonical
174 \param ignoreAtomMapNumbers : if true, ignores any atom map numbers when
175 canonicalizing the molecule
176 */
177inline std::string MolToSmiles(const ROMol &mol, bool doIsomericSmiles = true,
178 bool doKekule = false, int rootedAtAtom = -1,
179 bool canonical = true,
180 bool allBondsExplicit = false,
181 bool allHsExplicit = false,
182 bool doRandom = false,
183 bool ignoreAtomMapNumbers = false) {
// NOTE(review): `ps` is declared on a listing line that is missing from this
// extract (presumably a SmilesWriteParams) - confirm against the real header.
// Each argument is copied into the field of `ps` that carries the same name.
185 ps.doIsomericSmiles = doIsomericSmiles;
186 ps.doKekule = doKekule;
187 ps.rootedAtAtom = rootedAtAtom;
188 ps.canonical = canonical;
189 ps.allBondsExplicit = allBondsExplicit;
190 ps.allHsExplicit = allHsExplicit;
191 ps.doRandom = doRandom;
192 ps.ignoreAtomMapNumbers = ignoreAtomMapNumbers;
// Hand the populated parameter object to the (mol, params) overload.
193 return MolToSmiles(mol, ps);
194};
195
196//! \brief returns a vector of random SMILES for a molecule (may contain
197//! duplicates)
198/*!
199 \param mol : the molecule in question.
200 \param numSmiles : the number of SMILES to return
201 \param randomSeed : if >0, will be used to seed the random number generator
202 \param doIsomericSmiles : include stereochemistry and isotope information
203 in the SMILES
204 \param doKekule : do Kekule smiles (i.e. don't use aromatic bonds)
205 \param allBondsExplicit : if true, symbols will be included for all bonds.
206 \param allHsExplicit : if true, hydrogen counts will be provided for every
207 atom.
208 */
210 const ROMol &mol, unsigned int numSmiles, unsigned int randomSeed = 0,
211 bool doIsomericSmiles = true, bool doKekule = false,
212 bool allBondsExplicit = false, bool allHsExplicit = false);
213
214//! \brief returns canonical SMILES for part of a molecule
216 const ROMol &mol, const SmilesWriteParams &params,
217 const std::vector<int> &atomsToUse,
218 const std::vector<int> *bondsToUse = nullptr,
219 const std::vector<std::string> *atomSymbols = nullptr,
220 const std::vector<std::string> *bondSymbols = nullptr);
221
222//! \brief returns canonical SMILES for part of a molecule
223/*!
224 \param mol : the molecule in question.
225 \param atomsToUse : indices of the atoms in the fragment
226 \param bondsToUse : indices of the bonds in the fragment. If this is not
227 provided,
228 all bonds between the atoms in atomsToUse will be included
229 \param atomSymbols : symbols to use for the atoms in the output SMILES
230 \param bondSymbols : symbols to use for the bonds in the output SMILES
231 \param doIsomericSmiles : include stereochemistry and isotope information
232 in the SMILES
233 \param doKekule : do Kekule smiles (i.e. don't use aromatic bonds)
234 \param rootedAtAtom : make sure the SMILES starts at the specified atom.
235 The resulting SMILES is not, of course, canonical.
236 \param canonical : if false, no attempt will be made to canonicalize the
237 SMILES
238 \param allBondsExplicit : if true, symbols will be included for all bonds.
239 \param allHsExplicit : if true, hydrogen counts will be provided for every
240 atom.
241 \note this overload takes no \c doRandom argument, and the body below does
242 not assign ps.doRandom.
243
244 \b NOTE: the bondSymbols are *not* currently used in the canonicalization.
245
246 */
247inline std::string MolFragmentToSmiles(
248 const ROMol &mol, const std::vector<int> &atomsToUse,
249 const std::vector<int> *bondsToUse = nullptr,
250 const std::vector<std::string> *atomSymbols = nullptr,
251 const std::vector<std::string> *bondSymbols = nullptr,
252 bool doIsomericSmiles = true, bool doKekule = false, int rootedAtAtom = -1,
253 bool canonical = true, bool allBondsExplicit = false,
254 bool allHsExplicit = false) {
// NOTE(review): `ps` is declared on a listing line that is missing from this
// extract (presumably a SmilesWriteParams) - confirm against the real header.
256 ps.doIsomericSmiles = doIsomericSmiles;
257 ps.doKekule = doKekule;
258 ps.rootedAtAtom = rootedAtAtom;
259 ps.canonical = canonical;
260 ps.allBondsExplicit = allBondsExplicit;
261 ps.allHsExplicit = allHsExplicit;
// Hand off to the params-based overload; the final argument line of this call
// is not present in this extract.
262 return MolFragmentToSmiles(mol, ps, atomsToUse, bondsToUse, atomSymbols,
264}
265
267 RestoreBondDirOptionTrue = 0, //!< DO restore bond dirs
268 RestoreBondDirOptionClear = 1 //!< clear all bond dir information
269);
270
271//! \brief returns canonical CXSMILES for a molecule
273 const ROMol &mol, const SmilesWriteParams &ps,
274 std::uint32_t flags = SmilesWrite::CXSmilesFields::CX_ALL,
276 RestoreBondDirOption::RestoreBondDirOptionClear);
277
278//! \brief returns canonical CXSMILES for a molecule
279/*!
280 \param mol : the molecule in question.
281 \param doIsomericSmiles : include stereochemistry and isotope information
282 in the SMILES
283 \param doKekule : do Kekule smiles (i.e. don't use aromatic bonds)
284 \param rootedAtAtom : make sure the SMILES starts at the specified atom.
285 The resulting SMILES is not, of course, canonical.
286 \param canonical : if false, no attempt will be made to canonicalize the
287 SMILES
288 \param allBondsExplicit : if true, symbols will be included for all bonds.
289 \param allHsExplicit : if true, hydrogen counts will be provided for every
290 atom.
291 \param doRandom : generate a randomized smiles string by randomly choosing
292 the priority to follow in the DFS traversal. [default false]
293 */
294inline std::string MolToCXSmiles(const ROMol &mol, bool doIsomericSmiles = true,
295 bool doKekule = false, int rootedAtAtom = -1,
296 bool canonical = true,
297 bool allBondsExplicit = false,
298 bool allHsExplicit = false,
299 bool doRandom = false) {
// NOTE(review): `ps` is declared on a listing line that is missing from this
// extract (presumably a SmilesWriteParams) - confirm against the real header.
301 ps.doIsomericSmiles = doIsomericSmiles;
302 ps.doKekule = doKekule;
303 ps.rootedAtAtom = rootedAtAtom;
304 ps.canonical = canonical;
305 ps.allBondsExplicit = allBondsExplicit;
306 ps.allHsExplicit = allHsExplicit;
307 ps.doRandom = doRandom;
// This signature exposes no flags argument, so CX_ALL is always passed on.
308 return MolToCXSmiles(mol, ps, SmilesWrite::CXSmilesFields::CX_ALL);
309};
310
311//! \brief returns canonical CXSMILES for part of a molecule
313 const ROMol &mol, const SmilesWriteParams &params,
314 const std::vector<int> &atomsToUse,
315 const std::vector<int> *bondsToUse = nullptr,
316 const std::vector<std::string> *atomSymbols = nullptr,
317 const std::vector<std::string> *bondSymbols = nullptr);
318
319//! \brief returns canonical CXSMILES for part of a molecule
320/*!
321 \param mol : the molecule in question.
322 \param atomsToUse : indices of the atoms in the fragment
323 \param bondsToUse : indices of the bonds in the fragment. If this is not
324 provided,
325 all bonds between the atoms in atomsToUse will be included
326 \param atomSymbols : symbols to use for the atoms in the output SMILES
327 \param bondSymbols : symbols to use for the bonds in the output SMILES
328 \param doIsomericSmiles : include stereochemistry and isotope information
329 in the SMILES
330 \param doKekule : do Kekule smiles (i.e. don't use aromatic bonds)
331 \param rootedAtAtom : make sure the SMILES starts at the specified atom.
332 The resulting SMILES is not, of course, canonical.
333 \param canonical : if false, no attempt will be made to canonicalize the
334 SMILES
335 \param allBondsExplicit : if true, symbols will be included for all bonds.
336 \param allHsExplicit : if true, hydrogen counts will be provided for every
337 atom.
338
339 \b NOTE: the bondSymbols are *not* currently used in the canonicalization.
340
341 */
342inline std::string MolFragmentToCXSmiles(
343 const ROMol &mol, const std::vector<int> &atomsToUse,
344 const std::vector<int> *bondsToUse = nullptr,
345 const std::vector<std::string> *atomSymbols = nullptr,
346 const std::vector<std::string> *bondSymbols = nullptr,
347 bool doIsomericSmiles = true, bool doKekule = false, int rootedAtAtom = -1,
348 bool canonical = true, bool allBondsExplicit = false,
349 bool allHsExplicit = false) {
// NOTE(review): `ps` is declared on a listing line that is missing from this
// extract (presumably a SmilesWriteParams) - confirm against the real header.
351 ps.doIsomericSmiles = doIsomericSmiles;
352 ps.doKekule = doKekule;
353 ps.rootedAtAtom = rootedAtAtom;
354 ps.canonical = canonical;
355 ps.allBondsExplicit = allBondsExplicit;
356 ps.allHsExplicit = allHsExplicit;
// Hand off to the params-based overload; the final argument line of this call
// is not present in this extract.
357 return MolFragmentToCXSmiles(mol, ps, atomsToUse, bondsToUse, atomSymbols,
359}
360
361} // namespace RDKit
362#endif
#define BETTER_ENUM(Enum, Underlying,...)
Definition BetterEnums.h:17
The class for representing atoms.
Definition Atom.h:75
class for representing a bond
Definition Bond.h:47
#define RDKIT_SMILESPARSE_EXPORT
Definition export.h:505
RDKIT_SMILESPARSE_EXPORT std::string MolToSmiles(const ROMol &mol, const SmilesWriteParams &params, bool doingCXSmiles)
RDKIT_SMILESPARSE_EXPORT std::string GetAtomSmiles(const Atom *atom, const SmilesWriteParams &ps)
returns the SMILES for an atom
RDKIT_SMILESPARSE_EXPORT bool inOrganicSubset(int atomicNumber)
returns true if the atom number is in the SMILES organic subset
RDKIT_SMILESPARSE_EXPORT std::string GetBondSmiles(const Bond *bond, const SmilesWriteParams &ps, int atomToLeftIdx=-1)
returns the SMILES for a bond
RDKIT_SMILESPARSE_EXPORT std::string getCXExtensions(const ROMol &mol, std::uint32_t flags=CXSmilesFields::CX_ALL)
returns the cxsmiles data for a molecule
Std stuff.
RDKIT_SMILESPARSE_EXPORT std::vector< std::string > MolToRandomSmilesVect(const ROMol &mol, unsigned int numSmiles, unsigned int randomSeed=0, bool doIsomericSmiles=true, bool doKekule=false, bool allBondsExplicit=false, bool allHsExplicit=false)
returns a vector of random SMILES for a molecule (may contain duplicates)
bool rdvalue_is(const RDValue_cast_t)
RDKIT_SMILESPARSE_EXPORT std::string MolToCXSmiles(const ROMol &mol, const SmilesWriteParams &ps, std::uint32_t flags=SmilesWrite::CXSmilesFields::CX_ALL, RestoreBondDirOption restoreBondDirs=RestoreBondDirOption::RestoreBondDirOptionClear)
returns canonical CXSMILES for a molecule
RDKIT_SMILESPARSE_EXPORT std::string MolFragmentToSmiles(const ROMol &mol, const SmilesWriteParams &params, const std::vector< int > &atomsToUse, const std::vector< int > *bondsToUse=nullptr, const std::vector< std::string > *atomSymbols=nullptr, const std::vector< std::string > *bondSymbols=nullptr)
returns canonical SMILES for part of a molecule
RDKIT_SMILESPARSE_EXPORT std::string MolToSmiles(const ROMol &mol, const SmilesWriteParams &params)
returns canonical SMILES for a molecule
RDKIT_SMILESPARSE_EXPORT std::string MolFragmentToCXSmiles(const ROMol &mol, const SmilesWriteParams &params, const std::vector< int > &atomsToUse, const std::vector< int > *bondsToUse=nullptr, const std::vector< std::string > *atomSymbols=nullptr, const std::vector< std::string > *bondSymbols=nullptr)
returns canonical CXSMILES for part of a molecule
std::vector< boost::shared_ptr< ROMol > > MOL_SPTR_VECT