RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
SmilesWrite.h
Go to the documentation of this file.
1//
2// Copyright (C) 2002-2021 Greg Landrum and other RDKit contributors
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#include <RDGeneral/export.h>
11#ifndef RD_SMILESWRITE_H_012020
12#define RD_SMILESWRITE_H_012020
13
14#include <string>
15#include <vector>
16#include <memory>
17#include <cstdint>
18#include <limits>
19
20namespace RDKit {
21class Atom;
22class Bond;
23class ROMol;
24
26 bool doIsomericSmiles =
27 true; /**< include stereochemistry and isotope information */
28 bool doKekule = false; /**< kekulize the molecule before generating the SMILES
29 and output single/double bonds. NOTE that the output
30 is not canonical and that this will throw an
31 exception if the molecule cannot be kekulized. */
32 bool canonical = true; /**< generate canonical SMILES */
33 bool allBondsExplicit = false; /**< include symbols for all bonds */
34 bool allHsExplicit = false; /**< provide hydrogen counts for every atom */
35 bool doRandom = false; /**< randomize the output order. The resulting SMILES
36 is not canonical */
37 int rootedAtAtom = -1; /**< make sure the SMILES starts at the specified
38 atom. The resulting SMILES is not canonical */
39};
40namespace SmilesWrite {
41
58
59//! \brief returns the cxsmiles data for a molecule
61 const ROMol &mol, std::uint32_t flags = CXSmilesFields::CX_ALL);
62
63//! \brief returns true if the atom number is in the SMILES organic subset
65
66//! \brief returns the SMILES for an atom
67/*!
68 \param atom : the atom to work with
69 \param doKekule : we're doing kekulized smiles (e.g. don't use
70 lower case for the atom label)
71 \param bondIn : the bond we came into the atom on (unused)
72 \param allHsExplicit : if true, hydrogen counts will be provided for every
73 atom.
74 \param isomericSmiles : if true, isomeric SMILES will be generated
75*/
77 bool doKekule = false,
78 const Bond *bondIn = nullptr,
79 bool allHsExplicit = false,
80 bool isomericSmiles = true);
81
82//! \brief returns the SMILES for a bond
83/*!
84 \param bond : the bond to work with
85 \param atomToLeftIdx : the index of the atom preceding \c bond
86 in the SMILES
87 \param doKekule : we're doing kekulized smiles (e.g. write out
88 bond orders for aromatic bonds)
89 \param allBondsExplicit : if true, symbols will be included for all bonds.
90*/
92 const Bond *bond, int atomToLeftIdx = -1, bool doKekule = false,
93 bool allBondsExplicit = false);
94
95namespace detail {
97 const ROMol &mol, const SmilesWriteParams &params, bool doingCXSmiles);
98}
99
100} // namespace SmilesWrite
101
102//! \brief returns canonical SMILES for a molecule
104 const ROMol &mol, const SmilesWriteParams &params);
105
106//! \brief returns canonical SMILES for a molecule
107/*!
108 \param mol : the molecule in question.
109 \param doIsomericSmiles : include stereochemistry and isotope information
110 in the SMILES
111
112 \param doKekule : do Kekule smiles (i.e. don't use aromatic bonds) NOTE that
113 this will throw an exception if the molecule cannot be kekulized.
114
115 \param rootedAtAtom : make sure the SMILES starts at the specified atom.
116 The resulting SMILES is not, of course, canonical.
117 \param canonical : if false, no attempt will be made to canonicalize the
118 SMILES
119 \param allBondsExplicit : if true, symbols will be included for all bonds.
120 \param allHsExplicit : if true, hydrogen counts will be provided for every
121 atom.
122 */
123inline std::string MolToSmiles(const ROMol &mol, bool doIsomericSmiles = true,
124 bool doKekule = false, int rootedAtAtom = -1,
125 bool canonical = true,
126 bool allBondsExplicit = false,
127 bool allHsExplicit = false,
128 bool doRandom = false) {
130 ps.doIsomericSmiles = doIsomericSmiles;
131 ps.doKekule = doKekule;
132 ps.rootedAtAtom = rootedAtAtom;
133 ps.canonical = canonical;
134 ps.allBondsExplicit = allBondsExplicit;
135 ps.allHsExplicit = allHsExplicit;
136 ps.doRandom = doRandom;
137 return MolToSmiles(mol, ps);
138};
139
140//! \brief returns a vector of random SMILES for a molecule (may contain
141//! duplicates)
142/*!
143 \param mol : the molecule in question.
144 \param numSmiles : the number of SMILES to return
145 \param randomSeed : if >0, will be used to seed the random number generator
146 \param doIsomericSmiles : include stereochemistry and isotope information
147 in the SMILES
148 \param doKekule : do Kekule smiles (i.e. don't use aromatic bonds)
149 \param allBondsExplicit : if true, symbols will be included for all bonds.
150 \param allHsExplicit : if true, hydrogen counts will be provided for every
151 atom.
152 */
154 const ROMol &mol, unsigned int numSmiles, unsigned int randomSeed = 0,
155 bool doIsomericSmiles = true, bool doKekule = false,
156 bool allBondsExplicit = false, bool allHsExplicit = false);
157
158//! \brief returns canonical SMILES for part of a molecule
160 const ROMol &mol, const SmilesWriteParams &params,
161 const std::vector<int> &atomsToUse,
162 const std::vector<int> *bondsToUse = nullptr,
163 const std::vector<std::string> *atomSymbols = nullptr,
164 const std::vector<std::string> *bondSymbols = nullptr);
165
166//! \brief returns canonical SMILES for part of a molecule
167/*!
168 \param mol : the molecule in question.
169 \param atomsToUse : indices of the atoms in the fragment
170 \param bondsToUse : indices of the bonds in the fragment. If this is not
171 provided,
172 all bonds between the atoms in atomsToUse will be included
173 \param atomSymbols : symbols to use for the atoms in the output SMILES
174 \param bondSymbols : symbols to use for the bonds in the output SMILES
175 \param doIsomericSmiles : include stereochemistry and isotope information
176 in the SMILES
177 \param doKekule : do Kekule smiles (i.e. don't use aromatic bonds)
178 \param rootedAtAtom : make sure the SMILES starts at the specified atom.
179 The resulting SMILES is not, of course, canonical.
180 \param canonical : if false, no attempt will be made to canonicalize the
181 SMILES
182 \param allBondsExplicit : if true, symbols will be included for all bonds.
183 \param allHsExplicit : if true, hydrogen counts will be provided for every
184 atom.
185 \param doRandom : generate a randomized smiles string by randomly choosing
186 the priority to follow in the DFS traversal. [default false]
187
188 \b NOTE: the bondSymbols are *not* currently used in the canonicalization.
189
190 */
191inline std::string MolFragmentToSmiles(
192 const ROMol &mol, const std::vector<int> &atomsToUse,
193 const std::vector<int> *bondsToUse = nullptr,
194 const std::vector<std::string> *atomSymbols = nullptr,
195 const std::vector<std::string> *bondSymbols = nullptr,
196 bool doIsomericSmiles = true, bool doKekule = false, int rootedAtAtom = -1,
197 bool canonical = true, bool allBondsExplicit = false,
198 bool allHsExplicit = false) {
200 ps.doIsomericSmiles = doIsomericSmiles;
201 ps.doKekule = doKekule;
202 ps.rootedAtAtom = rootedAtAtom;
203 ps.canonical = canonical;
204 ps.allBondsExplicit = allBondsExplicit;
205 ps.allHsExplicit = allHsExplicit;
206 return MolFragmentToSmiles(mol, ps, atomsToUse, bondsToUse, atomSymbols,
208}
209
211 RestoreBondDirOptionTrue = 0, //!< DO restore bond dirs
212 RestoreBondDirOptionClear = 1 //!< clear all bond dir information
214
215//! \brief returns canonical CXSMILES for a molecule
217 const ROMol &mol, const SmilesWriteParams &ps,
218 std::uint32_t flags = SmilesWrite::CXSmilesFields::CX_ALL,
220
221//! \brief returns canonical CXSMILES for a molecule
222/*!
223 \param mol : the molecule in question.
224 \param doIsomericSmiles : include stereochemistry and isotope information
225 in the SMILES
226 \param doKekule : do Kekule smiles (i.e. don't use aromatic bonds)
227 \param rootedAtAtom : make sure the SMILES starts at the specified atom.
228 The resulting SMILES is not, of course, canonical.
229 \param canonical : if false, no attempt will be made to canonicalize the
230 SMILES
231 \param allBondsExplicit : if true, symbols will be included for all bonds.
232 \param allHsExplicit : if true, hydrogen counts will be provided for every
233 atom.
234 \param doRandom : generate a randomized smiles string by randomly choosing
235 the priority to follow in the DFS traversal. [default false]
236 */
237inline std::string MolToCXSmiles(const ROMol &mol, bool doIsomericSmiles = true,
238 bool doKekule = false, int rootedAtAtom = -1,
239 bool canonical = true,
240 bool allBondsExplicit = false,
241 bool allHsExplicit = false,
242 bool doRandom = false) {
244 ps.doIsomericSmiles = doIsomericSmiles;
245 ps.doKekule = doKekule;
246 ps.rootedAtAtom = rootedAtAtom;
247 ps.canonical = canonical;
248 ps.allBondsExplicit = allBondsExplicit;
249 ps.allHsExplicit = allHsExplicit;
250 ps.doRandom = doRandom;
252};
253
254//! \brief returns canonical CXSMILES for part of a molecule
256 const ROMol &mol, const SmilesWriteParams &params,
257 const std::vector<int> &atomsToUse,
258 const std::vector<int> *bondsToUse = nullptr,
259 const std::vector<std::string> *atomSymbols = nullptr,
260 const std::vector<std::string> *bondSymbols = nullptr);
261
262//! \brief returns canonical CXSMILES for part of a molecule
263/*!
264 \param mol : the molecule in question.
265 \param atomsToUse : indices of the atoms in the fragment
266 \param bondsToUse : indices of the bonds in the fragment. If this is not
267 provided,
268 all bonds between the atoms in atomsToUse will be included
269 \param atomSymbols : symbols to use for the atoms in the output SMILES
270 \param bondSymbols : symbols to use for the bonds in the output SMILES
271 \param doIsomericSmiles : include stereochemistry and isotope information
272 in the SMILES
273 \param doKekule : do Kekule smiles (i.e. don't use aromatic bonds)
274 \param rootedAtAtom : make sure the SMILES starts at the specified atom.
275 The resulting SMILES is not, of course, canonical.
276 \param canonical : if false, no attempt will be made to canonicalize the
277 SMILES
278 \param allBondsExplicit : if true, symbols will be included for all bonds.
279 \param allHsExplicit : if true, hydrogen counts will be provided for every
280 atom.
281
282 \b NOTE: the bondSymbols are *not* currently used in the canonicalization.
283
284 */
285inline std::string MolFragmentToCXSmiles(
286 const ROMol &mol, const std::vector<int> &atomsToUse,
287 const std::vector<int> *bondsToUse = nullptr,
288 const std::vector<std::string> *atomSymbols = nullptr,
289 const std::vector<std::string> *bondSymbols = nullptr,
290 bool doIsomericSmiles = true, bool doKekule = false, int rootedAtAtom = -1,
291 bool canonical = true, bool allBondsExplicit = false,
292 bool allHsExplicit = false) {
294 ps.doIsomericSmiles = doIsomericSmiles;
295 ps.doKekule = doKekule;
296 ps.rootedAtAtom = rootedAtAtom;
297 ps.canonical = canonical;
298 ps.allBondsExplicit = allBondsExplicit;
299 ps.allHsExplicit = allHsExplicit;
300 return MolFragmentToCXSmiles(mol, ps, atomsToUse, bondsToUse, atomSymbols,
302}
303
304} // namespace RDKit
305#endif
The class for representing atoms.
Definition Atom.h:75
class for representing a bond
Definition Bond.h:47
#define RDKIT_SMILESPARSE_EXPORT
Definition export.h:481
RDKIT_SMILESPARSE_EXPORT std::string MolToSmiles(const ROMol &mol, const SmilesWriteParams &params, bool doingCXSmiles)
RDKIT_SMILESPARSE_EXPORT bool inOrganicSubset(int atomicNumber)
returns true if the atom number is in the SMILES organic subset
RDKIT_SMILESPARSE_EXPORT std::string GetBondSmiles(const Bond *bond, int atomToLeftIdx=-1, bool doKekule=false, bool allBondsExplicit=false)
returns the SMILES for a bond
RDKIT_SMILESPARSE_EXPORT std::string getCXExtensions(const ROMol &mol, std::uint32_t flags=CXSmilesFields::CX_ALL)
returns the cxsmiles data for a molecule
RDKIT_SMILESPARSE_EXPORT std::string GetAtomSmiles(const Atom *atom, bool doKekule=false, const Bond *bondIn=nullptr, bool allHsExplicit=false, bool isomericSmiles=true)
returns the SMILES for an atom
Std stuff.
RDKIT_SMILESPARSE_EXPORT std::vector< std::string > MolToRandomSmilesVect(const ROMol &mol, unsigned int numSmiles, unsigned int randomSeed=0, bool doIsomericSmiles=true, bool doKekule=false, bool allBondsExplicit=false, bool allHsExplicit=false)
returns a vector of random SMILES for a molecule (may contain duplicates)
bool rdvalue_is(const RDValue_cast_t)
RDKIT_SMILESPARSE_EXPORT std::string MolFragmentToSmiles(const ROMol &mol, const SmilesWriteParams &params, const std::vector< int > &atomsToUse, const std::vector< int > *bondsToUse=nullptr, const std::vector< std::string > *atomSymbols=nullptr, const std::vector< std::string > *bondSymbols=nullptr)
returns canonical SMILES for part of a molecule
RDKIT_SMILESPARSE_EXPORT std::string MolToSmiles(const ROMol &mol, const SmilesWriteParams &params)
returns canonical SMILES for a molecule
RDKIT_SMILESPARSE_EXPORT std::string MolToCXSmiles(const ROMol &mol, const SmilesWriteParams &ps, std::uint32_t flags=SmilesWrite::CXSmilesFields::CX_ALL, RestoreBondDirOption restoreBondDirs=RestoreBondDirOptionClear)
returns canonical CXSMILES for a molecule
RDKIT_SMILESPARSE_EXPORT std::string MolFragmentToCXSmiles(const ROMol &mol, const SmilesWriteParams &params, const std::vector< int > &atomsToUse, const std::vector< int > *bondsToUse=nullptr, const std::vector< std::string > *atomSymbols=nullptr, const std::vector< std::string > *bondSymbols=nullptr)
returns canonical CXSMILES for part of a molecule
RestoreBondDirOption
@ RestoreBondDirOptionTrue
@ RestoreBondDirOptionClear