RDKit
Open-source cheminformatics and machine learning.
MolWriters.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2002-2013 Greg Landrum, Rational Discovery LLC
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 
11 #ifndef _RD_MOLWRITERS_H_
12 #define _RD_MOLWRITERS_H_
13 
14 #include <RDGeneral/types.h>
15 
16 #include <string>
17 #include <iostream>
18 #include <GraphMol/ROMol.h>
19 
20 namespace RDKit {
21 
22 static int defaultConfId = -1;
23 class MolWriter {
24  public:
25  virtual ~MolWriter() {}
26  virtual void write(const ROMol &mol, int confId = defaultConfId) = 0;
27  virtual void flush() = 0;
28  virtual void close() = 0;
29  virtual void setProps(const STR_VECT &propNames) = 0;
30  virtual unsigned int numMols() const = 0;
31 };
32 
33 //! The SmilesWriter is for writing molecules and properties to
34 //! delimited text files.
35 class SmilesWriter : public MolWriter {
36  /******************************************************************************
37  * A Smiles Table writer - this is how it is used
38  * - create a SmilesWriter with a output file name (or a ostream), a
39  *delimiter,
40  * and a list of properties that need to be written out
41  * - then a call is made to the write function for each molecule that needs
42  *to
43  * be written out
44  ******************************************************************************/
45  public:
46  /*!
47  \param fileName : filename to write to ("-" to write to stdout)
48  \param delimiter : delimiter to use in the text file
49  \param nameHeader : used to label the name column in the output. If this
50  is provided as the empty string, no names will be
51  written.
52  \param includeHeader : toggles inclusion of a header line in the output
53  \param isomericSmiles : toggles generation of isomeric SMILES
54  \param kekuleSmiles : toggles the generation of kekule SMILES
55 
56  */
57  SmilesWriter(const std::string &fileName, const std::string &delimiter = " ",
58  const std::string &nameHeader = "Name",
59  bool includeHeader = true, bool isomericSmiles = false,
60  bool kekuleSmiles = false);
61  //! \overload
62  SmilesWriter(std::ostream *outStream, std::string delimiter = " ",
63  std::string nameHeader = "Name", bool includeHeader = true,
64  bool takeOwnership = false, bool isomericSmiles = false,
65  bool kekuleSmiles = false);
66 
67  ~SmilesWriter();
68 
69  //! \brief set a vector of property names that are need to be
70  //! written out for each molecule
71  void setProps(const STR_VECT &propNames);
72 
73  //! \brief write a new molecule to the file
74  void write(const ROMol &mol, int confId = defaultConfId);
75 
76  //! \brief flush the ostream
77  void flush() {
78  PRECONDITION(dp_ostream, "no output stream");
79  try {
80  dp_ostream->flush();
81  } catch (...) {
82  try {
83  if (dp_ostream->good())
84  dp_ostream->setstate(std::ios::badbit);
85  } catch (const std::runtime_error& e) {
86  }
87  }
88  };
89 
90  //! \brief close our stream (the writer cannot be used again)
91  void close() {
92  flush();
93  std::ostream *tmp_ostream = dp_ostream;
94  dp_ostream = NULL;
95  if (df_owner) {
96  df_owner = false;
97  delete tmp_ostream;
98  }
99  };
100 
101  //! \brief get the number of molecules written so far
102  unsigned int numMols() const { return d_molid; };
103 
104  private:
105  // local initialization
106  void init(const std::string &delimiter, const std::string &nameHeader,
107  bool includeHeader, bool isomericSmiles, bool kekuleSmiles);
108 
109  // dumps a header line to the output stream
110  void dumpHeader() const;
111 
112  std::ostream *dp_ostream;
113  bool df_owner;
114  bool df_includeHeader; // whether or not to include a title line
115  unsigned int d_molid; // the number of the molecules we wrote so far
116  std::string d_delim; // delimiter string between various records
117  std::string d_nameHeader; // header for the name column in the output file
118  STR_VECT d_props; // list of property name that need to be written out
119  bool df_isomericSmiles; // whether or not to do isomeric smiles
120  bool df_kekuleSmiles; // whether or not to do kekule smiles
121 };
122 
123 //! The SDWriter is for writing molecules and properties to
124 //! SD files
125 class SDWriter : public MolWriter {
126  /**************************************************************************************
127  * A SD file ( or stream) writer - this is how it is used
128  * - create a SDMolWriter with a output file name (or a ostream),
129  * and a list of properties that need to be written out
130  * - then a call is made to the write function for each molecule that needs
131  *to be written out
132  **********************************************************************************************/
133  public:
134  /*!
135  \param fileName : filename to write to ("-" to write to stdout)
136  */
137  SDWriter(const std::string &fileName);
138  SDWriter(std::ostream *outStream, bool takeOwnership = false);
139 
140  ~SDWriter();
141 
142  //! \brief set a vector of property names that are need to be
143  //! written out for each molecule
144  void setProps(const STR_VECT &propNames);
145 
146  //! \brief return the text that would be written to the file
147  static std::string getText(const ROMol &mol, int confId = defaultConfId,
148  bool kekulize = true, bool force_V3000 = false,
149  int molid = -1, STR_VECT *propNames = NULL);
150 
151  //! \brief write a new molecule to the file
152  void write(const ROMol &mol, int confId = defaultConfId);
153 
154  //! \brief flush the ostream
155  void flush() {
156  PRECONDITION(dp_ostream, "no output stream");
157  try {
158  dp_ostream->flush();
159  } catch (...) {
160  try {
161  if (dp_ostream->good())
162  dp_ostream->setstate(std::ios::badbit);
163  } catch (const std::runtime_error& e) {
164  }
165  }
166  };
167 
168  //! \brief close our stream (the writer cannot be used again)
169  void close() {
170  flush();
171  std::ostream *tmp_ostream = dp_ostream;
172  dp_ostream = NULL;
173  if (df_owner) {
174  df_owner = false;
175  delete tmp_ostream;
176  }
177  };
178 
179  //! \brief get the number of molecules written so far
180  unsigned int numMols() const { return d_molid; };
181 
182  void setForceV3000(bool val) { df_forceV3000 = val; };
183  bool getForceV3000() const { return df_forceV3000; };
184 
185  void setKekulize(bool val) { df_kekulize = val; };
186  bool getKekulize() const { return df_kekulize; };
187 
188  private:
189  void writeProperty(const ROMol &mol, const std::string &name);
190 
191  std::ostream *dp_ostream;
192  bool df_owner;
193  unsigned int d_molid; // the number of the molecules we wrote so far
194  STR_VECT d_props; // list of property name that need to be written out
195  bool df_forceV3000; // force writing the mol blocks as V3000
196  bool df_kekulize; // toggle kekulization of molecules on writing
197 };
198 
199 //! The TDTWriter is for writing molecules and properties to
200 //! TDT files
201 class TDTWriter : public MolWriter {
202  /**************************************************************************************
203  * A TDT file ( or stream) writer - this is how it is used
204  * - create a TDTWriter with a output file name (or a ostream),
205  * and a list of properties that need to be written out
206  * - then a call is made to the write function for each molecule that needs
207  *to be written out
208  **********************************************************************************************/
209  public:
210  /*!
211  \param fileName : filename to write to ("-" to write to stdout)
212  */
213  TDTWriter(const std::string &fileName);
214  TDTWriter(std::ostream *outStream, bool takeOwnership = false);
215 
216  ~TDTWriter();
217 
218  //! \brief set a vector of property names that are need to be
219  //! written out for each molecule
220  void setProps(const STR_VECT &propNames);
221 
222  //! \brief write a new molecule to the file
223  void write(const ROMol &mol, int confId = defaultConfId);
224 
225  //! \brief flush the ostream
226  void flush() {
227  PRECONDITION(dp_ostream, "no output stream");
228  try {
229  dp_ostream->flush();
230  } catch (...) {
231  try {
232  if (dp_ostream->good())
233  dp_ostream->setstate(std::ios::badbit);
234  } catch (const std::runtime_error& e) {
235  }
236  }
237  };
238 
239  //! \brief close our stream (the writer cannot be used again)
240  void close() {
241  flush();
242  std::ostream *tmp_ostream = dp_ostream;
243  dp_ostream = NULL;
244  if (df_owner) {
245  df_owner = false;
246  delete tmp_ostream;
247  }
248  };
249 
250  //! \brief get the number of molecules written so far
251  unsigned int numMols() const { return d_molid; };
252 
253  void setWrite2D(bool state = true) { df_write2D = state; };
254  bool getWrite2D() const { return df_write2D; };
255 
256  void setWriteNames(bool state = true) { df_writeNames = state; };
257  bool getWriteNames() const { return df_writeNames; };
258 
259  void setNumDigits(unsigned int numDigits) { d_numDigits = numDigits; };
260  unsigned int getNumDigits() const { return d_numDigits; };
261 
262  private:
263  void writeProperty(const ROMol &mol, const std::string &name);
264 
265  std::ostream *dp_ostream;
266  bool df_owner;
267  unsigned int d_molid; // the number of molecules we wrote so far
268  STR_VECT d_props; // list of property name that need to be written out
269  bool df_write2D; // write 2D coordinates instead of 3D
270  bool df_writeNames; // write a name record for each molecule
271  unsigned int
272  d_numDigits; // number of digits to use in our output of coordinates;
273 };
274 
275 //! The PDBWriter is for writing molecules to Brookhaven Protein
276 //! DataBank format files.
277 class PDBWriter : public MolWriter {
278  public:
279  PDBWriter(const std::string &fileName, unsigned int flavor = 0);
280  PDBWriter(std::ostream *outStream, bool takeOwnership = false,
281  unsigned int flavor = 0);
282  ~PDBWriter();
283 
284  //! \brief write a new molecule to the file
285  void write(const ROMol &mol, int confId = defaultConfId);
286 
287  void setProps(const STR_VECT &){};
288 
289  //! \brief flush the ostream
290  void flush() {
291  PRECONDITION(dp_ostream, "no output stream");
292  try {
293  dp_ostream->flush();
294  } catch (...) {
295  try {
296  if (dp_ostream->good())
297  dp_ostream->setstate(std::ios::badbit);
298  } catch (const std::runtime_error& e) {
299  }
300  }
301  };
302 
303  //! \brief close our stream (the writer cannot be used again)
304  void close() {
305  flush();
306  std::ostream *tmp_ostream = dp_ostream;
307  dp_ostream = NULL;
308  if (df_owner) {
309  df_owner = false;
310  delete tmp_ostream;
311  }
312  };
313 
314  //! \brief get the number of molecules written so far
315  unsigned int numMols() const { return d_count; };
316 
317  private:
318  std::ostream *dp_ostream;
319  unsigned int d_flavor;
320  unsigned int d_count;
321  bool df_owner;
322 };
323 }
324 
325 #endif
void close()
close our stream (the writer cannot be used again)
Definition: MolWriters.h:91
void setProps(const STR_VECT &propNames)
set a vector of property names that need to be written out for each molecule
void setProps(const STR_VECT &)
Definition: MolWriters.h:287
bool getWriteNames() const
Definition: MolWriters.h:257
virtual ~MolWriter()
Definition: MolWriters.h:25
void close()
close our stream (the writer cannot be used again)
Definition: MolWriters.h:240
virtual unsigned int numMols() const =0
PDBWriter(const std::string &fileName, unsigned int flavor=0)
void flush()
flush the ostream
Definition: MolWriters.h:290
virtual void close()=0
void flush()
flush the ostream
Definition: MolWriters.h:77
void setProps(const STR_VECT &propNames)
set a vector of property names that need to be written out for each molecule
Defines the primary molecule class ROMol as well as associated typedefs.
void close()
close our stream (the writer cannot be used again)
Definition: MolWriters.h:169
SmilesWriter(const std::string &fileName, const std::string &delimiter=" ", const std::string &nameHeader="Name", bool includeHeader=true, bool isomericSmiles=false, bool kekuleSmiles=false)
void close()
close our stream (the writer cannot be used again)
Definition: MolWriters.h:304
ROMol is a molecule class that is intended to have a fixed topology.
Definition: ROMol.h:102
unsigned int numMols() const
get the number of molecules written so far
Definition: MolWriters.h:102
bool getForceV3000() const
Definition: MolWriters.h:183
void flush()
flush the ostream
Definition: MolWriters.h:155
bool getWrite2D() const
Definition: MolWriters.h:254
unsigned int numMols() const
get the number of molecules written so far
Definition: MolWriters.h:251
void write(const ROMol &mol, int confId=defaultConfId)
write a new molecule to the file
virtual void flush()=0
static int defaultConfId
Definition: MolWriters.h:22
void write(const ROMol &mol, int confId=defaultConfId)
write a new molecule to the file
void setNumDigits(unsigned int numDigits)
Definition: MolWriters.h:259
void setWriteNames(bool state=true)
Definition: MolWriters.h:256
virtual void setProps(const STR_VECT &propNames)=0
void setForceV3000(bool val)
Definition: MolWriters.h:182
Includes a bunch of functionality for handling Atom and Bond queries.
Definition: Atom.h:28
bool getKekulize() const
Definition: MolWriters.h:186
void write(const ROMol &mol, int confId=defaultConfId)
write a new molecule to the file
TDTWriter(const std::string &fileName)
SDWriter(const std::string &fileName)
unsigned int getNumDigits() const
Definition: MolWriters.h:260
virtual void write(const ROMol &mol, int confId=defaultConfId)=0
unsigned int numMols() const
get the number of molecules written so far
Definition: MolWriters.h:180
#define PRECONDITION(expr, mess)
Definition: Invariant.h:103
void setWrite2D(bool state=true)
Definition: MolWriters.h:253
void write(const ROMol &mol, int confId=defaultConfId)
write a new molecule to the file
unsigned int numMols() const
get the number of molecules written so far
Definition: MolWriters.h:315
void setProps(const STR_VECT &propNames)
set a vector of property names that need to be written out for each molecule
static std::string getText(const ROMol &mol, int confId=defaultConfId, bool kekulize=true, bool force_V3000=false, int molid=-1, STR_VECT *propNames=NULL)
return the text that would be written to the file
void flush()
flush the ostream
Definition: MolWriters.h:226
std::vector< std::string > STR_VECT
Definition: Dict.h:26
void setKekulize(bool val)
Definition: MolWriters.h:185