RDKit
Open-source cheminformatics and machine learning.
MolWriters.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2002-2013 Greg Landrum, Rational Discovery LLC
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 
11 #ifndef _RD_MOLWRITERS_H_
12 #define _RD_MOLWRITERS_H_
13 
14 #include <RDGeneral/types.h>
15 
16 #include <string>
17 #include <iostream>
18 #include <GraphMol/ROMol.h>
19 
20 namespace RDKit {
21 
//! Default value for the \c confId argument of every write() method declared
//! in this header.
//! NOTE(review): -1 presumably means "use the molecule's default conformer" -
//! confirm against the implementation files.
//! Because this is a non-const \c static at namespace scope in a header, each
//! translation unit that includes the header gets its own, modifiable copy.
22 static int defaultConfId = -1;
23 class MolWriter {
24  public:
25  virtual ~MolWriter() {}
26  virtual void write(const ROMol &mol, int confId = defaultConfId) = 0;
27  virtual void flush() = 0;
28  virtual void close() = 0;
29  virtual void setProps(const STR_VECT &propNames) = 0;
30  virtual unsigned int numMols() const = 0;
31 };
32 
33 //! The SmilesWriter is for writing molecules and properties to
34 //! delimited text files.
35 class SmilesWriter : public MolWriter {
36  /******************************************************************************
37  * A Smiles Table writer - this is how it is used
38  * - create a SmilesWriter with a output file name (or a ostream), a
39  *delimiter,
40  * and a list of properties that need to be written out
41  * - then a call is made to the write function for each molecule that needs
42  *to
43  * be written out
44  ******************************************************************************/
45  public:
46  /*!
47  \param fileName : filename to write to ("-" to write to stdout)
48  \param delimiter : delimiter to use in the text file
49  \param nameHeader : used to label the name column in the output. If this
50  is provided as the empty string, no names will be
51  written.
52  \param includeHeader : toggles inclusion of a header line in the output
53  \param isomericSmiles : toggles generation of isomeric SMILES
54  \param kekuleSmiles : toggles the generation of kekule SMILES
55 
56  */
57  SmilesWriter(const std::string &fileName, const std::string &delimiter = " ",
58  const std::string &nameHeader = "Name",
59  bool includeHeader = true, bool isomericSmiles = false,
60  bool kekuleSmiles = false);
61  //! \overload
62  SmilesWriter(std::ostream *outStream, std::string delimiter = " ",
63  std::string nameHeader = "Name", bool includeHeader = true,
64  bool takeOwnership = false, bool isomericSmiles = false,
65  bool kekuleSmiles = false);
66 
67  ~SmilesWriter();
68 
69  //! \brief set a vector of property names that are need to be
70  //! written out for each molecule
71  void setProps(const STR_VECT &propNames);
72 
73  //! \brief write a new molecule to the file
74  void write(const ROMol &mol, int confId = defaultConfId);
75 
76  //! \brief flush the ostream
77  void flush() {
78  PRECONDITION(dp_ostream, "no output stream");
79  try {
80  dp_ostream->flush();
81  } catch (...) {
82  try {
83  if (dp_ostream->good())
84  dp_ostream->setstate(std::ios::badbit);
85  } catch (const std::runtime_error& e) {
86  }
87  }
88  };
89 
90  //! \brief close our stream (the writer cannot be used again)
91  void close() {
92  flush();
93  std::ostream *tmp_ostream = dp_ostream;
94  dp_ostream = NULL;
95  if (df_owner) {
96  df_owner = false;
97  delete tmp_ostream;
98  }
99  };
100 
101  //! \brief get the number of molecules written so far
102  unsigned int numMols() const { return d_molid; };
103 
104  private:
105  // local initialization
106  void init(const std::string &delimiter, const std::string &nameHeader,
107  bool includeHeader, bool isomericSmiles, bool kekuleSmiles);
108 
109  // dumps a header line to the output stream
110  void dumpHeader() const;
111 
112  std::ostream *dp_ostream;
113  bool df_owner;
114  bool df_includeHeader; // whether or not to include a title line
115  unsigned int d_molid; // the number of the molecules we wrote so far
116  std::string d_delim; // delimiter string between various records
117  std::string d_nameHeader; // header for the name column in the output file
118  STR_VECT d_props; // list of property name that need to be written out
119  bool df_isomericSmiles; // whether or not to do isomeric smiles
120  bool df_kekuleSmiles; // whether or not to do kekule smiles
121 };
122 
123 //! The SDWriter is for writing molecules and properties to
124 //! SD files
125 class SDWriter : public MolWriter {
126  /**************************************************************************************
127  * A SD file ( or stream) writer - this is how it is used
128  * - create a SDMolWriter with a output file name (or a ostream),
129  * and a list of properties that need to be written out
130  * - then a call is made to the write function for each molecule that needs
131  *to be written out
132  **********************************************************************************************/
133  public:
134  /*!
135  \param fileName : filename to write to ("-" to write to stdout)
136  */
137  SDWriter(const std::string &fileName);
138  SDWriter(std::ostream *outStream, bool takeOwnership = false);
139 
140  ~SDWriter();
141 
142  //! \brief set a vector of property names that are need to be
143  //! written out for each molecule
144  void setProps(const STR_VECT &propNames);
145 
146  //! \brief return the text that would be written to the file
147  static std::string getText(const ROMol &mol, int confId = defaultConfId,
148  bool kekulize = true, bool force_V3000 = false,
149  int molid = -1, STR_VECT *propNames = NULL);
150 
151  //! \brief write a new molecule to the file
152  void write(const ROMol &mol, int confId = defaultConfId);
153 
154  //! \brief flush the ostream
155  void flush() {
156  PRECONDITION(dp_ostream, "no output stream");
157  try {
158  dp_ostream->flush();
159  } catch (...) {
160  try {
161  if (dp_ostream->good())
162  dp_ostream->setstate(std::ios::badbit);
163  } catch (const std::runtime_error& e) {
164  }
165  }
166  };
167 
168  //! \brief close our stream (the writer cannot be used again)
169  void close() {
170  flush();
171  std::ostream *tmp_ostream = dp_ostream;
172  dp_ostream = NULL;
173  if (df_owner) {
174  df_owner = false;
175  delete tmp_ostream;
176  }
177  };
178 
179  //! \brief get the number of molecules written so far
180  unsigned int numMols() const { return d_molid; };
181 
182  void setForceV3000(bool val) { df_forceV3000 = val; };
183  bool getForceV3000() const { return df_forceV3000; };
184 
185  void setKekulize(bool val) { df_kekulize = val; };
186  bool getKekulize() const { return df_kekulize; };
187 
188  private:
189  void writeProperty(const ROMol &mol, const std::string &name);
190 
191  std::ostream *dp_ostream;
192  bool df_owner;
193  unsigned int d_molid; // the number of the molecules we wrote so far
194  STR_VECT d_props; // list of property name that need to be written out
195  bool df_forceV3000; // force writing the mol blocks as V3000
196  bool df_kekulize; // toggle kekulization of molecules on writing
197 };
198 
199 //! The TDTWriter is for writing molecules and properties to
200 //! TDT files
201 class TDTWriter : public MolWriter {
202  /**************************************************************************************
203  * A TDT file ( or stream) writer - this is how it is used
204  * - create a TDTWriter with a output file name (or a ostream),
205  * and a list of properties that need to be written out
206  * - then a call is made to the write function for each molecule that needs
207  *to be written out
208  **********************************************************************************************/
209  public:
210  /*!
211  \param fileName : filename to write to ("-" to write to stdout)
212  */
213  TDTWriter(const std::string &fileName);
214  TDTWriter(std::ostream *outStream, bool takeOwnership = false);
215 
216  ~TDTWriter();
217 
218  //! \brief set a vector of property names that are need to be
219  //! written out for each molecule
220  void setProps(const STR_VECT &propNames);
221 
222  //! \brief write a new molecule to the file
223  void write(const ROMol &mol, int confId = defaultConfId);
224 
225  //! \brief flush the ostream
226  void flush() {
227  PRECONDITION(dp_ostream, "no output stream");
228  try {
229  dp_ostream->flush();
230  } catch (...) {
231  try {
232  if (dp_ostream->good())
233  dp_ostream->setstate(std::ios::badbit);
234  } catch (const std::runtime_error& e) {
235  }
236  }
237  };
238 
239  //! \brief close our stream (the writer cannot be used again)
240  void close() {
241  flush();
242  std::ostream *tmp_ostream = dp_ostream;
243  dp_ostream = NULL;
244  if (df_owner) {
245  df_owner = false;
246  delete tmp_ostream;
247  }
248  };
249 
250  //! \brief get the number of molecules written so far
251  unsigned int numMols() const { return d_molid; };
252 
253  void setWrite2D(bool state = true) { df_write2D = state; };
254  bool getWrite2D() const { return df_write2D; };
255 
256  void setWriteNames(bool state = true) { df_writeNames = state; };
257  bool getWriteNames() const { return df_writeNames; };
258 
259  void setNumDigits(unsigned int numDigits) { d_numDigits = numDigits; };
260  unsigned int getNumDigits() const { return d_numDigits; };
261 
262  private:
263  void writeProperty(const ROMol &mol, const std::string &name);
264 
265  std::ostream *dp_ostream;
266  bool df_owner;
267  unsigned int d_molid; // the number of molecules we wrote so far
268  STR_VECT d_props; // list of property name that need to be written out
269  bool df_write2D; // write 2D coordinates instead of 3D
270  bool df_writeNames; // write a name record for each molecule
271  unsigned int
272  d_numDigits; // number of digits to use in our output of coordinates;
273 };
274 
275 //! The PDBWriter is for writing molecules to Brookhaven Protein
276 //! DataBank format files.
277 class PDBWriter : public MolWriter {
278  public:
279  PDBWriter(const std::string &fileName, unsigned int flavor = 0);
280  PDBWriter(std::ostream *outStream, bool takeOwnership = false,
281  unsigned int flavor = 0);
282  ~PDBWriter();
283 
284  //! \brief write a new molecule to the file
285  void write(const ROMol &mol, int confId = defaultConfId);
286 
287  void setProps(const STR_VECT &){};
288 
289  //! \brief flush the ostream
290  void flush() {
291  PRECONDITION(dp_ostream, "no output stream");
292  try {
293  dp_ostream->flush();
294  } catch (...) {
295  try {
296  if (dp_ostream->good())
297  dp_ostream->setstate(std::ios::badbit);
298  } catch (const std::runtime_error& e) {
299  }
300  }
301  };
302 
303  //! \brief close our stream (the writer cannot be used again)
304  void close() {
305  flush();
306  std::ostream *tmp_ostream = dp_ostream;
307  dp_ostream = NULL;
308  if (df_owner) {
309  df_owner = false;
310  delete tmp_ostream;
311  }
312  };
313 
314  //! \brief get the number of molecules written so far
315  unsigned int numMols() const { return d_count; };
316 
317  private:
318  std::ostream *dp_ostream;
319  unsigned int d_flavor;
320  unsigned int d_count;
321  bool df_owner;
322 };
323 }
324 
325 #endif
void close()
close our stream (the writer cannot be used again)
Definition: MolWriters.h:91
void setProps(const STR_VECT &)
Definition: MolWriters.h:287
bool getWriteNames() const
Definition: MolWriters.h:257
virtual ~MolWriter()
Definition: MolWriters.h:25
void close()
close our stream (the writer cannot be used again)
Definition: MolWriters.h:240
virtual unsigned int numMols() const =0
void flush()
flush the ostream
Definition: MolWriters.h:290
virtual void close()=0
void flush()
flush the ostream
Definition: MolWriters.h:77
Defines the primary molecule class ROMol as well as associated typedefs.
void close()
close our stream (the writer cannot be used again)
Definition: MolWriters.h:169
void close()
close our stream (the writer cannot be used again)
Definition: MolWriters.h:304
ROMol is a molecule class that is intended to have a fixed topology.
Definition: ROMol.h:103
unsigned int numMols() const
get the number of molecules written so far
Definition: MolWriters.h:102
bool getForceV3000() const
Definition: MolWriters.h:183
void flush()
flush the ostream
Definition: MolWriters.h:155
bool getWrite2D() const
Definition: MolWriters.h:254
unsigned int numMols() const
get the number of molecules written so far
Definition: MolWriters.h:251
virtual void flush()=0
static int defaultConfId
Definition: MolWriters.h:22
void setNumDigits(unsigned int numDigits)
Definition: MolWriters.h:259
void setWriteNames(bool state=true)
Definition: MolWriters.h:256
virtual void setProps(const STR_VECT &propNames)=0
void setForceV3000(bool val)
Definition: MolWriters.h:182
Includes a bunch of functionality for handling Atom and Bond queries.
Definition: Atom.h:29
bool getKekulize() const
Definition: MolWriters.h:186
unsigned int getNumDigits() const
Definition: MolWriters.h:260
virtual void write(const ROMol &mol, int confId=defaultConfId)=0
unsigned int numMols() const
get the number of molecules written so far
Definition: MolWriters.h:180
#define PRECONDITION(expr, mess)
Definition: Invariant.h:107
void setWrite2D(bool state=true)
Definition: MolWriters.h:253
unsigned int numMols() const
get the number of molecules written so far
Definition: MolWriters.h:315
void flush()
flush the ostream
Definition: MolWriters.h:226
std::vector< std::string > STR_VECT
Definition: Dict.h:26
void setKekulize(bool val)
Definition: MolWriters.h:185