RDKit
Open-source cheminformatics and machine learning.
FileParsers.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2002-2013 Greg Landrum, Rational Discovery LLC
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #ifndef _RD_FILEPARSERS_H
11 #define _RD_FILEPARSERS_H
12 
13 #include <RDGeneral/types.h>
14 #include <GraphMol/RDKitBase.h>
15 
16 #include <string>
17 #include <iostream>
18 #include <vector>
19 #include <exception>
20 
21 #include <boost/shared_ptr.hpp>
22 
23 namespace RDKit {
// NOTE(review): presumably the maximum length of a single line read from a
// mol file - confirm against the parser implementation.
24 const int MOLFILE_MAXLINE = 256;
// NOTE(review): presumably returns a copy of orig with leading/trailing
// whitespace removed - confirm against the implementation.
25 std::string strip(const std::string &orig);
26 
27 //-----
28 // mol files
29 //-----
//! convenience alias for a vector of RWMOL_SPTR
30 typedef std::vector<RWMOL_SPTR> RWMOL_SPTR_VECT;
31 //! \brief construct a molecule from MDL mol data in a stream
32 /*!
33  * \param inStream - stream containing the data
34  * \param line - current line number (used for error reporting)
35  * \param sanitize - toggles sanitization and stereochemistry
36  * perception of the molecule
37  * \param removeHs - toggles removal of Hs from the molecule. H removal
38  * is only done if the molecule is sanitized
39  * \param strictParsing - if not set, the parser is more lax about correctness
40  * of the contents.
41  *
42  * \return a pointer to the molecule constructed from the stream
43  */
44 RWMol *MolDataStreamToMol(std::istream *inStream, unsigned int &line,
45  bool sanitize = true, bool removeHs = true,
46  bool strictParsing = true);
47 //! \overload
48 RWMol *MolDataStreamToMol(std::istream &inStream, unsigned int &line,
49  bool sanitize = true, bool removeHs = true,
50  bool strictParsing = true);
51 //! \brief construct a molecule from an MDL mol block
52 /*!
53  * \param molBlock - string containing the mol block
54  * \param sanitize - toggles sanitization and stereochemistry
55  * perception of the molecule
56  * \param removeHs - toggles removal of Hs from the molecule. H removal
57  * is only done if the molecule is sanitized
58  * \param strictParsing - if not set, the parser is more lax about correctness
59  * of the contents.
60  */
61 RWMol *MolBlockToMol(const std::string &molBlock, bool sanitize = true,
62  bool removeHs = true, bool strictParsing = true);
63 
64 //! \brief construct a molecule from an MDL mol file
65 /*!
66  * \param fName - string containing the file name
67  * \param sanitize - toggles sanitization and stereochemistry
68  * perception of the molecule
69  * \param removeHs - toggles removal of Hs from the molecule. H removal
70  * is only done if the molecule is sanitized
71  * \param strictParsing - if not set, the parser is more lax about correctness
72  * of the contents.
73  */
74 RWMol *MolFileToMol(const std::string &fName, bool sanitize = true,
75  bool removeHs = true, bool strictParsing = true);
76 
77 //! \brief generates an MDL mol block for a molecule
78 /*!
79  * \param mol - the molecule in question
80  * \param includeStereo - toggles inclusion of stereochemistry information
81  * \param confId - selects the conformer to be used
82  * \param kekulize - triggers kekulization of the molecule before it is
83  * written
84  * \param forceV3000 - force generation of a V3000 mol block (this
85  * happens automatically for molecules with
86  * more than 999 atoms or bonds)
87  */
88 std::string MolToMolBlock(const ROMol &mol, bool includeStereo = true,
89  int confId = -1, bool kekulize = true,
90  bool forceV3000 = false);
91 //! \brief Writes a molecule to an MDL mol file
92 /*!
93  * \param mol - the molecule in question
94  * \param fName - the name of the file to use
95  * \param includeStereo - toggles inclusion of stereochemistry information
96  * \param confId - selects the conformer to be used
97  * \param kekulize - triggers kekulization of the molecule before it is
98  * written
99  * \param forceV3000 - force generation of a V3000 mol block (this
100  * happens automatically for molecules with
101  * more than 999 atoms or bonds)
102  */
103 void MolToMolFile(const ROMol &mol, const std::string &fName,
104  bool includeStereo = true, int confId = -1,
105  bool kekulize = true, bool forceV3000 = false);
106 
107 //-----
108 // TPL handling:
109 //-----
110 
111 //! \brief translate TPL data (BioCad format) into a multi-conf molecule
112 /*!
113  \param inStream: the stream from which to read
114  \param line: used to track the line number of errors
115  \param sanitize: toggles sanitization and stereochemistry
116  perception of the molecule
117  \param skipFirstConf: according to the TPL format description, the atomic
118  coords in the atom-information block describe the first
119  conformation and the first conf block describes the second
120  conformation. The CombiCode, on the other hand, writes
121  the first conformation data both to the atom-information
122  block and to the first conf block. We want to be able to
123  read CombiCode-style tpls, so this mis-feature is
124  tolerated: the duplicated first conformation is parsed
125  accordingly when this flag is set.
126 */
127 RWMol *TPLDataStreamToMol(std::istream *inStream, unsigned int &line,
128  bool sanitize = true, bool skipFirstConf = false);
129 
130 //! \brief construct a multi-conf molecule from a TPL (BioCad format) file
131 /*!
132  \param fName: the name of the file from which to read
133  \param sanitize: toggles sanitization and stereochemistry
134  perception of the molecule
135  \param skipFirstConf: according to the TPL format description, the atomic
136  coords in the atom-information block describe the first
137  conformation and the first conf block describes the second
138  conformation. The CombiCode, on the other hand, writes
139  the first conformation data both to the atom-information
140  block and to the first conf block. We want to be able to
141  read CombiCode-style tpls, so this mis-feature is
142  tolerated: the duplicated first conformation is parsed
143  accordingly when this flag is set.
144 */
145 RWMol *TPLFileToMol(const std::string &fName, bool sanitize = true,
146  bool skipFirstConf = false);
147 
// TPL writers. Both take the molecule, the name of a partial-charge property
// (default "_GasteigerCharge") and a writeFirstConfTwice flag (default false).
// NOTE(review): presumably MolToTPLText returns the TPL text for mol and
// MolToTPLFile writes the same text to fName; writeFirstConfTwice looks like
// the output-side counterpart of the readers' skipFirstConf (CombiCode-style
// files) - confirm against the implementation.
148 std::string MolToTPLText(
149  const ROMol &mol, const std::string &partialChargeProp = "_GasteigerCharge",
150  bool writeFirstConfTwice = false);
151 void MolToTPLFile(const ROMol &mol, const std::string &fName,
152  const std::string &partialChargeProp = "_GasteigerCharge",
153  bool writeFirstConfTwice = false);
154 
155 //-----
156 // MOL2 handling
157 //-----
158 
//! selects the atom type definitions to use when reading mol2 data
159 typedef enum {
160  CORINA = 0 //!< supports output from Corina and some dbtranslate output
161 } Mol2Type;
162 
163 //! \brief construct a molecule from a Tripos mol2 file
164 /*!
165  *
166  * \param fName - string containing the file name
167  * \param sanitize - toggles sanitization of the molecule
168  * \param removeHs - toggles removal of Hs from the molecule. H removal
169  * is only done if the molecule is sanitized
170  * \param variant - the atom type definitions to use
171  */
172 RWMol *Mol2FileToMol(const std::string &fName, bool sanitize = true,
173  bool removeHs = true, Mol2Type variant = CORINA);
174 
175 //! \brief construct a molecule from Tripos mol2 data in a stream
176 /*!
177  * \param inStream - stream containing the data
178  * \param sanitize - toggles sanitization of the molecule
179  * \param removeHs - toggles removal of Hs from the molecule. H removal
180  * is only done if the molecule is sanitized
181  * \param variant - the atom type definitions to use
182  */
183 RWMol *Mol2DataStreamToMol(std::istream *inStream, bool sanitize = true,
184  bool removeHs = true, Mol2Type variant = CORINA);
185 //! \overload
186 RWMol *Mol2DataStreamToMol(std::istream &inStream, bool sanitize = true,
187  bool removeHs = true, Mol2Type variant = CORINA);
188 
189 //! \brief construct a molecule from a Tripos mol2 block
190 /*!
191  * \param molBlock - string containing the mol2 block
192  * \param sanitize - toggles sanitization of the molecule
193  * \param removeHs - toggles removal of Hs from the molecule. H removal
194  * is only done if the molecule is sanitized
195  * \param variant - the atom type definitions to use
196  */
197 RWMol *Mol2BlockToMol(const std::string &molBlock, bool sanitize = true,
198  bool removeHs = true, Mol2Type variant = CORINA);
199 
// PDB reader taking the input as a C string.
// NOTE(review): undocumented in this header. Presumably constructs a molecule
// from PDB-format text, with sanitize/removeHs behaving as for the mol and
// mol2 readers; the meaning of the flavor bits on input is not shown here -
// confirm against the implementation.
200 RWMol *PDBBlockToMol(const char *str, bool sanitize = true,
201  bool removeHs = true, unsigned int flavor = 0);
202 
// PDB readers taking the input as a std::string, a stream (pointer or
// reference) or a file name. All share the same optional arguments:
// sanitize = true, removeHs = true, flavor = 0.
// NOTE(review): undocumented in this header. Presumably each constructs a
// molecule from PDB-format data; the meaning of the flavor bits on input is
// not shown here - confirm against the implementation.
203 RWMol *PDBBlockToMol(const std::string &str, bool sanitize = true,
204  bool removeHs = true, unsigned int flavor = 0);
205 RWMol *PDBDataStreamToMol(std::istream *inStream, bool sanitize = true,
206  bool removeHs = true, unsigned int flavor = 0);
207 RWMol *PDBDataStreamToMol(std::istream &inStream, bool sanitize = true,
208  bool removeHs = true, unsigned int flavor = 0);
209 RWMol *PDBFileToMol(const std::string &fname, bool sanitize = true,
210  bool removeHs = true, unsigned int flavor = 0);
211 
212 //! \brief generates a PDB block for a molecule
213 /*!
214  * \param mol - the molecule in question
215  * \param confId - selects the conformer to be used
216  * \param flavor - bit flags controlling what gets written:
217  * flavor & 1 : Write MODEL/ENDMDL lines around each record
218  * flavor & 2 : Don't write any CONECT records
219  * flavor & 4 : Write CONECT records in both directions
220  * flavor & 8 : Don't use multiple CONECTs to encode bond order
221  * flavor & 16 : Write MASTER record
222  * flavor & 32 : Write TER record
223  */
224 std::string MolToPDBBlock(const ROMol &mol, int confId = -1,
225  unsigned int flavor = 0);
226 //! \brief Writes a molecule to a PDB file
227 /*!
228  * \param mol - the molecule in question
229  * \param fname - the name of the file to use
230  * \param confId - selects the conformer to be used
231  * \param flavor - bit flags controlling what gets written:
232  * flavor & 1 : Write MODEL/ENDMDL lines around each record
233  * flavor & 2 : Don't write any CONECT records
234  * flavor & 4 : Write CONECT records in both directions
235  * flavor & 8 : Don't use multiple CONECTs to encode bond order
236  * flavor & 16 : Write MASTER record
237  * flavor & 32 : Write TER record
238  */
239 void MolToPDBFile(const ROMol &mol, const std::string &fname, int confId = -1,
240  unsigned int flavor = 0);
241 }
242 
243 #endif
void MolToMolFile(const ROMol &mol, const std::string &fName, bool includeStereo=true, int confId=-1, bool kekulize=true, bool forceV3000=false)
RWMol * Mol2FileToMol(const std::string &fName, bool sanitize=true, bool removeHs=true, Mol2Type variant=CORINA)
std::vector< RWMOL_SPTR > RWMOL_SPTR_VECT
Definition: FileParsers.h:30
RWMol * PDBDataStreamToMol(std::istream *inStream, bool sanitize=true, bool removeHs=true, unsigned int flavor=0)
RWMol * PDBBlockToMol(const char *str, bool sanitize=true, bool removeHs=true, unsigned int flavor=0)
ROMol * removeHs(const ROMol &mol, bool implicitOnly=false, bool updateExplicitCount=false, bool sanitize=true)
returns a copy of a molecule with hydrogens removed
RWMol * Mol2DataStreamToMol(std::istream *inStream, bool sanitize=true, bool removeHs=true, Mol2Type variant=CORINA)
const int MOLFILE_MAXLINE
Definition: FileParsers.h:24
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:30
RWMol * MolDataStreamToMol(std::istream *inStream, unsigned int &line, bool sanitize=true, bool removeHs=true, bool strictParsing=true)
RWMol * MolFileToMol(const std::string &fName, bool sanitize=true, bool removeHs=true, bool strictParsing=true)
RWMol * TPLFileToMol(const std::string &fName, bool sanitize=true, bool skipFirstConf=false)
construct a multi-conf molecule from a TPL (BioCad format) file
std::string MolToMolBlock(const ROMol &mol, bool includeStereo=true, int confId=-1, bool kekulize=true, bool forceV3000=false)
pulls in the core RDKit functionality
ROMol is a molecule class that is intended to have a fixed topology.
Definition: ROMol.h:106
void MolToPDBFile(const ROMol &mol, const std::string &fname, int confId=-1, unsigned int flavor=0)
std::string strip(const std::string &orig)
RWMol * Mol2BlockToMol(const std::string &molBlock, bool sanitize=true, bool removeHs=true, Mol2Type variant=CORINA)
std::string MolToPDBBlock(const ROMol &mol, int confId=-1, unsigned int flavor=0)
Includes a bunch of functionality for handling Atom and Bond queries.
Definition: Atom.h:29
std::string MolToTPLText(const ROMol &mol, const std::string &partialChargeProp="_GasteigerCharge", bool writeFirstConfTwice=false)
RWMol * TPLDataStreamToMol(std::istream *inStream, unsigned int &line, bool sanitize=true, bool skipFirstConf=false)
translate TPL data (BioCad format) into a multi-conf molecule
RWMol * PDBFileToMol(const std::string &fname, bool sanitize=true, bool removeHs=true, unsigned int flavor=0)
RWMol * MolBlockToMol(const std::string &molBlock, bool sanitize=true, bool removeHs=true, bool strictParsing=true)
void MolToTPLFile(const ROMol &mol, const std::string &fName, const std::string &partialChargeProp="_GasteigerCharge", bool writeFirstConfTwice=false)