00001 // 00002 // Copyright (C) 2002-2008 Greg Landrum and Rational Discovery LLC 00003 // 00004 // @@ All Rights Reserved @@ 00005 // 00006 #ifndef _RD_FILEPARSERS_H 00007 #define _RD_FILEPARSERS_H 00008 00009 #include <RDGeneral/types.h> 00010 #include <GraphMol/RDKitBase.h> 00011 00012 #include <string> 00013 #include <iostream> 00014 #include <vector> 00015 #include <exception> 00016 00017 #include <boost/shared_ptr.hpp> 00018 00019 namespace RDKit{ 00020 const int MOLFILE_MAXLINE=256; 00021 std::string strip(const std::string &orig); 00022 00023 //----- 00024 // mol files 00025 //----- 00026 typedef std::vector< RWMOL_SPTR > RWMOL_SPTR_VECT; 00027 // \brief construct a molecule from MDL mol data in a stream 00028 /*! 00029 * \param inStream - stream containing the data 00030 * \param line - current line number (used for error reporting) 00031 * \param sanitize - toggles sanitization and stereochemistry 00032 * perception of the molecule 00033 * \param removeHs - toggles removal of Hs from the molecule. H removal 00034 * is only done if the molecule is sanitized 00035 */ 00036 RWMol *MolDataStreamToMol(std::istream *inStream, unsigned int &line, 00037 bool sanitize=true,bool removeHs=true); 00038 // \overload 00039 RWMol *MolDataStreamToMol(std::istream &inStream, unsigned int &line, 00040 bool sanitize=true,bool removeHs=true); 00041 // \brief construct a molecule from an MDL mol block 00042 /*! 00043 * \param molBlock - string containing the mol block 00044 * \param sanitize - toggles sanitization and stereochemistry 00045 * perception of the molecule 00046 * \param removeHs - toggles removal of Hs from the molecule. H removal 00047 * is only done if the molecule is sanitized 00048 */ 00049 RWMol *MolBlockToMol(const std::string &molBlock, bool sanitize=true, 00050 bool removeHs=true); 00051 00052 // \brief construct a molecule from an MDL mol file 00053 /*! 00054 * \param fName - string containing the file name 00055 * \param sanitize - toggles sanitization and stereochemistry 00056 * perception of the molecule 00057 * \param removeHs - toggles removal of Hs from the molecule. H removal 00058 * is only done if the molecule is sanitized 00059 */ 00060 RWMol *MolFileToMol(std::string fName, bool sanitize=true, 00061 bool removeHs=true); 00062 00063 // \brief generates an MDL mol block for a molecule 00064 /*! 00065 * \param mol - the molecule in question 00066 * \param includeStereo - toggles inclusion of stereochemistry information 00067 * \param confId - selects the conformer to be used 00068 * \param kekulize - triggers kekulization of the molecule before it is written 00069 */ 00070 std::string MolToMolBlock(const ROMol &mol,bool includeStereo=true, 00071 int confId=-1,bool kekulize=true); 00072 // \brief construct a molecule from an MDL mol file 00073 /*! 00074 * \param mol - the molecule in question 00075 * \param fName - the name of the file to use 00076 * \param includeStereo - toggles inclusion of stereochemistry information 00077 * \param confId - selects the conformer to be used 00078 * \param kekulize - triggers kekulization of the molecule before it is written 00079 */ 00080 void MolToMolFile(const ROMol &mol,std::string fName,bool includeStereo=true, 00081 int confId=-1,bool kekulize=true); 00082 00083 00084 //----- 00085 // TPL handling: 00086 //----- 00087 00088 //! \brief translate TPL data (BioCad format) into a multi-conf molecule 00089 /*! 00090 \param inStream: the stream from which to read 00091 \param line: used to track the line number of errors 00092 \param sanitize: toggles sanitization and stereochemistry 00093 perception of the molecule 00094 \param skipFirstConf: according to the TPL format description, the atomic 00095 coords in the atom-information block describe the first 00096 conformation and the first conf block describes second 00097 conformation. The CombiCode, on the other hand, writes 00098 the first conformation data both to the atom-information 00099 block and to the first conf block. We want to be able to 00100 read CombiCode-style tpls, so we'll allow this mis-feature 00101 to be parsed when this flag is set. 00102 */ 00103 RWMol *TPLDataStreamToMol(std::istream *inStream, unsigned int &line, 00104 bool sanitize=true, 00105 bool skipFirstConf=false); 00106 00107 //! \brief construct a multi-conf molecule from a TPL (BioCad format) file 00108 /*! 00109 \param fName: the name of the file from which to read 00110 \param sanitize: toggles sanitization and stereochemistry 00111 perception of the molecule 00112 \param skipFirstConf: according to the TPL format description, the atomic 00113 coords in the atom-information block describe the first 00114 conformation and the first conf block describes second 00115 conformation. The CombiCode, on the other hand, writes 00116 the first conformation data both to the atom-information 00117 block and to the first conf block. We want to be able to 00118 read CombiCode-style tpls, so we'll allow this mis-feature 00119 to be parsed when this flag is set. 00120 */ 00121 RWMol *TPLFileToMol(std::string fName,bool sanitize=true, 00122 bool skipFirstConf=false); 00123 00124 std::string MolToTPLText(const ROMol &mol, 00125 std::string partialChargeProp="_GasteigerCharge", 00126 bool writeFirstConfTwice=false); 00127 void MolToTPLFile(const ROMol &mol,std::string fName, 00128 std::string partialChargeProp="_GasteigerCharge", 00129 bool writeFirstConfTwice=false); 00130 00131 //----- 00132 // MOL2 handling 00133 //----- 00134 00135 typedef enum { 00136 CORINA=0 //! supports output from Corina and some dbtranslate output 00137 } Mol2Type; 00138 00139 // \brief construct a molecule from a Tripos mol2 file 00140 /*! 00141 * 00142 * \param fName - string containing the file name 00143 * \param sanitize - toggles sanitization of the molecule 00144 * \param removeHs - toggles removal of Hs from the molecule. H removal 00145 * is only done if the molecule is sanitized 00146 * \param variant - the atom type definitions to use 00147 */ 00148 RWMol *Mol2FileToMol(std::string fName,bool sanitize=true,bool removeHs=true, 00149 Mol2Type variant=CORINA); 00150 00151 // \brief construct a molecule from Tripos mol2 data in a stream 00152 /*! 00153 * \param inStream - stream containing the data 00154 * \param sanitize - toggles sanitization of the molecule 00155 * \param removeHs - toggles removal of Hs from the molecule. H removal 00156 * is only done if the molecule is sanitized 00157 * \param variant - the atom type definitions to use 00158 */ 00159 RWMol *Mol2DataStreamToMol(std::istream *inStream,bool sanitize=true,bool removeHs=true, 00160 Mol2Type variant=CORINA); 00161 // \overload 00162 RWMol *Mol2DataStreamToMol(std::istream &inStream,bool sanitize=true,bool removeHs=true, 00163 Mol2Type variant=CORINA); 00164 00165 // \brief construct a molecule from a Tripos mol2 block 00166 /*! 00167 * \param molBlock - string containing the mol block 00168 * \param sanitize - toggles sanitization of the molecule 00169 * \param removeHs - toggles removal of Hs from the molecule. H removal 00170 * is only done if the molecule is sanitized 00171 * \param variant - the atom type definitions to use 00172 */ 00173 RWMol *Mol2BlockToMol(const std::string &molBlock,bool sanitize=true,bool removeHs=true, 00174 Mol2Type variant=CORINA); 00175 00176 } 00177 00178 #endif
1.5.6