RDKit
Open-source cheminformatics and machine learning.
SequenceParsers.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2015,2016 Greg Landrum and NextMove Software
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #ifndef _RD_SEQUENCEPARSE_H_
11 #define _RD_SEQUENCEPARSE_H_
12 #include <string>
13 
14 namespace RDKit {
15 class RWMol;
16 
17 //! \brief construct a molecule from a sequence string (currently only supports
18 //! peptides)
19 /*!
20  * \param seq - the string to be processed
21  * \param sanitize - toggles sanitization and stereochemistry perception of
22  *the molecule
23  * \param lowerD - if set, lower case letters will be parsed as the d form
24  *of the corresponding amino acid
25  * \return pointer to the constructed molecule (NOTE(review): presumably owned by the caller - confirm)
26  */
27 RWMol *SequenceToMol(const char *seq, bool sanitize, bool lowerD);
28 //! \overload
29 RWMol *SequenceToMol(const std::string &seq, bool sanitize, bool lowerD);
30 
31 //! \brief construct a protein, RNA or DNA molecule from a sequence string
32 /*!
33  * \param seq - the string to be processed
34  * \param sanitize - toggles sanitization and stereochemistry perception of
35  *the molecule
36  * \param flavor -
37  * 0 Protein, L amino acids (default)
38  * 1 Protein, D amino acids
39  * 2 RNA, no cap
40  * 3 RNA, 5' cap
41  * 4 RNA, 3' cap
42  * 5 RNA, both caps
43  * 6 DNA, no cap
44  * 7 DNA, 5' cap
45  * 8 DNA, 3' cap
46  * 9 DNA, both caps
47  * \return pointer to the constructed molecule (NOTE(review): presumably owned by the caller - confirm)
48  */
49 RWMol *SequenceToMol(const char *seq, bool sanitize = true, int flavor = 0);
50 //! \overload
51 RWMol *SequenceToMol(const std::string &seq, bool sanitize = true,
52  int flavor = 0);
53 
54 //! \brief construct a molecule from a FASTA string (currently only supports
55 //! peptides)
56 /*!
57  * \param seq - the string to be processed
58  * \param sanitize - toggles sanitization and stereochemistry perception of
59  *the molecule
60  * \param lowerD - if set, lower case letters will be parsed as the d form
61  *of the corresponding amino acid
62  * \return pointer to the constructed molecule (NOTE(review): presumably owned by the caller - confirm)
63  */
64 RWMol *FASTAToMol(const char *seq, bool sanitize, bool lowerD);
65 //! \overload
66 RWMol *FASTAToMol(const std::string &seq, bool sanitize, bool lowerD);
67 
68 //! \brief construct a protein, DNA or RNA molecule from a FASTA string
69 /*!
70  * \param seq - the string to be processed
71  * \param sanitize - toggles sanitization and stereochemistry perception of
72  *the molecule
73  * \param flavor -
74  * 0 Protein, L amino acids (default)
75  * 1 Protein, D amino acids
76  * 2 RNA, no cap
77  * 3 RNA, 5' cap
78  * 4 RNA, 3' cap
79  * 5 RNA, both caps
80  * 6 DNA, no cap
81  * 7 DNA, 5' cap
82  * 8 DNA, 3' cap
83  * 9 DNA, both caps
84  * \return pointer to the constructed molecule (NOTE(review): presumably owned by the caller - confirm)
85  */
86 RWMol *FASTAToMol(const char *seq, bool sanitize = true, int flavor = 0);
87 //! \overload
88 RWMol *FASTAToMol(const std::string &seq, bool sanitize = true, int flavor = 0);
89 
90 //! \brief construct a molecule from a HELM string (currently only supports
91 //! peptides)
92 /*!
93  * \param helm - the HELM string to be processed
94  * \param sanitize - toggles sanitization and stereochemistry perception of
95  *the molecule
96  * \return pointer to the constructed molecule (NOTE(review): presumably owned by the caller - confirm)
97  */
98 RWMol *HELMToMol(const char *helm, bool sanitize = true);
99 //! \overload
100 RWMol *HELMToMol(const std::string &helm, bool sanitize = true);
101 }
102 
103 #endif
Includes a bunch of functionality for handling Atom and Bond queries.
Definition: Atom.h:29
RWMol * SequenceToMol(const char *seq, bool sanitize, bool lowerD)
RWMol * FASTAToMol(const char *seq, bool sanitize, bool lowerD)
RWMol * HELMToMol(const char *helm, bool sanitize=true)