RDKit
Open-source cheminformatics and machine learning.
SLNParseOps.h
Go to the documentation of this file.
1 //
2 // Copyright (c) 2008, Novartis Institutes for BioMedical Research Inc.
3 // All rights reserved.
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following
13 // disclaimer in the documentation and/or other materials provided
14 // with the distribution.
15 // * Neither the name of Novartis Institutes for BioMedical Research Inc.
16 // nor the names of its contributors may be used to endorse or promote
17 // products derived from this software without specific prior
18 // written permission.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 //
32 // Created by Greg Landrum, September 2006
33 //
34 #ifndef __RD_SLNPARSEOPS_H__
35 #define __RD_SLNPARSEOPS_H__
36 
37 #include <vector>
40 #include <GraphMol/RDKitBase.h>
41 #include <GraphMol/RDKitQueries.h>
43 #include <boost/lexical_cast.hpp>
45 
46 namespace RDKit {
47 namespace SLNParse {
48 namespace {
49 //! set a bookmark in the molecule if the atom has an associated ID:
50 void bookmarkAtomID(RWMol *mp, Atom *atom) {
51  PRECONDITION(mp, "bad molecule");
52  PRECONDITION(atom, "bad atom");
53  unsigned int label;
54  if (atom->getPropIfPresent(common_properties::_AtomID, label)) {
55  if (mp->hasAtomBookmark(label)) {
56  std::stringstream err;
57  err << "SLN Parser error: Atom ID " << label << " used a second time.";
58  throw SLNParseException(err.str());
59  }
60  if (mp->hasBondBookmark(label)) {
61  std::stringstream err;
62  err << "SLN Parser error: Atom ID " << label
63  << " appears *after* its ring closure.";
64  throw SLNParseException(err.str());
65  }
66  mp->setAtomBookmark(atom, label);
67  }
68 }
69 
70 //! adds a bond, being careful to handle aromaticity properly
71 template <typename BondType>
72 void addBondToMol(RWMol *mp, BondType *bond) {
73  PRECONDITION(mp, "null molecule");
74  PRECONDITION(bond, "null bond");
75  mp->addBond(bond, true);
76  if (bond->getBondType() == Bond::AROMATIC) {
77  // SLN doesn't have aromatic atom types, aromaticity is a property
78  // of the bonds themselves, so we need to set the atom types:
79  bond->setIsAromatic(true);
80  bond->getBeginAtom()->setIsAromatic(true);
81  bond->getEndAtom()->setIsAromatic(true);
82  }
83 }
84 } // end of anonymous namespace
85 
86 // ------------------------------------------------------------------------------------
87 //! initialize a molecule
88 template <typename AtomType>
89 int startMol(std::vector<RWMol *> &molList, AtomType *firstAtom,
90  bool doingQuery) {
91  PRECONDITION(firstAtom, "empty atom");
92  RWMol *mp = new RWMol();
93  mp->addAtom(firstAtom, true, true);
94  bookmarkAtomID(mp, firstAtom);
95 
96  if (!doingQuery) {
97  // add any hydrogens that are set on the atom, otherwise getting the
98  // numbering right
99  // is just too hard:
100  for (unsigned int i = 0; i < firstAtom->getNumExplicitHs(); ++i) {
101  int hIdx = mp->addAtom(new Atom(1), false, true);
102  mp->addBond(0, hIdx, Bond::SINGLE);
103  }
104  firstAtom->setNumExplicitHs(0);
105  }
106 
107  int sz = molList.size();
108  molList.push_back(mp);
109  return sz;
110 };
111 
112 // ------------------------------------------------------------------------------------
113 //! adds an atom to a molecule
114 template <typename AtomType, typename BondType>
115 void addAtomToMol(std::vector<RWMol *> &molList, unsigned int idx,
116  AtomType *atom, BondType *bond, bool doingQuery) {
117  PRECONDITION(idx < molList.size(), "bad index");
118  RWMol *mp = molList[idx];
119  PRECONDITION(mp, "null molecule");
120  PRECONDITION(atom, "empty atom");
121  PRECONDITION(bond, "null bond");
122 
123  Atom *a1 = mp->getActiveAtom();
124  int atomIdx1 = a1->getIdx();
125  int atomIdx2 = mp->addAtom(atom, true, true);
126  bookmarkAtomID(mp, atom);
127  bond->setOwningMol(mp);
128  bond->setBeginAtomIdx(atomIdx1);
129  bond->setEndAtomIdx(atomIdx2);
130  addBondToMol(mp, bond);
131 
132  if (!doingQuery) {
133  // add any hydrogens that are set on the atom, otherwise getting the
134  // numbering right
135  // is just too hard:
136  for (unsigned int i = 0; i < atom->getNumExplicitHs(); ++i) {
137  int hIdx = mp->addAtom(new Atom(1), false, true);
138  mp->addBond(atomIdx2, hIdx, Bond::SINGLE);
139  }
140  atom->setNumExplicitHs(0);
141  }
142 }
143 //! \overload
144 template <typename AtomType>
145 void addAtomToMol(std::vector<RWMol *> &molList, unsigned int idx,
146  AtomType *atom, bool doingQuery) {
147  addAtomToMol(molList, idx, atom, new Bond(Bond::SINGLE), doingQuery);
148 }
149 
150 // ------------------------------------------------------------------------------------
151 //! closes an indexed ring in a molecule using the bond provided
152 // The bond is formed from the atom in the molecule with the
153 // corresponding bookmark to the active atom
154 //
155 template <typename BondType>
156 void closeRingBond(std::vector<RWMol *> &molList, unsigned int molIdx,
157  unsigned int ringIdx, BondType *bond,
158  bool postponeAllowed = true) {
159  PRECONDITION(molIdx < molList.size(), "bad index");
160  RWMol *mp = molList[molIdx];
161  PRECONDITION(mp, "null molecule");
162  PRECONDITION(bond, "Null bond");
163 
164  if (!mp->hasAtomBookmark(ringIdx)) {
165  if (postponeAllowed) {
166  // save it for later:
167  bond->setOwningMol(mp);
168  bond->setEndAtomIdx(mp->getActiveAtom()->getIdx());
169  mp->setBondBookmark(bond, ringIdx);
170  return;
171  } else {
172  std::stringstream err;
173  err << "SLN Parser error: Ring closure " << ringIdx
174  << " does not have a corresponding opener.";
175  throw SLNParseException(err.str());
176  }
177  }
178  Atom *opener = mp->getAtomWithBookmark(ringIdx);
179  CHECK_INVARIANT(opener, "invalid atom");
180 
181  Atom *closer = mp->getActiveAtom();
182  bond->setOwningMol(mp);
183  bond->setBeginAtom(opener);
184  bond->setEndAtom(closer);
185  addBondToMol(mp, bond);
186 };
187 //! \overload
188 void closeRingBond(std::vector<RWMol *> &molList, unsigned int molIdx,
189  unsigned int ringIdx) {
190  closeRingBond(molList, molIdx, ringIdx, new Bond(Bond::SINGLE));
191 };
192 
193 // ------------------------------------------------------------------------------------
194 // NOTE: this takes over responsibility for the bond
195 template <typename BondType>
196 int addBranchToMol(std::vector<RWMol *> &molList, unsigned int molIdx,
197  unsigned int branchIdx, BondType *&bond) {
198  PRECONDITION(molIdx < molList.size(), "bad index");
199  RWMol *mp = molList[molIdx];
200  PRECONDITION(mp, "null molecule");
201  PRECONDITION(branchIdx < molList.size(), "bad index");
202  RWMol *branch = molList[branchIdx];
203  PRECONDITION(branch, "null branch");
204  PRECONDITION(bond, "null bond");
205 
206  unsigned int activeAtomIdx = mp->getActiveAtom()->getIdx();
207  unsigned int nOrigAtoms = mp->getNumAtoms();
208 
209  //
210  // Add the fragment's atoms and bonds to the molecule:
211  //
212  mp->insertMol(*branch);
213 
214  // copy in any atom bookmarks from the branch:
215  for (ROMol::ATOM_BOOKMARK_MAP::const_iterator bmIt =
216  branch->getAtomBookmarks()->begin();
217  bmIt != branch->getAtomBookmarks()->end(); ++bmIt) {
218  if (bmIt->first < 0) continue;
219  if (mp->hasAtomBookmark(bmIt->first)) {
220  std::stringstream err;
221  err << "SLN Parser error: Atom ID " << bmIt->first
222  << " used a second time.";
223  throw SLNParseException(err.str());
224  } else if (mp->hasBondBookmark(bmIt->first)) {
225  std::stringstream err;
226  err << "SLN Parser error: Atom ID " << bmIt->first
227  << " appears *after* its ring closure.";
228  throw SLNParseException(err.str());
229  } else {
230  CHECK_INVARIANT(bmIt->second.size() == 1,
231  "bad atom bookmark list on branch");
232  Atom *tgtAtom =
233  mp->getAtomWithIdx((*bmIt->second.begin())->getIdx() + nOrigAtoms);
234  mp->setAtomBookmark(tgtAtom, bmIt->first);
235  }
236  }
237 
238  // loop over bond bookmarks in the branch and close the corresponding rings
239  for (ROMol::BOND_BOOKMARK_MAP::const_iterator bmIt =
240  branch->getBondBookmarks()->begin();
241  bmIt != branch->getBondBookmarks()->end(); ++bmIt) {
242  CHECK_INVARIANT(bmIt->second.size() >= 1,
243  "bad bond bookmark list on branch");
244  for (ROMol::BOND_PTR_LIST::const_iterator bondIt = bmIt->second.begin();
245  bondIt != bmIt->second.end(); ++bondIt) {
246  Bond *tgtBond = *bondIt;
247  if (bmIt->first > 0 && mp->hasAtomBookmark(bmIt->first)) {
248  Atom *tmpAtom = mp->getActiveAtom();
249  mp->setActiveAtom(
250  mp->getAtomWithIdx(tgtBond->getEndAtomIdx() + nOrigAtoms));
251  closeRingBond(molList, molIdx, bmIt->first, tgtBond, false);
252  mp->setActiveAtom(tmpAtom);
253  } else {
254  // no partner found yet, copy into this mol:
255  tgtBond->setOwningMol(mp);
256  tgtBond->setEndAtomIdx(tgtBond->getEndAtomIdx() + nOrigAtoms);
257  mp->setBondBookmark(tgtBond, bmIt->first);
258  }
259  }
260  }
261 
262  // set the connecting bond:
263  if (bond->getBondType() != Bond::IONIC) {
264  bond->setOwningMol(mp);
265  bond->setBeginAtomIdx(activeAtomIdx);
266  bond->setEndAtomIdx(nOrigAtoms);
267  addBondToMol(mp, bond);
268  } else {
269  delete bond;
270  }
271  bond = 0;
272 
273  delete branch;
274  unsigned int sz = molList.size();
275  if (sz == branchIdx + 1) {
276  molList.resize(sz - 1);
277  }
278  return molIdx;
279 };
280 //! \overload
281 int addBranchToMol(std::vector<RWMol *> &molList, unsigned int molIdx,
282  unsigned int branchIdx) {
283  Bond *newBond = new Bond(Bond::SINGLE);
284  return addBranchToMol(molList, molIdx, branchIdx, newBond);
285 };
286 
287 // ------------------------------------------------------------------------------------
288 //! adds the atoms and bonds from a fragment to the molecule, sets no bond
289 // between them
290 int addFragToMol(std::vector<RWMol *> &molList, unsigned int molIdx,
291  unsigned int fragIdx) {
292  Bond *newBond = new Bond(Bond::IONIC);
293  return addBranchToMol(molList, molIdx, fragIdx, newBond);
294 }
295 
296 //! convenience function to convert the argument to a string
297 template <typename T>
298 std::string convertToString(T val) {
299  std::string res = boost::lexical_cast<std::string>(val);
300  return res;
301 }
302 
303 void CleanupAfterParseError(RWMol *mol) {
304  PRECONDITION(mol, "no molecule");
305  // blow out any partial bonds:
306  RWMol::BOND_BOOKMARK_MAP *marks = mol->getBondBookmarks();
307  RWMol::BOND_BOOKMARK_MAP::iterator markI = marks->begin();
308  while (markI != marks->end()) {
309  RWMol::BOND_PTR_LIST &bonds = markI->second;
310  for (RWMol::BOND_PTR_LIST::iterator bondIt = bonds.begin();
311  bondIt != bonds.end(); ++bondIt) {
312  delete *bondIt;
313  }
314  ++markI;
315  }
316 }
317 } // end of namespace SLNParse
318 } // end of namespace RDKit
319 #endif
std::string convertToString(T val)
convenience function to convert the argument to a string
Definition: SLNParseOps.h:298
int addBranchToMol(std::vector< RWMol * > &molList, unsigned int molIdx, unsigned int branchIdx, BondType *&bond)
Definition: SLNParseOps.h:196
int addFragToMol(std::vector< RWMol * > &molList, unsigned int molIdx, unsigned int fragIdx)
adds the atoms and bonds from a fragment to the molecule, sets no bond
Definition: SLNParseOps.h:290
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:30
unsigned int addAtom(bool updateLabel=true)
adds an empty Atom to our collection
void closeRingBond(std::vector< RWMol * > &molList, unsigned int molIdx, unsigned int ringIdx, BondType *bond, bool postponeAllowed=true)
closes an indexed ring in a molecule using the bond provided
Definition: SLNParseOps.h:156
#define CHECK_INVARIANT(expr, mess)
Definition: Invariant.h:99
pulls in the RDKit Query functionality
unsigned int getIdx() const
returns our index within the ROMol
Definition: Atom.h:130
pulls in the core RDKit functionality
int startMol(std::vector< RWMol * > &molList, AtomType *firstAtom, bool doingQuery)
initialize a molecule
Definition: SLNParseOps.h:89
void addAtomToMol(std::vector< RWMol * > &molList, unsigned int idx, AtomType *atom, BondType *bond, bool doingQuery)
adds an atom to a molecule
Definition: SLNParseOps.h:115
Includes a bunch of functionality for handling Atom and Bond queries.
Definition: Atom.h:29
unsigned int getEndAtomIdx() const
returns the index of our end Atom
Definition: Bond.h:185
class for representing a bond
Definition: Bond.h:47
void setOwningMol(ROMol *other)
sets our owning molecule
void setOwningMol(ROMol *other)
sets our owning molecule
void CleanupAfterParseError(RWMol *mol)
Definition: SLNParseOps.h:303
void setEndAtomIdx(unsigned int what)
sets the index of our end Atom
const std::string _AtomID
#define PRECONDITION(expr, mess)
Definition: Invariant.h:107
The class for representing atoms.
Definition: Atom.h:68
unsigned int addBond(unsigned int beginAtomIdx, unsigned int endAtomIdx, Bond::BondType order=Bond::UNSPECIFIED)
adds a Bond between the indicated Atoms