RDKit
Open-source cheminformatics and machine learning.
SLNParseOps.h
Go to the documentation of this file.
1 //
2 // Copyright (c) 2008, Novartis Institutes for BioMedical Research Inc.
3 // All rights reserved.
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following
13 // disclaimer in the documentation and/or other materials provided
14 // with the distribution.
15 // * Neither the name of Novartis Institutes for BioMedical Research Inc.
16 // nor the names of its contributors may be used to endorse or promote
17 // products derived from this software without specific prior
18 // written permission.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 //
32 // Created by Greg Landrum, September 2006
33 //
34 #ifndef __RD_SLNPARSEOPS_H__
35 #define __RD_SLNPARSEOPS_H__
36 
37 #include <vector>
40 #include <GraphMol/RDKitBase.h>
41 #include <GraphMol/RDKitQueries.h>
42 #include <boost/lexical_cast.hpp>
43 
44 namespace RDKit {
45 namespace SLNParse {
46 namespace {
47 //! set a bookmark in the molecule if the atom has an associated ID:
48 void bookmarkAtomID(RWMol *mp, Atom *atom) {
49  PRECONDITION(mp, "bad molecule");
50  PRECONDITION(atom, "bad atom");
51  unsigned int label;
52  if (atom->getPropIfPresent(common_properties::_AtomID, label)) {
53  if (mp->hasAtomBookmark(label)) {
54  std::stringstream err;
55  err << "SLN Parser error: Atom ID " << label << " used a second time.";
56  throw SLNParseException(err.str());
57  }
58  if (mp->hasBondBookmark(label)) {
59  std::stringstream err;
60  err << "SLN Parser error: Atom ID " << label
61  << " appears *after* its ring closure.";
62  throw SLNParseException(err.str());
63  }
64  mp->setAtomBookmark(atom, label);
65  }
66 }
67 
68 //! adds a bond, being careful to handle aromaticity properly
69 template <typename BondType>
70 void addBondToMol(RWMol *mp, BondType *bond) {
71  PRECONDITION(mp, "null molecule");
72  PRECONDITION(bond, "null bond");
73  mp->addBond(bond, true);
74  if (bond->getBondType() == Bond::AROMATIC) {
75  // SLN doesn't have aromatic atom types, aromaticity is a property
76  // of the bonds themselves, so we need to set the atom types:
77  bond->setIsAromatic(true);
78  bond->getBeginAtom()->setIsAromatic(true);
79  bond->getEndAtom()->setIsAromatic(true);
80  }
81 }
82 } // end of anonymous namespace
83 
84 // ------------------------------------------------------------------------------------
85 //! initialize a molecule
86 template <typename AtomType>
87 int startMol(std::vector<RWMol *> &molList, AtomType *firstAtom,
88  bool doingQuery) {
89  PRECONDITION(firstAtom, "empty atom");
90  RWMol *mp = new RWMol();
91  mp->addAtom(firstAtom, true, true);
92  bookmarkAtomID(mp, firstAtom);
93 
94  if (!doingQuery) {
95  // add any hydrogens that are set on the atom, otherwise getting the
96  // numbering right
97  // is just too hard:
98  for (unsigned int i = 0; i < firstAtom->getNumExplicitHs(); ++i) {
99  int hIdx = mp->addAtom(new Atom(1), false, true);
100  mp->addBond(0, hIdx, Bond::SINGLE);
101  }
102  firstAtom->setNumExplicitHs(0);
103  }
104 
105  int sz = molList.size();
106  molList.push_back(mp);
107  return sz;
108 };
109 
110 // ------------------------------------------------------------------------------------
111 //! adds an atom to a molecule
112 template <typename AtomType, typename BondType>
113 void addAtomToMol(std::vector<RWMol *> &molList, unsigned int idx,
114  AtomType *atom, BondType *bond, bool doingQuery) {
115  PRECONDITION(idx < molList.size(), "bad index");
116  RWMol *mp = molList[idx];
117  PRECONDITION(mp, "null molecule");
118  PRECONDITION(atom, "empty atom");
119  PRECONDITION(bond, "null bond");
120 
121  Atom *a1 = mp->getActiveAtom();
122  int atomIdx1 = a1->getIdx();
123  int atomIdx2 = mp->addAtom(atom, true, true);
124  bookmarkAtomID(mp, atom);
125  bond->setOwningMol(mp);
126  bond->setBeginAtomIdx(atomIdx1);
127  bond->setEndAtomIdx(atomIdx2);
128  addBondToMol(mp, bond);
129 
130  if (!doingQuery) {
131  // add any hydrogens that are set on the atom, otherwise getting the
132  // numbering right
133  // is just too hard:
134  for (unsigned int i = 0; i < atom->getNumExplicitHs(); ++i) {
135  int hIdx = mp->addAtom(new Atom(1), false, true);
136  mp->addBond(atomIdx2, hIdx, Bond::SINGLE);
137  }
138  atom->setNumExplicitHs(0);
139  }
140 }
141 //! \overload
142 template <typename AtomType>
143 void addAtomToMol(std::vector<RWMol *> &molList, unsigned int idx,
144  AtomType *atom, bool doingQuery) {
145  addAtomToMol(molList, idx, atom, new Bond(Bond::SINGLE), doingQuery);
146 }
147 
148 // ------------------------------------------------------------------------------------
149 //! closes an indexed ring in a molecule using the bond provided
150 // The bond is formed from the atom in the molecule with the
151 // corresponding bookmark to the active atom
152 //
153 template <typename BondType>
154 void closeRingBond(std::vector<RWMol *> &molList, unsigned int molIdx,
155  unsigned int ringIdx, BondType *bond,
156  bool postponeAllowed = true) {
157  PRECONDITION(molIdx < molList.size(), "bad index");
158  RWMol *mp = molList[molIdx];
159  PRECONDITION(mp, "null molecule");
160  PRECONDITION(bond, "Null bond");
161 
162  if (!mp->hasAtomBookmark(ringIdx)) {
163  if (postponeAllowed) {
164  // save it for later:
165  bond->setOwningMol(mp);
166  bond->setEndAtomIdx(mp->getActiveAtom()->getIdx());
167  mp->setBondBookmark(bond, ringIdx);
168  return;
169  } else {
170  std::stringstream err;
171  err << "SLN Parser error: Ring closure " << ringIdx
172  << " does not have a corresponding opener.";
173  throw SLNParseException(err.str());
174  }
175  }
176  Atom *opener = mp->getAtomWithBookmark(ringIdx);
177  CHECK_INVARIANT(opener, "invalid atom");
178 
179  Atom *closer = mp->getActiveAtom();
180  bond->setOwningMol(mp);
181  bond->setBeginAtom(opener);
182  bond->setEndAtom(closer);
183  addBondToMol(mp, bond);
184 };
185 //! \overload
186 void closeRingBond(std::vector<RWMol *> &molList, unsigned int molIdx,
187  unsigned int ringIdx) {
188  closeRingBond(molList, molIdx, ringIdx, new Bond(Bond::SINGLE));
189 };
190 
191 // ------------------------------------------------------------------------------------
192 // NOTE: this takes over responsibility for the bond
193 template <typename BondType>
194 int addBranchToMol(std::vector<RWMol *> &molList, unsigned int molIdx,
195  unsigned int branchIdx, BondType *&bond) {
196  PRECONDITION(molIdx < molList.size(), "bad index");
197  RWMol *mp = molList[molIdx];
198  PRECONDITION(mp, "null molecule");
199  PRECONDITION(branchIdx < molList.size(), "bad index");
200  RWMol *branch = molList[branchIdx];
201  PRECONDITION(branch, "null branch");
202  PRECONDITION(bond, "null bond");
203 
204  unsigned int activeAtomIdx = mp->getActiveAtom()->getIdx();
205  unsigned int nOrigAtoms = mp->getNumAtoms();
206 
207  //
208  // Add the fragment's atoms and bonds to the molecule:
209  //
210  mp->insertMol(*branch);
211 
212  // copy in any atom bookmarks from the branch:
213  for (ROMol::ATOM_BOOKMARK_MAP::const_iterator bmIt =
214  branch->getAtomBookmarks()->begin();
215  bmIt != branch->getAtomBookmarks()->end(); ++bmIt) {
216  if (bmIt->first < 0) continue;
217  if (mp->hasAtomBookmark(bmIt->first)) {
218  std::stringstream err;
219  err << "SLN Parser error: Atom ID " << bmIt->first
220  << " used a second time.";
221  throw SLNParseException(err.str());
222  } else if (mp->hasBondBookmark(bmIt->first)) {
223  std::stringstream err;
224  err << "SLN Parser error: Atom ID " << bmIt->first
225  << " appears *after* its ring closure.";
226  throw SLNParseException(err.str());
227  } else {
228  CHECK_INVARIANT(bmIt->second.size() == 1,
229  "bad atom bookmark list on branch");
230  Atom *tgtAtom =
231  mp->getAtomWithIdx((*bmIt->second.begin())->getIdx() + nOrigAtoms);
232  mp->setAtomBookmark(tgtAtom, bmIt->first);
233  }
234  }
235 
236  // loop over bond bookmarks in the branch and close the corresponding rings
237  for (ROMol::BOND_BOOKMARK_MAP::const_iterator bmIt =
238  branch->getBondBookmarks()->begin();
239  bmIt != branch->getBondBookmarks()->end(); ++bmIt) {
240  CHECK_INVARIANT(bmIt->second.size() >= 1,
241  "bad bond bookmark list on branch");
242  for (ROMol::BOND_PTR_LIST::const_iterator bondIt = bmIt->second.begin();
243  bondIt != bmIt->second.end(); ++bondIt) {
244  Bond *tgtBond = *bondIt;
245  if (bmIt->first > 0 && mp->hasAtomBookmark(bmIt->first)) {
246  Atom *tmpAtom = mp->getActiveAtom();
247  mp->setActiveAtom(
248  mp->getAtomWithIdx(tgtBond->getEndAtomIdx() + nOrigAtoms));
249  closeRingBond(molList, molIdx, bmIt->first, tgtBond, false);
250  mp->setActiveAtom(tmpAtom);
251  } else {
252  // no partner found yet, copy into this mol:
253  tgtBond->setOwningMol(mp);
254  tgtBond->setEndAtomIdx(tgtBond->getEndAtomIdx() + nOrigAtoms);
255  mp->setBondBookmark(tgtBond, bmIt->first);
256  }
257  }
258  }
259 
260  // set the connecting bond:
261  if (bond->getBondType() != Bond::IONIC) {
262  bond->setOwningMol(mp);
263  bond->setBeginAtomIdx(activeAtomIdx);
264  bond->setEndAtomIdx(nOrigAtoms);
265  addBondToMol(mp, bond);
266  } else {
267  delete bond;
268  }
269  bond = 0;
270 
271  delete branch;
272  unsigned int sz = molList.size();
273  if (sz == branchIdx + 1) {
274  molList.resize(sz - 1);
275  }
276  return molIdx;
277 };
278 //! \overload
279 int addBranchToMol(std::vector<RWMol *> &molList, unsigned int molIdx,
280  unsigned int branchIdx) {
281  Bond *newBond = new Bond(Bond::SINGLE);
282  return addBranchToMol(molList, molIdx, branchIdx, newBond);
283 };
284 
285 // ------------------------------------------------------------------------------------
286 //! adds the atoms and bonds from a fragment to the molecule, sets no bond
287 // between them
288 int addFragToMol(std::vector<RWMol *> &molList, unsigned int molIdx,
289  unsigned int fragIdx) {
290  Bond *newBond = new Bond(Bond::IONIC);
291  return addBranchToMol(molList, molIdx, fragIdx, newBond);
292 }
293 
294 //! convenience function to convert the argument to a string
295 template <typename T>
296 std::string convertToString(T val) {
297  std::string res = boost::lexical_cast<std::string>(val);
298  return res;
299 }
300 
301 void CleanupAfterParseError(RWMol *mol) {
302  PRECONDITION(mol, "no molecule");
303  // blow out any partial bonds:
304  RWMol::BOND_BOOKMARK_MAP *marks = mol->getBondBookmarks();
305  RWMol::BOND_BOOKMARK_MAP::iterator markI = marks->begin();
306  while (markI != marks->end()) {
307  RWMol::BOND_PTR_LIST &bonds = markI->second;
308  for (RWMol::BOND_PTR_LIST::iterator bondIt = bonds.begin();
309  bondIt != bonds.end(); ++bondIt) {
310  delete *bondIt;
311  }
312  ++markI;
313  }
314 }
315 } // end of namespace SLNParse
316 } // end of namespace RDKit
317 #endif
std::string convertToString(T val)
convenience function to convert the argument to a string
Definition: SLNParseOps.h:296
int addBranchToMol(std::vector< RWMol * > &molList, unsigned int molIdx, unsigned int branchIdx, BondType *&bond)
Definition: SLNParseOps.h:194
int addFragToMol(std::vector< RWMol * > &molList, unsigned int molIdx, unsigned int fragIdx)
adds the atoms and bonds from a fragment to the molecule, sets no bond
Definition: SLNParseOps.h:288
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:30
unsigned int addAtom(bool updateLabel=true)
adds an empty Atom to our collection
void closeRingBond(std::vector< RWMol * > &molList, unsigned int molIdx, unsigned int ringIdx, BondType *bond, bool postponeAllowed=true)
closes an indexed ring in a molecule using the bond provided
Definition: SLNParseOps.h:154
#define CHECK_INVARIANT(expr, mess)
Definition: Invariant.h:99
pulls in the RDKit Query functionality
unsigned int getIdx() const
returns our index within the ROMol
Definition: Atom.h:130
pulls in the core RDKit functionality
int startMol(std::vector< RWMol * > &molList, AtomType *firstAtom, bool doingQuery)
initialize a molecule
Definition: SLNParseOps.h:87
void addAtomToMol(std::vector< RWMol * > &molList, unsigned int idx, AtomType *atom, BondType *bond, bool doingQuery)
adds an atom to a molecule
Definition: SLNParseOps.h:113
Includes a bunch of functionality for handling Atom and Bond queries.
Definition: Atom.h:29
unsigned int getEndAtomIdx() const
returns the index of our end Atom
Definition: Bond.h:183
class for representing a bond
Definition: Bond.h:47
void setOwningMol(ROMol *other)
sets our owning molecule
void setOwningMol(ROMol *other)
sets our owning molecule
void CleanupAfterParseError(RWMol *mol)
Definition: SLNParseOps.h:301
void setEndAtomIdx(unsigned int what)
sets the index of our end Atom
const std::string _AtomID
#define PRECONDITION(expr, mess)
Definition: Invariant.h:107
The class for representing atoms.
Definition: Atom.h:68
unsigned int addBond(unsigned int beginAtomIdx, unsigned int endAtomIdx, Bond::BondType order=Bond::UNSPECIFIED)
adds a Bond between the indicated Atoms