RDKit
Open-source cheminformatics and machine learning.
new_canon.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2014 Greg Landrum
3 // Adapted from pseudo-code from Roger Sayle
4 //
5 // @@ All Rights Reserved @@
6 // This file is part of the RDKit.
7 // The contents are covered by the terms of the BSD license
8 // which is included in the file license.txt, found at the root
9 // of the RDKit source tree.
10 //
11 
12 #include <RDGeneral/hanoiSort.h>
13 #include <GraphMol/ROMol.h>
14 #include <GraphMol/RingInfo.h>
16 #include <boost/cstdint.hpp>
17 #include <boost/foreach.hpp>
18 #include <boost/dynamic_bitset.hpp>
20 #include <cstring>
21 #include <iostream>
22 #include <cassert>
23 #include <cstring>
24 #include <vector>
25 
26 //#define VERBOSE_CANON 1
27 
28 namespace RDKit {
29 namespace Canon {
30 
31 struct bondholder {
33  unsigned int bondStereo;
34  unsigned int nbrSymClass;
35  unsigned int nbrIdx;
37  : bondType(Bond::UNSPECIFIED),
38  bondStereo(static_cast<unsigned int>(Bond::STEREONONE)),
39  nbrSymClass(0),
40  nbrIdx(0){};
41  bondholder(Bond::BondType bt, Bond::BondStereo bs, unsigned int ni,
42  unsigned int nsc)
43  : bondType(bt),
44  bondStereo(static_cast<unsigned int>(bs)),
45  nbrSymClass(nsc),
46  nbrIdx(ni){};
47  bondholder(Bond::BondType bt, unsigned int bs, unsigned int ni,
48  unsigned int nsc)
49  : bondType(bt), bondStereo(bs), nbrSymClass(nsc), nbrIdx(ni){};
50  bool operator<(const bondholder &o) const {
51  if (bondType != o.bondType) return bondType < o.bondType;
52  if (bondStereo != o.bondStereo) return bondStereo < o.bondStereo;
53  return nbrSymClass < o.nbrSymClass;
54  }
55  static bool greater(const bondholder &lhs, const bondholder &rhs) {
56  if (lhs.bondType != rhs.bondType) return lhs.bondType > rhs.bondType;
57  if (lhs.bondStereo != rhs.bondStereo)
58  return lhs.bondStereo > rhs.bondStereo;
59  return lhs.nbrSymClass > rhs.nbrSymClass;
60  }
61 
62  static int compare(const bondholder &x, const bondholder &y,
63  unsigned int div = 1) {
64  if (x.bondType < y.bondType)
65  return -1;
66  else if (x.bondType > y.bondType)
67  return 1;
68  if (x.bondStereo < y.bondStereo)
69  return -1;
70  else if (x.bondStereo > y.bondStereo)
71  return 1;
72  return x.nbrSymClass / div - y.nbrSymClass / div;
73  }
74 };
75 
76 struct canon_atom {
77  const Atom *atom;
78  int index;
79  unsigned int degree;
80  unsigned int totalNumHs;
81  bool hasRingNbr;
83  int *nbrIds;
84  const std::string *p_symbol; // if provided, this is used to order atoms
85  std::vector<int> neighborNum;
86  std::vector<int> revistedNeighbors;
87  std::vector<bondholder> bonds;
88 
90  : atom(NULL),
91  index(-1),
92  degree(0),
93  totalNumHs(0),
94  hasRingNbr(false),
95  isRingStereoAtom(false),
96  nbrIds(NULL),
97  p_symbol(NULL){};
98 };
99 
100 void updateAtomNeighborIndex(canon_atom *atoms, std::vector<bondholder> &nbrs);
101 
103  canon_atom *atoms, std::vector<bondholder> &nbrs, unsigned int atomIdx,
104  std::vector<std::pair<unsigned int, unsigned int> > &result);
105 
106 /*
107  * Different types of atom compare functions:
108  *
109  * - SpecialChiralityAtomCompareFunctor: Allows canonizing molecules exhibiting
110  *dependent chirality
111  * - SpecialSymmetryAtomCompareFunctor: Very specialized, allows canonizing
112  *highly symmetrical graphs/molecules
113  * - AtomCompareFunctor: Basic atom compare function which also allows to
114  *include neighbors within the ranking
115  */
116 
118  public:
120  const ROMol *dp_mol;
121  const boost::dynamic_bitset<> *dp_atomsInPlay, *dp_bondsInPlay;
122 
124  : dp_atoms(NULL),
125  dp_mol(NULL),
126  dp_atomsInPlay(NULL),
127  dp_bondsInPlay(NULL){};
129  Canon::canon_atom *atoms, const ROMol &m,
130  const boost::dynamic_bitset<> *atomsInPlay = NULL,
131  const boost::dynamic_bitset<> *bondsInPlay = NULL)
132  : dp_atoms(atoms),
133  dp_mol(&m),
134  dp_atomsInPlay(atomsInPlay),
135  dp_bondsInPlay(bondsInPlay){};
136  int operator()(int i, int j) const {
137  PRECONDITION(dp_atoms, "no atoms");
138  PRECONDITION(dp_mol, "no molecule");
139  PRECONDITION(i != j, "bad call");
140  if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
141  return 0;
142  }
143 
144  if ((dp_atomsInPlay && (*dp_atomsInPlay)[i]) || !dp_atomsInPlay) {
145  updateAtomNeighborIndex(dp_atoms, dp_atoms[i].bonds);
146  }
147  if ((dp_atomsInPlay && (*dp_atomsInPlay)[j]) || !dp_atomsInPlay) {
148  updateAtomNeighborIndex(dp_atoms, dp_atoms[j].bonds);
149  }
150  for (unsigned int ii = 0;
151  ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size(); ++ii) {
152  int cmp =
153  bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
154  if (cmp) return cmp;
155  }
156 
157  std::vector<std::pair<unsigned int, unsigned int> > swapsi;
158  std::vector<std::pair<unsigned int, unsigned int> > swapsj;
159  if ((dp_atomsInPlay && (*dp_atomsInPlay)[i]) || !dp_atomsInPlay) {
160  updateAtomNeighborNumSwaps(dp_atoms, dp_atoms[i].bonds, i, swapsi);
161  }
162  if ((dp_atomsInPlay && (*dp_atomsInPlay)[j]) || !dp_atomsInPlay) {
163  updateAtomNeighborNumSwaps(dp_atoms, dp_atoms[j].bonds, j, swapsj);
164  }
165  for (unsigned int ii = 0; ii < swapsi.size() && ii < swapsj.size(); ++ii) {
166  int cmp = swapsi[ii].second - swapsj[ii].second;
167  if (cmp) return cmp;
168  }
169  return 0;
170  }
171 };
172 
174  public:
176  const ROMol *dp_mol;
177  const boost::dynamic_bitset<> *dp_atomsInPlay, *dp_bondsInPlay;
178 
180  : dp_atoms(NULL),
181  dp_mol(NULL),
182  dp_atomsInPlay(NULL),
183  dp_bondsInPlay(NULL){};
185  Canon::canon_atom *atoms, const ROMol &m,
186  const boost::dynamic_bitset<> *atomsInPlay = NULL,
187  const boost::dynamic_bitset<> *bondsInPlay = NULL)
188  : dp_atoms(atoms),
189  dp_mol(&m),
190  dp_atomsInPlay(atomsInPlay),
191  dp_bondsInPlay(bondsInPlay){};
192  int operator()(int i, int j) const {
193  PRECONDITION(dp_atoms, "no atoms");
194  PRECONDITION(dp_mol, "no molecule");
195  PRECONDITION(i != j, "bad call");
196  if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
197  return 0;
198  }
199 
200  if (dp_atoms[i].neighborNum < dp_atoms[j].neighborNum) {
201  return -1;
202  } else if (dp_atoms[i].neighborNum > dp_atoms[j].neighborNum) {
203  return 1;
204  }
205 
206  if (dp_atoms[i].revistedNeighbors < dp_atoms[j].revistedNeighbors) {
207  return -1;
208  } else if (dp_atoms[i].revistedNeighbors > dp_atoms[j].revistedNeighbors) {
209  return 1;
210  }
211 
212  if ((dp_atomsInPlay && (*dp_atomsInPlay)[i]) || !dp_atomsInPlay) {
213  updateAtomNeighborIndex(dp_atoms, dp_atoms[i].bonds);
214  }
215  if ((dp_atomsInPlay && (*dp_atomsInPlay)[j]) || !dp_atomsInPlay) {
216  updateAtomNeighborIndex(dp_atoms, dp_atoms[j].bonds);
217  }
218  for (unsigned int ii = 0;
219  ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size(); ++ii) {
220  int cmp =
221  bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
222  if (cmp) return cmp;
223  }
224 
225  if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
226  return -1;
227  } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
228  return 1;
229  }
230  return 0;
231  }
232 };
233 
235  unsigned int getAtomRingNbrCode(unsigned int i) const {
236  if (!dp_atoms[i].hasRingNbr) return 0;
237 
238  int *nbrs = dp_atoms[i].nbrIds;
239  unsigned int code = 0;
240  for (unsigned j = 0; j < dp_atoms[i].degree; ++j) {
241  if (dp_atoms[nbrs[j]].isRingStereoAtom) {
242  code += dp_atoms[nbrs[j]].index * 10000 + 1; // j;
243  }
244  }
245  return code;
246  }
247 
248  int basecomp(int i, int j) const {
249  PRECONDITION(dp_atoms, "no atoms");
250  unsigned int ivi, ivj;
251 
252  // always start with the current class:
253  ivi = dp_atoms[i].index;
254  ivj = dp_atoms[j].index;
255  if (ivi < ivj)
256  return -1;
257  else if (ivi > ivj)
258  return 1;
259 
260  // use the atom-mapping numbers if they were assigned
261  /* boost::any_cast FILED here:
262  std::string molAtomMapNumber_i="";
263  std::string molAtomMapNumber_j="";
264  */
265  int molAtomMapNumber_i = 0;
266  int molAtomMapNumber_j = 0;
267  dp_atoms[i].atom->getPropIfPresent(common_properties::molAtomMapNumber,
268  molAtomMapNumber_i);
269  dp_atoms[j].atom->getPropIfPresent(common_properties::molAtomMapNumber,
270  molAtomMapNumber_j);
271  if (molAtomMapNumber_i < molAtomMapNumber_j)
272  return -1;
273  else if (molAtomMapNumber_i > molAtomMapNumber_j)
274  return 1;
275 
276  // start by comparing degree
277  ivi = dp_atoms[i].degree;
278  ivj = dp_atoms[j].degree;
279  if (ivi < ivj)
280  return -1;
281  else if (ivi > ivj)
282  return 1;
283 
284  if (dp_atoms[i].p_symbol && dp_atoms[j].p_symbol) {
285  if (*(dp_atoms[i].p_symbol) < *(dp_atoms[j].p_symbol))
286  return -1;
287  else if (*(dp_atoms[i].p_symbol) > *(dp_atoms[j].p_symbol))
288  return 1;
289  else
290  return 0;
291  }
292  // move onto atomic number
293  ivi = dp_atoms[i].atom->getAtomicNum();
294  ivj = dp_atoms[j].atom->getAtomicNum();
295  if (ivi < ivj)
296  return -1;
297  else if (ivi > ivj)
298  return 1;
299 
300  // isotopes if we're using them
301  if (df_useIsotopes) {
302  ivi = dp_atoms[i].atom->getIsotope();
303  ivj = dp_atoms[j].atom->getIsotope();
304  if (ivi < ivj)
305  return -1;
306  else if (ivi > ivj)
307  return 1;
308  }
309 
310  // nHs
311  ivi = dp_atoms[i].totalNumHs;
312  ivj = dp_atoms[j].totalNumHs;
313  if (ivi < ivj)
314  return -1;
315  else if (ivi > ivj)
316  return 1;
317 
318  // charge
319  ivi = dp_atoms[i].atom->getFormalCharge();
320  ivj = dp_atoms[j].atom->getFormalCharge();
321  if (ivi < ivj)
322  return -1;
323  else if (ivi > ivj)
324  return 1;
325 
326  // chirality if we're using it
327  if (df_useChirality) {
328  // first atom stereochem:
329  ivi = 0;
330  ivj = 0;
331  std::string cipCode;
332  if (dp_atoms[i].atom->getPropIfPresent(common_properties::_CIPCode,
333  cipCode)) {
334  ivi = cipCode == "R" ? 2 : 1;
335  }
336  if (dp_atoms[j].atom->getPropIfPresent(common_properties::_CIPCode,
337  cipCode)) {
338  ivj = cipCode == "R" ? 2 : 1;
339  }
340  if (ivi < ivj)
341  return -1;
342  else if (ivi > ivj)
343  return 1;
344 
345  // can't actually use values here, because they are arbitrary
346  ivi = dp_atoms[i].atom->getChiralTag() != 0;
347  ivj = dp_atoms[j].atom->getChiralTag() != 0;
348  if (ivi < ivj)
349  return -1;
350  else if (ivi > ivj)
351  return 1;
352  }
353  if (df_useChiralityRings) {
354  // ring stereochemistry
355  ivi = getAtomRingNbrCode(i);
356  ivj = getAtomRingNbrCode(j);
357  if (ivi < ivj)
358  return -1;
359  else if (ivi > ivj)
360  return 1;
361  // bond stereo is taken care of in the neighborhood comparison
362  }
363  return 0;
364  }
365 
366  public:
368  const ROMol *dp_mol;
369  const boost::dynamic_bitset<> *dp_atomsInPlay, *dp_bondsInPlay;
374 
376  : dp_atoms(NULL),
377  dp_mol(NULL),
378  dp_atomsInPlay(NULL),
379  dp_bondsInPlay(NULL),
380  df_useNbrs(false),
381  df_useIsotopes(true),
382  df_useChirality(true),
383  df_useChiralityRings(true){};
385  const boost::dynamic_bitset<> *atomsInPlay = NULL,
386  const boost::dynamic_bitset<> *bondsInPlay = NULL)
387  : dp_atoms(atoms),
388  dp_mol(&m),
389  dp_atomsInPlay(atomsInPlay),
390  dp_bondsInPlay(bondsInPlay),
391  df_useNbrs(false),
392  df_useIsotopes(true),
393  df_useChirality(true),
394  df_useChiralityRings(true){};
395  int operator()(int i, int j) const {
396  PRECONDITION(dp_atoms, "no atoms");
397  PRECONDITION(dp_mol, "no molecule");
398  PRECONDITION(i != j, "bad call");
399  if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
400  return 0;
401  }
402 
403  int v = basecomp(i, j);
404  if (v) {
405  return v;
406  }
407 
408  if (df_useNbrs) {
409  if ((dp_atomsInPlay && (*dp_atomsInPlay)[i]) || !dp_atomsInPlay) {
410  updateAtomNeighborIndex(dp_atoms, dp_atoms[i].bonds);
411  }
412  if ((dp_atomsInPlay && (*dp_atomsInPlay)[j]) || !dp_atomsInPlay) {
413  updateAtomNeighborIndex(dp_atoms, dp_atoms[j].bonds);
414  }
415 
416  for (unsigned int ii = 0;
417  ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
418  ++ii) {
419  int cmp =
420  bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
421  if (cmp) return cmp;
422  }
423 
424  if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
425  return -1;
426  } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
427  return 1;
428  }
429  }
430  return 0;
431  }
432 };
433 
434 /*
435  * A compare function to discriminate chiral atoms, similar to the CIP rules.
436  * This functionality is currently not used.
437  */
438 
439 const unsigned int ATNUM_CLASS_OFFSET = 10000;
441  void getAtomNeighborhood(std::vector<bondholder> &nbrs) const {
442  for (unsigned j = 0; j < nbrs.size(); ++j) {
443  unsigned int nbrIdx = nbrs[j].nbrIdx;
444  if (nbrIdx == ATNUM_CLASS_OFFSET) {
445  // Ignore the Hs
446  continue;
447  }
448  const Atom *nbr = dp_atoms[nbrIdx].atom;
449  nbrs[j].nbrSymClass =
450  nbr->getAtomicNum() * ATNUM_CLASS_OFFSET + dp_atoms[nbrIdx].index + 1;
451  }
452  std::sort(nbrs.begin(), nbrs.end(), bondholder::greater);
453  // FIX: don't want to be doing this long-term
454  }
455 
456  int basecomp(int i, int j) const {
457  PRECONDITION(dp_atoms, "no atoms");
458  unsigned int ivi, ivj;
459 
460  // always start with the current class:
461  ivi = dp_atoms[i].index;
462  ivj = dp_atoms[j].index;
463  if (ivi < ivj)
464  return -1;
465  else if (ivi > ivj)
466  return 1;
467 
468  // move onto atomic number
469  ivi = dp_atoms[i].atom->getAtomicNum();
470  ivj = dp_atoms[j].atom->getAtomicNum();
471  if (ivi < ivj)
472  return -1;
473  else if (ivi > ivj)
474  return 1;
475 
476  // isotopes:
477  ivi = dp_atoms[i].atom->getIsotope();
478  ivj = dp_atoms[j].atom->getIsotope();
479  if (ivi < ivj)
480  return -1;
481  else if (ivi > ivj)
482  return 1;
483 
484  // atom stereochem:
485  ivi = 0;
486  ivj = 0;
487  std::string cipCode;
488  if (dp_atoms[i].atom->getPropIfPresent(common_properties::_CIPCode,
489  cipCode)) {
490  ivi = cipCode == "R" ? 2 : 1;
491  }
492  if (dp_atoms[j].atom->getPropIfPresent(common_properties::_CIPCode,
493  cipCode)) {
494  ivj = cipCode == "R" ? 2 : 1;
495  }
496  if (ivi < ivj)
497  return -1;
498  else if (ivi > ivj)
499  return 1;
500 
501  // bond stereo is taken care of in the neighborhood comparison
502  return 0;
503  }
504 
505  public:
507  const ROMol *dp_mol;
510  : dp_atoms(NULL), dp_mol(NULL), df_useNbrs(false){};
512  : dp_atoms(atoms), dp_mol(&m), df_useNbrs(false){};
513  int operator()(int i, int j) const {
514  PRECONDITION(dp_atoms, "no atoms");
515  PRECONDITION(dp_mol, "no molecule");
516  PRECONDITION(i != j, "bad call");
517  int v = basecomp(i, j);
518  if (v) return v;
519 
520  if (df_useNbrs) {
521  getAtomNeighborhood(dp_atoms[i].bonds);
522  getAtomNeighborhood(dp_atoms[j].bonds);
523 
524  // we do two passes through the neighbor lists. The first just uses the
525  // atomic numbers (by passing the optional 10000 to bondholder::compare),
526  // the second takes the already-computed index into account
527  for (unsigned int ii = 0;
528  ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
529  ++ii) {
530  int cmp = bondholder::compare(
531  dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii], ATNUM_CLASS_OFFSET);
532  if (cmp) return cmp;
533  }
534  for (unsigned int ii = 0;
535  ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
536  ++ii) {
537  int cmp =
538  bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
539  if (cmp) return cmp;
540  }
541  if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
542  return -1;
543  } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
544  return 1;
545  }
546  }
547  return 0;
548  }
549 };
550 
551 /*
552  * Basic canonicalization function to organize the partitions which will be
553  * sorted next.
554  * */
555 
556 template <typename CompareFunc>
557 void RefinePartitions(const ROMol &mol, canon_atom *atoms, CompareFunc compar,
558  int mode, int *order, int *count, int &activeset,
559  int *next, int *changed, char *touchedPartitions) {
560  unsigned int nAtoms = mol.getNumAtoms();
561  int partition;
562  int symclass = 0;
563  int *start;
564  int offset;
565  int index;
566  int len;
567  int i;
568  // std::vector<char> touchedPartitions(mol.getNumAtoms(),0);
569 
570  // std::cerr<<"&&&&&&&&&&&&&&&& RP"<<std::endl;
571  while (activeset != -1) {
572  // std::cerr<<"ITER: "<<activeset<<" next: "<<next[activeset]<<std::endl;
573  // std::cerr<<" next: ";
574  // for(unsigned int ii=0;ii<nAtoms;++ii){
575  // std::cerr<<ii<<":"<<next[ii]<<" ";
576  // }
577  // std::cerr<<std::endl;
578  // for(unsigned int ii=0;ii<nAtoms;++ii){
579  // std::cerr<<order[ii]<<" count: "<<count[order[ii]]<<" index:
580  // "<<atoms[order[ii]].index<<std::endl;
581  // }
582 
583  partition = activeset;
584  activeset = next[partition];
585  next[partition] = -2;
586 
587  len = count[partition];
588  offset = atoms[partition].index;
589  start = order + offset;
590  // std::cerr<<"\n\n**************************************************************"<<std::endl;
591  // std::cerr<<" sort - class:"<<atoms[partition].index<<" len: "<<len<<":";
592  // for(unsigned int ii=0;ii<len;++ii){
593  // std::cerr<<" "<<order[offset+ii]+1;
594  // }
595  // std::cerr<<std::endl;
596  // for(unsigned int ii=0;ii<nAtoms;++ii){
597  // std::cerr<<order[ii]+1<<" count: "<<count[order[ii]]<<" index:
598  // "<<atoms[order[ii]].index<<std::endl;
599  // }
600  hanoisort(start, len, count, changed, compar);
601  // std::cerr<<"*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*"<<std::endl;
602  // std::cerr<<" result:";
603  // for(unsigned int ii=0;ii<nAtoms;++ii){
604  // std::cerr<<order[ii]+1<<" count: "<<count[order[ii]]<<" index:
605  // "<<atoms[order[ii]].index<<std::endl;
606  // }
607  for (int k = 0; k < len; ++k) {
608  changed[start[k]] = 0;
609  }
610 
611  index = start[0];
612  // std::cerr<<" len:"<<len<<" index:"<<index<<"
613  // count:"<<count[index]<<std::endl;
614  for (i = count[index]; i < len; i++) {
615  index = start[i];
616  if (count[index]) symclass = offset + i;
617  atoms[index].index = symclass;
618  // std::cerr<<" "<<index+1<<"("<<symclass<<")";
619  // if(mode && (activeset<0 || count[index]>count[activeset]) ){
620  // activeset=index;
621  //}
622  for (unsigned j = 0; j < atoms[index].degree; ++j) {
623  changed[atoms[index].nbrIds[j]] = 1;
624  }
625  }
626  // std::cerr<<std::endl;
627 
628  if (mode) {
629  index = start[0];
630  for (i = count[index]; i < len; i++) {
631  index = start[i];
632  for (unsigned j = 0; j < atoms[index].degree; ++j) {
633  unsigned int nbor = atoms[index].nbrIds[j];
634  touchedPartitions[atoms[nbor].index] = 1;
635  }
636  }
637  for (unsigned int ii = 0; ii < nAtoms; ++ii) {
638  if (touchedPartitions[ii]) {
639  partition = order[ii];
640  if ((count[partition] > 1) && (next[partition] == -2)) {
641  next[partition] = activeset;
642  activeset = partition;
643  }
644  touchedPartitions[ii] = 0;
645  }
646  }
647  }
648  }
649 } // end of RefinePartitions()
650 
651 template <typename CompareFunc>
652 void BreakTies(const ROMol &mol, canon_atom *atoms, CompareFunc compar,
653  int mode, int *order, int *count, int &activeset, int *next,
654  int *changed, char *touchedPartitions) {
655  unsigned int nAtoms = mol.getNumAtoms();
656  int partition;
657  int offset;
658  int index;
659  int len;
660  int oldPart = 0;
661 
662  for (unsigned int i = 0; i < nAtoms; i++) {
663  partition = order[i];
664  oldPart = atoms[partition].index;
665  while (count[partition] > 1) {
666  len = count[partition];
667  offset = atoms[partition].index + len - 1;
668  index = order[offset];
669  atoms[index].index = offset;
670  count[partition] = len - 1;
671  count[index] = 1;
672 
673  // test for ions, water molecules with no
674  if (atoms[index].degree < 1) {
675  continue;
676  }
677  for (unsigned j = 0; j < atoms[index].degree; ++j) {
678  unsigned int nbor = atoms[index].nbrIds[j];
679  touchedPartitions[atoms[nbor].index] = 1;
680  changed[nbor] = 1;
681  }
682 
683  for (unsigned int ii = 0; ii < nAtoms; ++ii) {
684  if (touchedPartitions[ii]) {
685  int npart = order[ii];
686  if ((count[npart] > 1) && (next[npart] == -2)) {
687  next[npart] = activeset;
688  activeset = npart;
689  }
690  touchedPartitions[ii] = 0;
691  }
692  }
693  RefinePartitions(mol, atoms, compar, mode, order, count, activeset, next,
694  changed, touchedPartitions);
695  }
696  // not sure if this works each time
697  if (atoms[partition].index != oldPart) {
698  i -= 1;
699  }
700  }
701 } // end of BreakTies()
702 
703 void CreateSinglePartition(unsigned int nAtoms, int *order, int *count,
704  canon_atom *atoms);
705 
706 void ActivatePartitions(unsigned int nAtoms, int *order, int *count,
707  int &activeset, int *next, int *changed);
708 
709 void rankMolAtoms(const ROMol &mol, std::vector<unsigned int> &res,
710  bool breakTies = true, bool includeChirality = true,
711  bool includeIsotopes = true);
712 
713 void rankFragmentAtoms(const ROMol &mol, std::vector<unsigned int> &res,
714  const boost::dynamic_bitset<> &atomsInPlay,
715  const boost::dynamic_bitset<> &bondsInPlay,
716  const std::vector<std::string> *atomSymbols = NULL,
717  bool breakTies = true, bool includeChirality = true,
718  bool includeIsotopes = true);
719 
720 void chiralRankMolAtoms(const ROMol &mol, std::vector<unsigned int> &res);
721 
722 void initCanonAtoms(const ROMol &mol, std::vector<Canon::canon_atom> &atoms,
723  bool includeChirality = true);
724 
725 } // end of Canon namespace
726 } // end of RDKit namespace
int operator()(int i, int j) const
Definition: new_canon.h:513
const std::string * p_symbol
Definition: new_canon.h:84
void chiralRankMolAtoms(const ROMol &mol, std::vector< unsigned int > &res)
const boost::dynamic_bitset * dp_bondsInPlay
Definition: new_canon.h:121
Bond::BondType bondType
Definition: new_canon.h:32
std::vector< int > neighborNum
Definition: new_canon.h:85
void rankMolAtoms(const ROMol &mol, std::vector< unsigned int > &res, bool breakTies=true, bool includeChirality=true, bool includeIsotopes=true)
void updateAtomNeighborIndex(canon_atom *atoms, std::vector< bondholder > &nbrs)
unsigned int totalNumHs
Definition: new_canon.h:80
unsigned int nbrIdx
Definition: new_canon.h:35
void CreateSinglePartition(unsigned int nAtoms, int *order, int *count, canon_atom *atoms)
void ActivatePartitions(unsigned int nAtoms, int *order, int *count, int &activeset, int *next, int *changed)
const std::string molAtomMapNumber
Defines the primary molecule class ROMol as well as associated typedefs.
void hanoisort(int *base, int nel, int *count, int *changed, CompareFunc compar)
Definition: hanoiSort.h:140
BondStereo
the nature of the bond&#39;s stereochem (for cis/trans)
Definition: Bond.h:96
unsigned int getNumAtoms(bool onlyExplicit=1) const
returns our number of atoms
AtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=NULL, const boost::dynamic_bitset<> *bondsInPlay=NULL)
Definition: new_canon.h:384
bondholder(Bond::BondType bt, Bond::BondStereo bs, unsigned int ni, unsigned int nsc)
Definition: new_canon.h:41
BondType
the type of Bond
Definition: Bond.h:57
ROMol is a molecule class that is intended to have a fixed topology.
Definition: ROMol.h:106
const boost::dynamic_bitset * dp_bondsInPlay
Definition: new_canon.h:369
std::vector< bondholder > bonds
Definition: new_canon.h:87
static bool greater(const bondholder &lhs, const bondholder &rhs)
Definition: new_canon.h:55
ChiralAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m)
Definition: new_canon.h:511
const unsigned int ATNUM_CLASS_OFFSET
Definition: new_canon.h:439
SpecialSymmetryAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=NULL, const boost::dynamic_bitset<> *bondsInPlay=NULL)
Definition: new_canon.h:184
int getAtomicNum() const
returns our atomic number
Definition: Atom.h:116
bool operator<(const bondholder &o) const
Definition: new_canon.h:50
const std::string _CIPCode
Includes a bunch of functionality for handling Atom and Bond queries.
Definition: Atom.h:29
int operator()(int i, int j) const
Definition: new_canon.h:395
unsigned int nbrSymClass
Definition: new_canon.h:34
void BreakTies(const ROMol &mol, canon_atom *atoms, CompareFunc compar, int mode, int *order, int *count, int &activeset, int *next, int *changed, char *touchedPartitions)
Definition: new_canon.h:652
const boost::dynamic_bitset * dp_bondsInPlay
Definition: new_canon.h:177
class for representing a bond
Definition: Bond.h:47
static int compare(const bondholder &x, const bondholder &y, unsigned int div=1)
Definition: new_canon.h:62
void RefinePartitions(const ROMol &mol, canon_atom *atoms, CompareFunc compar, int mode, int *order, int *count, int &activeset, int *next, int *changed, char *touchedPartitions)
Definition: new_canon.h:557
#define PRECONDITION(expr, mess)
Definition: Invariant.h:107
void updateAtomNeighborNumSwaps(canon_atom *atoms, std::vector< bondholder > &nbrs, unsigned int atomIdx, std::vector< std::pair< unsigned int, unsigned int > > &result)
unsigned int degree
Definition: new_canon.h:79
void rankFragmentAtoms(const ROMol &mol, std::vector< unsigned int > &res, const boost::dynamic_bitset<> &atomsInPlay, const boost::dynamic_bitset<> &bondsInPlay, const std::vector< std::string > *atomSymbols=NULL, bool breakTies=true, bool includeChirality=true, bool includeIsotopes=true)
std::vector< int > revistedNeighbors
Definition: new_canon.h:86
bondholder(Bond::BondType bt, unsigned int bs, unsigned int ni, unsigned int nsc)
Definition: new_canon.h:47
void initCanonAtoms(const ROMol &mol, std::vector< Canon::canon_atom > &atoms, bool includeChirality=true)
SpecialChiralityAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=NULL, const boost::dynamic_bitset<> *bondsInPlay=NULL)
Definition: new_canon.h:128
unsigned int bondStereo
Definition: new_canon.h:33
The class for representing atoms.
Definition: Atom.h:68
Canon::canon_atom * dp_atoms
Definition: new_canon.h:367