RDKit
Open-source cheminformatics and machine learning.
new_canon.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2014 Greg Landrum
3 // Adapted from pseudo-code from Roger Sayle
4 //
5 // @@ All Rights Reserved @@
6 // This file is part of the RDKit.
7 // The contents are covered by the terms of the BSD license
8 // which is included in the file license.txt, found at the root
9 // of the RDKit source tree.
10 //
11 
12 #include <RDGeneral/hanoiSort.h>
13 #include <GraphMol/ROMol.h>
14 #include <GraphMol/RingInfo.h>
16 #include <boost/cstdint.hpp>
17 #include <boost/foreach.hpp>
18 #include <boost/dynamic_bitset.hpp>
20 #include <cstring>
21 #include <iostream>
22 #include <cassert>
23 #include <cstring>
24 #include <vector>
25 
26 //#define VERBOSE_CANON 1
27 
28 namespace RDKit {
29 namespace Canon {
30 
31 struct bondholder {
33  unsigned int bondStereo;
34  unsigned int nbrSymClass;
35  unsigned int nbrIdx;
37  : bondType(Bond::UNSPECIFIED),
38  bondStereo(static_cast<unsigned int>(Bond::STEREONONE)),
39  nbrSymClass(0),
40  nbrIdx(0){};
41  bondholder(Bond::BondType bt, Bond::BondStereo bs, unsigned int ni,
42  unsigned int nsc)
43  : bondType(bt),
44  bondStereo(static_cast<unsigned int>(bs)),
45  nbrSymClass(nsc),
46  nbrIdx(ni){};
47  bondholder(Bond::BondType bt, unsigned int bs, unsigned int ni,
48  unsigned int nsc)
49  : bondType(bt), bondStereo(bs), nbrSymClass(nsc), nbrIdx(ni){};
50  bool operator<(const bondholder &o) const {
51  if (bondType != o.bondType) return bondType < o.bondType;
52  if (bondStereo != o.bondStereo) return bondStereo < o.bondStereo;
53  return nbrSymClass < o.nbrSymClass;
54  }
55  static bool greater(const bondholder &lhs, const bondholder &rhs) {
56  if (lhs.bondType != rhs.bondType) return lhs.bondType > rhs.bondType;
57  if (lhs.bondStereo != rhs.bondStereo)
58  return lhs.bondStereo > rhs.bondStereo;
59  return lhs.nbrSymClass > rhs.nbrSymClass;
60  }
61 
62  static int compare(const bondholder &x, const bondholder &y,
63  unsigned int div = 1) {
64  if (x.bondType < y.bondType)
65  return -1;
66  else if (x.bondType > y.bondType)
67  return 1;
68  if (x.bondStereo < y.bondStereo)
69  return -1;
70  else if (x.bondStereo > y.bondStereo)
71  return 1;
72  return x.nbrSymClass / div - y.nbrSymClass / div;
73  }
74 };
75 
76 struct canon_atom {
77  const Atom *atom;
78  int index;
79  unsigned int degree;
80  unsigned int totalNumHs;
81  bool hasRingNbr;
83  int *nbrIds;
84  const std::string *p_symbol; // if provided, this is used to order atoms
85  std::vector<int> neighborNum;
86  std::vector<int> revistedNeighbors;
87  std::vector<bondholder> bonds;
88 
90  : atom(NULL),
91  index(-1),
92  degree(0),
93  totalNumHs(0),
94  hasRingNbr(false),
95  isRingStereoAtom(false),
96  nbrIds(NULL),
97  p_symbol(NULL){};
98 };
99 
100 void updateAtomNeighborIndex(canon_atom *atoms, std::vector<bondholder> &nbrs);
101 
103  canon_atom *atoms, std::vector<bondholder> &nbrs, unsigned int atomIdx,
104  std::vector<std::pair<unsigned int, unsigned int> > &result);
105 
106 /*
107  * Different types of atom compare functions:
108  *
109  * - SpecialChiralityAtomCompareFunctor: Allows canonizing molecules exhibiting
110  *dependent chirality
111  * - SpecialSymmetryAtomCompareFunctor: Very specialized, allows canonizing
112  *highly symmetrical graphs/molecules
113  * - AtomCompareFunctor: Basic atom compare function which also allows to
114  *include neighbors within the ranking
115  */
116 
118  public:
120  const ROMol *dp_mol;
121  const boost::dynamic_bitset<> *dp_atomsInPlay, *dp_bondsInPlay;
122 
124  : dp_atoms(NULL),
125  dp_mol(NULL),
126  dp_atomsInPlay(NULL),
127  dp_bondsInPlay(NULL){};
129  Canon::canon_atom *atoms, const ROMol &m,
130  const boost::dynamic_bitset<> *atomsInPlay = NULL,
131  const boost::dynamic_bitset<> *bondsInPlay = NULL)
132  : dp_atoms(atoms),
133  dp_mol(&m),
134  dp_atomsInPlay(atomsInPlay),
135  dp_bondsInPlay(bondsInPlay){};
136  int operator()(int i, int j) const {
137  PRECONDITION(dp_atoms, "no atoms");
138  PRECONDITION(dp_mol, "no molecule");
139  PRECONDITION(i != j, "bad call");
140  if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
141  return 0;
142  }
143 
144  if ((dp_atomsInPlay && (*dp_atomsInPlay)[i]) || !dp_atomsInPlay) {
145  updateAtomNeighborIndex(dp_atoms, dp_atoms[i].bonds);
146  }
147  if ((dp_atomsInPlay && (*dp_atomsInPlay)[j]) || !dp_atomsInPlay) {
148  updateAtomNeighborIndex(dp_atoms, dp_atoms[j].bonds);
149  }
150  for (unsigned int ii = 0;
151  ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size(); ++ii) {
152  int cmp =
153  bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
154  if (cmp) return cmp;
155  }
156 
157  std::vector<std::pair<unsigned int, unsigned int> > swapsi;
158  std::vector<std::pair<unsigned int, unsigned int> > swapsj;
159  if ((dp_atomsInPlay && (*dp_atomsInPlay)[i]) || !dp_atomsInPlay) {
160  updateAtomNeighborNumSwaps(dp_atoms, dp_atoms[i].bonds, i, swapsi);
161  }
162  if ((dp_atomsInPlay && (*dp_atomsInPlay)[j]) || !dp_atomsInPlay) {
163  updateAtomNeighborNumSwaps(dp_atoms, dp_atoms[j].bonds, j, swapsj);
164  }
165  for (unsigned int ii = 0; ii < swapsi.size() && ii < swapsj.size(); ++ii) {
166  int cmp = swapsi[ii].second - swapsj[ii].second;
167  if (cmp) return cmp;
168  }
169  return 0;
170  }
171 };
172 
174  public:
176  const ROMol *dp_mol;
177  const boost::dynamic_bitset<> *dp_atomsInPlay, *dp_bondsInPlay;
178 
180  : dp_atoms(NULL),
181  dp_mol(NULL),
182  dp_atomsInPlay(NULL),
183  dp_bondsInPlay(NULL){};
185  Canon::canon_atom *atoms, const ROMol &m,
186  const boost::dynamic_bitset<> *atomsInPlay = NULL,
187  const boost::dynamic_bitset<> *bondsInPlay = NULL)
188  : dp_atoms(atoms),
189  dp_mol(&m),
190  dp_atomsInPlay(atomsInPlay),
191  dp_bondsInPlay(bondsInPlay){};
192  int operator()(int i, int j) const {
193  PRECONDITION(dp_atoms, "no atoms");
194  PRECONDITION(dp_mol, "no molecule");
195  PRECONDITION(i != j, "bad call");
196  if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
197  return 0;
198  }
199 
200  if (dp_atoms[i].neighborNum < dp_atoms[j].neighborNum) {
201  return -1;
202  } else if (dp_atoms[i].neighborNum > dp_atoms[j].neighborNum) {
203  return 1;
204  }
205 
206  if (dp_atoms[i].revistedNeighbors < dp_atoms[j].revistedNeighbors) {
207  return -1;
208  } else if (dp_atoms[i].revistedNeighbors > dp_atoms[j].revistedNeighbors) {
209  return 1;
210  }
211 
212  if ((dp_atomsInPlay && (*dp_atomsInPlay)[i]) || !dp_atomsInPlay) {
213  updateAtomNeighborIndex(dp_atoms, dp_atoms[i].bonds);
214  }
215  if ((dp_atomsInPlay && (*dp_atomsInPlay)[j]) || !dp_atomsInPlay) {
216  updateAtomNeighborIndex(dp_atoms, dp_atoms[j].bonds);
217  }
218  for (unsigned int ii = 0;
219  ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size(); ++ii) {
220  int cmp =
221  bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
222  if (cmp) return cmp;
223  }
224 
225  if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
226  return -1;
227  } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
228  return 1;
229  }
230  return 0;
231  }
232 };
233 
235  unsigned int getAtomRingNbrCode(unsigned int i) const {
236  if (!dp_atoms[i].hasRingNbr) return 0;
237 
238  int *nbrs = dp_atoms[i].nbrIds;
239  unsigned int code = 0;
240  for (unsigned j = 0; j < dp_atoms[i].degree; ++j) {
241  if (dp_atoms[nbrs[j]].isRingStereoAtom) {
242  code += dp_atoms[nbrs[j]].index * 10000 + 1; // j;
243  }
244  }
245  return code;
246  }
247 
248  int basecomp(int i, int j) const {
249  PRECONDITION(dp_atoms, "no atoms");
250  unsigned int ivi, ivj;
251 
252  // always start with the current class:
253  ivi = dp_atoms[i].index;
254  ivj = dp_atoms[j].index;
255  if (ivi < ivj)
256  return -1;
257  else if (ivi > ivj)
258  return 1;
259 
260  // use the atom-mapping numbers if they were assigned
261  /* boost::any_cast FILED here:
262  std::string molAtomMapNumber_i="";
263  std::string molAtomMapNumber_j="";
264  */
265  int molAtomMapNumber_i = 0;
266  int molAtomMapNumber_j = 0;
267  dp_atoms[i].atom->getPropIfPresent(common_properties::molAtomMapNumber,
268  molAtomMapNumber_i);
269  dp_atoms[j].atom->getPropIfPresent(common_properties::molAtomMapNumber,
270  molAtomMapNumber_j);
271  if (molAtomMapNumber_i < molAtomMapNumber_j)
272  return -1;
273  else if (molAtomMapNumber_i > molAtomMapNumber_j)
274  return 1;
275 
276  // start by comparing degree
277  ivi = dp_atoms[i].degree;
278  ivj = dp_atoms[j].degree;
279  if (ivi < ivj)
280  return -1;
281  else if (ivi > ivj)
282  return 1;
283 
284  if (dp_atoms[i].p_symbol && dp_atoms[j].p_symbol) {
285  if (*(dp_atoms[i].p_symbol) < *(dp_atoms[j].p_symbol))
286  return -1;
287  else if (*(dp_atoms[i].p_symbol) > *(dp_atoms[j].p_symbol))
288  return 1;
289  else
290  return 0;
291  }
292 
293  // move onto atomic number
294  ivi = dp_atoms[i].atom->getAtomicNum();
295  ivj = dp_atoms[j].atom->getAtomicNum();
296  if (ivi < ivj)
297  return -1;
298  else if (ivi > ivj)
299  return 1;
300 
301  // isotopes if we're using them
302  if (df_useIsotopes) {
303  ivi = dp_atoms[i].atom->getIsotope();
304  ivj = dp_atoms[j].atom->getIsotope();
305  if (ivi < ivj)
306  return -1;
307  else if (ivi > ivj)
308  return 1;
309  }
310 
311  // nHs
312  ivi = dp_atoms[i].totalNumHs;
313  ivj = dp_atoms[j].totalNumHs;
314  if (ivi < ivj)
315  return -1;
316  else if (ivi > ivj)
317  return 1;
318 
319  // charge
320  ivi = dp_atoms[i].atom->getFormalCharge();
321  ivj = dp_atoms[j].atom->getFormalCharge();
322  if (ivi < ivj)
323  return -1;
324  else if (ivi > ivj)
325  return 1;
326 
327  // chirality if we're using it
328  if (df_useChirality) {
329  // first atom stereochem:
330  ivi = 0;
331  ivj = 0;
332  std::string cipCode;
333  if (dp_atoms[i].atom->getPropIfPresent(common_properties::_CIPCode,
334  cipCode)) {
335  ivi = cipCode == "R" ? 2 : 1;
336  }
337  if (dp_atoms[j].atom->getPropIfPresent(common_properties::_CIPCode,
338  cipCode)) {
339  ivj = cipCode == "R" ? 2 : 1;
340  }
341  if (ivi < ivj)
342  return -1;
343  else if (ivi > ivj)
344  return 1;
345 
346  // can't actually use values here, because they are arbitrary
347  ivi = dp_atoms[i].atom->getChiralTag() != 0;
348  ivj = dp_atoms[j].atom->getChiralTag() != 0;
349  if (ivi < ivj)
350  return -1;
351  else if (ivi > ivj)
352  return 1;
353  }
354 
355  if (df_useChiralityRings) {
356  // ring stereochemistry
357  ivi = getAtomRingNbrCode(i);
358  ivj = getAtomRingNbrCode(j);
359  if (ivi < ivj)
360  return -1;
361  else if (ivi > ivj)
362  return 1;
363  // bond stereo is taken care of in the neighborhood comparison
364  }
365  return 0;
366  }
367 
368  public:
370  const ROMol *dp_mol;
371  const boost::dynamic_bitset<> *dp_atomsInPlay, *dp_bondsInPlay;
376 
378  : dp_atoms(NULL),
379  dp_mol(NULL),
380  dp_atomsInPlay(NULL),
381  dp_bondsInPlay(NULL),
382  df_useNbrs(false),
383  df_useIsotopes(true),
384  df_useChirality(true),
385  df_useChiralityRings(true){};
387  const boost::dynamic_bitset<> *atomsInPlay = NULL,
388  const boost::dynamic_bitset<> *bondsInPlay = NULL)
389  : dp_atoms(atoms),
390  dp_mol(&m),
391  dp_atomsInPlay(atomsInPlay),
392  dp_bondsInPlay(bondsInPlay),
393  df_useNbrs(false),
394  df_useIsotopes(true),
395  df_useChirality(true),
396  df_useChiralityRings(true){};
397  int operator()(int i, int j) const {
398  PRECONDITION(dp_atoms, "no atoms");
399  PRECONDITION(dp_mol, "no molecule");
400  PRECONDITION(i != j, "bad call");
401  if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
402  return 0;
403  }
404 
405  int v = basecomp(i, j);
406  if (v) {
407  return v;
408  }
409 
410  if (df_useNbrs) {
411  if ((dp_atomsInPlay && (*dp_atomsInPlay)[i]) || !dp_atomsInPlay) {
412  updateAtomNeighborIndex(dp_atoms, dp_atoms[i].bonds);
413  }
414  if ((dp_atomsInPlay && (*dp_atomsInPlay)[j]) || !dp_atomsInPlay) {
415  updateAtomNeighborIndex(dp_atoms, dp_atoms[j].bonds);
416  }
417 
418  for (unsigned int ii = 0;
419  ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
420  ++ii) {
421  int cmp =
422  bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
423  if (cmp) return cmp;
424  }
425 
426  if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
427  return -1;
428  } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
429  return 1;
430  }
431  }
432  return 0;
433  }
434 };
435 
436 /*
437  * A compare function to discriminate chiral atoms, similar to the CIP rules.
438  * This functionality is currently not used.
439  */
440 
441 const unsigned int ATNUM_CLASS_OFFSET = 10000;
443  void getAtomNeighborhood(std::vector<bondholder> &nbrs) const {
444  for (unsigned j = 0; j < nbrs.size(); ++j) {
445  unsigned int nbrIdx = nbrs[j].nbrIdx;
446  if (nbrIdx == ATNUM_CLASS_OFFSET) {
447  // Ignore the Hs
448  continue;
449  }
450  const Atom *nbr = dp_atoms[nbrIdx].atom;
451  nbrs[j].nbrSymClass =
452  nbr->getAtomicNum() * ATNUM_CLASS_OFFSET + dp_atoms[nbrIdx].index + 1;
453  }
454  std::sort(nbrs.begin(), nbrs.end(), bondholder::greater);
455  // FIX: don't want to be doing this long-term
456  }
457 
458  int basecomp(int i, int j) const {
459  PRECONDITION(dp_atoms, "no atoms");
460  unsigned int ivi, ivj;
461 
462  // always start with the current class:
463  ivi = dp_atoms[i].index;
464  ivj = dp_atoms[j].index;
465  if (ivi < ivj)
466  return -1;
467  else if (ivi > ivj)
468  return 1;
469 
470  // move onto atomic number
471  ivi = dp_atoms[i].atom->getAtomicNum();
472  ivj = dp_atoms[j].atom->getAtomicNum();
473  if (ivi < ivj)
474  return -1;
475  else if (ivi > ivj)
476  return 1;
477 
478  // isotopes:
479  ivi = dp_atoms[i].atom->getIsotope();
480  ivj = dp_atoms[j].atom->getIsotope();
481  if (ivi < ivj)
482  return -1;
483  else if (ivi > ivj)
484  return 1;
485 
486  // atom stereochem:
487  ivi = 0;
488  ivj = 0;
489  std::string cipCode;
490  if (dp_atoms[i].atom->getPropIfPresent(common_properties::_CIPCode,
491  cipCode)) {
492  ivi = cipCode == "R" ? 2 : 1;
493  }
494  if (dp_atoms[j].atom->getPropIfPresent(common_properties::_CIPCode,
495  cipCode)) {
496  ivj = cipCode == "R" ? 2 : 1;
497  }
498  if (ivi < ivj)
499  return -1;
500  else if (ivi > ivj)
501  return 1;
502 
503  // bond stereo is taken care of in the neighborhood comparison
504  return 0;
505  }
506 
507  public:
509  const ROMol *dp_mol;
512  : dp_atoms(NULL), dp_mol(NULL), df_useNbrs(false){};
514  : dp_atoms(atoms), dp_mol(&m), df_useNbrs(false){};
515  int operator()(int i, int j) const {
516  PRECONDITION(dp_atoms, "no atoms");
517  PRECONDITION(dp_mol, "no molecule");
518  PRECONDITION(i != j, "bad call");
519  int v = basecomp(i, j);
520  if (v) return v;
521 
522  if (df_useNbrs) {
523  getAtomNeighborhood(dp_atoms[i].bonds);
524  getAtomNeighborhood(dp_atoms[j].bonds);
525 
526  // we do two passes through the neighbor lists. The first just uses the
527  // atomic numbers (by passing the optional 10000 to bondholder::compare),
528  // the second takes the already-computed index into account
529  for (unsigned int ii = 0;
530  ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
531  ++ii) {
532  int cmp = bondholder::compare(
533  dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii], ATNUM_CLASS_OFFSET);
534  if (cmp) return cmp;
535  }
536  for (unsigned int ii = 0;
537  ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
538  ++ii) {
539  int cmp =
540  bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
541  if (cmp) return cmp;
542  }
543  if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
544  return -1;
545  } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
546  return 1;
547  }
548  }
549  return 0;
550  }
551 };
552 
553 /*
554  * Basic canonicalization function to organize the partitions which will be
555  * sorted next.
556  * */
557 
558 template <typename CompareFunc>
559 void RefinePartitions(const ROMol &mol, canon_atom *atoms, CompareFunc compar,
560  int mode, int *order, int *count, int &activeset,
561  int *next, int *changed, char *touchedPartitions) {
562  unsigned int nAtoms = mol.getNumAtoms();
563  int partition;
564  int symclass = 0;
565  int *start;
566  int offset;
567  int index;
568  int len;
569  int i;
570  // std::vector<char> touchedPartitions(mol.getNumAtoms(),0);
571 
572  // std::cerr<<"&&&&&&&&&&&&&&&& RP"<<std::endl;
573  while (activeset != -1) {
574  // std::cerr<<"ITER: "<<activeset<<" next: "<<next[activeset]<<std::endl;
575  // std::cerr<<" next: ";
576  // for(unsigned int ii=0;ii<nAtoms;++ii){
577  // std::cerr<<ii<<":"<<next[ii]<<" ";
578  // }
579  // std::cerr<<std::endl;
580  // for(unsigned int ii=0;ii<nAtoms;++ii){
581  // std::cerr<<order[ii]<<" count: "<<count[order[ii]]<<" index:
582  // "<<atoms[order[ii]].index<<std::endl;
583  // }
584 
585  partition = activeset;
586  activeset = next[partition];
587  next[partition] = -2;
588 
589  len = count[partition];
590  offset = atoms[partition].index;
591  start = order + offset;
592  // std::cerr<<"\n\n**************************************************************"<<std::endl;
593  // std::cerr<<" sort - class:"<<atoms[partition].index<<" len: "<<len<<":";
594  // for(unsigned int ii=0;ii<len;++ii){
595  // std::cerr<<" "<<order[offset+ii]+1;
596  // }
597  // std::cerr<<std::endl;
598  // for(unsigned int ii=0;ii<nAtoms;++ii){
599  // std::cerr<<order[ii]+1<<" count: "<<count[order[ii]]<<" index:
600  // "<<atoms[order[ii]].index<<std::endl;
601  // }
602  hanoisort(start, len, count, changed, compar);
603  // std::cerr<<"*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*"<<std::endl;
604  // std::cerr<<" result:";
605  // for(unsigned int ii=0;ii<nAtoms;++ii){
606  // std::cerr<<order[ii]+1<<" count: "<<count[order[ii]]<<" index:
607  // "<<atoms[order[ii]].index<<std::endl;
608  // }
609  for (int k = 0; k < len; ++k) {
610  changed[start[k]] = 0;
611  }
612 
613  index = start[0];
614  // std::cerr<<" len:"<<len<<" index:"<<index<<"
615  // count:"<<count[index]<<std::endl;
616  for (i = count[index]; i < len; i++) {
617  index = start[i];
618  if (count[index]) symclass = offset + i;
619  atoms[index].index = symclass;
620  // std::cerr<<" "<<index+1<<"("<<symclass<<")";
621  // if(mode && (activeset<0 || count[index]>count[activeset]) ){
622  // activeset=index;
623  //}
624  for (unsigned j = 0; j < atoms[index].degree; ++j) {
625  changed[atoms[index].nbrIds[j]] = 1;
626  }
627  }
628  // std::cerr<<std::endl;
629 
630  if (mode) {
631  index = start[0];
632  for (i = count[index]; i < len; i++) {
633  index = start[i];
634  for (unsigned j = 0; j < atoms[index].degree; ++j) {
635  unsigned int nbor = atoms[index].nbrIds[j];
636  touchedPartitions[atoms[nbor].index] = 1;
637  }
638  }
639  for (unsigned int ii = 0; ii < nAtoms; ++ii) {
640  if (touchedPartitions[ii]) {
641  partition = order[ii];
642  if ((count[partition] > 1) && (next[partition] == -2)) {
643  next[partition] = activeset;
644  activeset = partition;
645  }
646  touchedPartitions[ii] = 0;
647  }
648  }
649  }
650  }
651 } // end of RefinePartitions()
652 
653 template <typename CompareFunc>
654 void BreakTies(const ROMol &mol, canon_atom *atoms, CompareFunc compar,
655  int mode, int *order, int *count, int &activeset, int *next,
656  int *changed, char *touchedPartitions) {
657  unsigned int nAtoms = mol.getNumAtoms();
658  int partition;
659  int offset;
660  int index;
661  int len;
662  int oldPart = 0;
663 
664  for (unsigned int i = 0; i < nAtoms; i++) {
665  partition = order[i];
666  oldPart = atoms[partition].index;
667  while (count[partition] > 1) {
668  len = count[partition];
669  offset = atoms[partition].index + len - 1;
670  index = order[offset];
671  atoms[index].index = offset;
672  count[partition] = len - 1;
673  count[index] = 1;
674 
675  // test for ions, water molecules with no
676  if (atoms[index].degree < 1) {
677  continue;
678  }
679  for (unsigned j = 0; j < atoms[index].degree; ++j) {
680  unsigned int nbor = atoms[index].nbrIds[j];
681  touchedPartitions[atoms[nbor].index] = 1;
682  changed[nbor] = 1;
683  }
684 
685  for (unsigned int ii = 0; ii < nAtoms; ++ii) {
686  if (touchedPartitions[ii]) {
687  int npart = order[ii];
688  if ((count[npart] > 1) && (next[npart] == -2)) {
689  next[npart] = activeset;
690  activeset = npart;
691  }
692  touchedPartitions[ii] = 0;
693  }
694  }
695  RefinePartitions(mol, atoms, compar, mode, order, count, activeset, next,
696  changed, touchedPartitions);
697  }
698  // not sure if this works each time
699  if (atoms[partition].index != oldPart) {
700  i -= 1;
701  }
702  }
703 } // end of BreakTies()
704 
705 void CreateSinglePartition(unsigned int nAtoms, int *order, int *count,
706  canon_atom *atoms);
707 
708 void ActivatePartitions(unsigned int nAtoms, int *order, int *count,
709  int &activeset, int *next, int *changed);
710 
711 void rankMolAtoms(const ROMol &mol, std::vector<unsigned int> &res,
712  bool breakTies = true, bool includeChirality = true,
713  bool includeIsotopes = true);
714 
715 void rankFragmentAtoms(const ROMol &mol, std::vector<unsigned int> &res,
716  const boost::dynamic_bitset<> &atomsInPlay,
717  const boost::dynamic_bitset<> &bondsInPlay,
718  const std::vector<std::string> *atomSymbols = NULL,
719  bool breakTies = true, bool includeChirality = true,
720  bool includeIsotopes = true);
721 
722 void chiralRankMolAtoms(const ROMol &mol, std::vector<unsigned int> &res);
723 
724 void initCanonAtoms(const ROMol &mol, std::vector<Canon::canon_atom> &atoms,
725  bool includeChirality = true);
726 
727 } // end of Canon namespace
728 } // end of RDKit namespace
int operator()(int i, int j) const
Definition: new_canon.h:515
const std::string * p_symbol
Definition: new_canon.h:84
void chiralRankMolAtoms(const ROMol &mol, std::vector< unsigned int > &res)
const boost::dynamic_bitset * dp_bondsInPlay
Definition: new_canon.h:121
Bond::BondType bondType
Definition: new_canon.h:32
std::vector< int > neighborNum
Definition: new_canon.h:85
void rankMolAtoms(const ROMol &mol, std::vector< unsigned int > &res, bool breakTies=true, bool includeChirality=true, bool includeIsotopes=true)
void updateAtomNeighborIndex(canon_atom *atoms, std::vector< bondholder > &nbrs)
unsigned int totalNumHs
Definition: new_canon.h:80
unsigned int nbrIdx
Definition: new_canon.h:35
void CreateSinglePartition(unsigned int nAtoms, int *order, int *count, canon_atom *atoms)
void ActivatePartitions(unsigned int nAtoms, int *order, int *count, int &activeset, int *next, int *changed)
const std::string molAtomMapNumber
Defines the primary molecule class ROMol as well as associated typedefs.
void hanoisort(int *base, int nel, int *count, int *changed, CompareFunc compar)
Definition: hanoiSort.h:140
BondStereo
the nature of the bond&#39;s stereochem (for cis/trans)
Definition: Bond.h:96
unsigned int getNumAtoms(bool onlyExplicit=1) const
returns our number of atoms
AtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=NULL, const boost::dynamic_bitset<> *bondsInPlay=NULL)
Definition: new_canon.h:386
bondholder(Bond::BondType bt, Bond::BondStereo bs, unsigned int ni, unsigned int nsc)
Definition: new_canon.h:41
BondType
the type of Bond
Definition: Bond.h:57
ROMol is a molecule class that is intended to have a fixed topology.
Definition: ROMol.h:103
const boost::dynamic_bitset * dp_bondsInPlay
Definition: new_canon.h:371
std::vector< bondholder > bonds
Definition: new_canon.h:87
static bool greater(const bondholder &lhs, const bondholder &rhs)
Definition: new_canon.h:55
ChiralAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m)
Definition: new_canon.h:513
const unsigned int ATNUM_CLASS_OFFSET
Definition: new_canon.h:441
SpecialSymmetryAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=NULL, const boost::dynamic_bitset<> *bondsInPlay=NULL)
Definition: new_canon.h:184
int getAtomicNum() const
returns our atomic number
Definition: Atom.h:116
bool operator<(const bondholder &o) const
Definition: new_canon.h:50
const std::string _CIPCode
Std stuff.
Definition: Atom.h:29
int operator()(int i, int j) const
Definition: new_canon.h:397
unsigned int nbrSymClass
Definition: new_canon.h:34
void BreakTies(const ROMol &mol, canon_atom *atoms, CompareFunc compar, int mode, int *order, int *count, int &activeset, int *next, int *changed, char *touchedPartitions)
Definition: new_canon.h:654
const boost::dynamic_bitset * dp_bondsInPlay
Definition: new_canon.h:177
class for representing a bond
Definition: Bond.h:47
static int compare(const bondholder &x, const bondholder &y, unsigned int div=1)
Definition: new_canon.h:62
void RefinePartitions(const ROMol &mol, canon_atom *atoms, CompareFunc compar, int mode, int *order, int *count, int &activeset, int *next, int *changed, char *touchedPartitions)
Definition: new_canon.h:559
#define PRECONDITION(expr, mess)
Definition: Invariant.h:107
void updateAtomNeighborNumSwaps(canon_atom *atoms, std::vector< bondholder > &nbrs, unsigned int atomIdx, std::vector< std::pair< unsigned int, unsigned int > > &result)
unsigned int degree
Definition: new_canon.h:79
void rankFragmentAtoms(const ROMol &mol, std::vector< unsigned int > &res, const boost::dynamic_bitset<> &atomsInPlay, const boost::dynamic_bitset<> &bondsInPlay, const std::vector< std::string > *atomSymbols=NULL, bool breakTies=true, bool includeChirality=true, bool includeIsotopes=true)
std::vector< int > revistedNeighbors
Definition: new_canon.h:86
bondholder(Bond::BondType bt, unsigned int bs, unsigned int ni, unsigned int nsc)
Definition: new_canon.h:47
void initCanonAtoms(const ROMol &mol, std::vector< Canon::canon_atom > &atoms, bool includeChirality=true)
SpecialChiralityAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=NULL, const boost::dynamic_bitset<> *bondsInPlay=NULL)
Definition: new_canon.h:128
unsigned int bondStereo
Definition: new_canon.h:33
The class for representing atoms.
Definition: Atom.h:68
Canon::canon_atom * dp_atoms
Definition: new_canon.h:369