RDKit
Open-source cheminformatics and machine learning.
new_canon.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2014 Greg Landrum
3 // Adapted from pseudo-code from Roger Sayle
4 //
5 // @@ All Rights Reserved @@
6 // This file is part of the RDKit.
7 // The contents are covered by the terms of the BSD license
8 // which is included in the file license.txt, found at the root
9 // of the RDKit source tree.
10 //
11 
12 #include <RDGeneral/hanoiSort.h>
13 #include <GraphMol/ROMol.h>
14 #include <GraphMol/RingInfo.h>
15 #include <boost/cstdint.hpp>
16 #include <boost/foreach.hpp>
17 #include <boost/dynamic_bitset.hpp>
18 #include <cstring>
19 #include <iostream>
20 #include <cassert>
21 #include <cstring>
22 #include <vector>
23 
24 //#define VERBOSE_CANON 1
25 
26 namespace RDKit {
27 namespace Canon {
28 
29 struct bondholder {
31  unsigned int bondStereo;
32  unsigned int nbrSymClass;
33  unsigned int nbrIdx;
35  : bondType(Bond::UNSPECIFIED),
36  bondStereo(static_cast<unsigned int>(Bond::STEREONONE)),
37  nbrSymClass(0),
38  nbrIdx(0){};
39  bondholder(Bond::BondType bt, Bond::BondStereo bs, unsigned int ni,
40  unsigned int nsc)
41  : bondType(bt),
42  bondStereo(static_cast<unsigned int>(bs)),
43  nbrSymClass(nsc),
44  nbrIdx(ni){};
45  bondholder(Bond::BondType bt, unsigned int bs, unsigned int ni,
46  unsigned int nsc)
47  : bondType(bt), bondStereo(bs), nbrSymClass(nsc), nbrIdx(ni){};
48  bool operator<(const bondholder &o) const {
49  if (bondType != o.bondType) return bondType < o.bondType;
50  if (bondStereo != o.bondStereo) return bondStereo < o.bondStereo;
51  return nbrSymClass < o.nbrSymClass;
52  }
53  static bool greater(const bondholder &lhs, const bondholder &rhs) {
54  if (lhs.bondType != rhs.bondType) return lhs.bondType > rhs.bondType;
55  if (lhs.bondStereo != rhs.bondStereo)
56  return lhs.bondStereo > rhs.bondStereo;
57  return lhs.nbrSymClass > rhs.nbrSymClass;
58  }
59 
60  static int compare(const bondholder &x, const bondholder &y,
61  unsigned int div = 1) {
62  if (x.bondType < y.bondType)
63  return -1;
64  else if (x.bondType > y.bondType)
65  return 1;
66  if (x.bondStereo < y.bondStereo)
67  return -1;
68  else if (x.bondStereo > y.bondStereo)
69  return 1;
70  return x.nbrSymClass / div - y.nbrSymClass / div;
71  }
72 };
73 
74 struct canon_atom {
75  const Atom *atom;
76  int index;
77  unsigned int degree;
78  unsigned int totalNumHs;
79  bool hasRingNbr;
81  int *nbrIds;
82  const std::string *p_symbol; // if provided, this is used to order atoms
83  std::vector<int> neighborNum;
84  std::vector<int> revistedNeighbors;
85  std::vector<bondholder> bonds;
86 
88  : atom(NULL),
89  index(-1),
90  degree(0),
91  totalNumHs(0),
92  hasRingNbr(false),
93  isRingStereoAtom(false),
94  nbrIds(NULL),
95  p_symbol(NULL){};
96 };
97 
98 void updateAtomNeighborIndex(canon_atom *atoms, std::vector<bondholder> &nbrs);
99 
101  canon_atom *atoms, std::vector<bondholder> &nbrs, unsigned int atomIdx,
102  std::vector<std::pair<unsigned int, unsigned int> > &result);
103 
104 /*
105  * Different types of atom compare functions:
106  *
107  * - SpecialChiralityAtomCompareFunctor: Allows canonizing molecules exhibiting
108  *dependent chirality
109  * - SpecialSymmetryAtomCompareFunctor: Very specialized, allows canonizing
110  *highly symmetrical graphs/molecules
111  * - AtomCompareFunctor: Basic atom compare function which also allows to
112  *include neighbors within the ranking
113  */
114 
116  public:
118  const ROMol *dp_mol;
119  const boost::dynamic_bitset<> *dp_atomsInPlay, *dp_bondsInPlay;
120 
122  : dp_atoms(NULL),
123  dp_mol(NULL),
124  dp_atomsInPlay(NULL),
125  dp_bondsInPlay(NULL){};
127  Canon::canon_atom *atoms, const ROMol &m,
128  const boost::dynamic_bitset<> *atomsInPlay = NULL,
129  const boost::dynamic_bitset<> *bondsInPlay = NULL)
130  : dp_atoms(atoms),
131  dp_mol(&m),
132  dp_atomsInPlay(atomsInPlay),
133  dp_bondsInPlay(bondsInPlay){};
134  int operator()(int i, int j) const {
135  PRECONDITION(dp_atoms, "no atoms");
136  PRECONDITION(dp_mol, "no molecule");
137  PRECONDITION(i != j, "bad call");
138  if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
139  return 0;
140  }
141 
142  if ((dp_atomsInPlay && (*dp_atomsInPlay)[i]) || !dp_atomsInPlay) {
143  updateAtomNeighborIndex(dp_atoms, dp_atoms[i].bonds);
144  }
145  if ((dp_atomsInPlay && (*dp_atomsInPlay)[j]) || !dp_atomsInPlay) {
146  updateAtomNeighborIndex(dp_atoms, dp_atoms[j].bonds);
147  }
148  for (unsigned int ii = 0;
149  ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size(); ++ii) {
150  int cmp =
151  bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
152  if (cmp) return cmp;
153  }
154 
155  std::vector<std::pair<unsigned int, unsigned int> > swapsi;
156  std::vector<std::pair<unsigned int, unsigned int> > swapsj;
157  if ((dp_atomsInPlay && (*dp_atomsInPlay)[i]) || !dp_atomsInPlay) {
158  updateAtomNeighborNumSwaps(dp_atoms, dp_atoms[i].bonds, i, swapsi);
159  }
160  if ((dp_atomsInPlay && (*dp_atomsInPlay)[j]) || !dp_atomsInPlay) {
161  updateAtomNeighborNumSwaps(dp_atoms, dp_atoms[j].bonds, j, swapsj);
162  }
163  for (unsigned int ii = 0; ii < swapsi.size() && ii < swapsj.size(); ++ii) {
164  int cmp = swapsi[ii].second - swapsj[ii].second;
165  if (cmp) return cmp;
166  }
167  return 0;
168  }
169 };
170 
172  public:
174  const ROMol *dp_mol;
175  const boost::dynamic_bitset<> *dp_atomsInPlay, *dp_bondsInPlay;
176 
178  : dp_atoms(NULL),
179  dp_mol(NULL),
180  dp_atomsInPlay(NULL),
181  dp_bondsInPlay(NULL){};
183  Canon::canon_atom *atoms, const ROMol &m,
184  const boost::dynamic_bitset<> *atomsInPlay = NULL,
185  const boost::dynamic_bitset<> *bondsInPlay = NULL)
186  : dp_atoms(atoms),
187  dp_mol(&m),
188  dp_atomsInPlay(atomsInPlay),
189  dp_bondsInPlay(bondsInPlay){};
190  int operator()(int i, int j) const {
191  PRECONDITION(dp_atoms, "no atoms");
192  PRECONDITION(dp_mol, "no molecule");
193  PRECONDITION(i != j, "bad call");
194  if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
195  return 0;
196  }
197 
198  if (dp_atoms[i].neighborNum < dp_atoms[j].neighborNum) {
199  return -1;
200  } else if (dp_atoms[i].neighborNum > dp_atoms[j].neighborNum) {
201  return 1;
202  }
203 
204  if (dp_atoms[i].revistedNeighbors < dp_atoms[j].revistedNeighbors) {
205  return -1;
206  } else if (dp_atoms[i].revistedNeighbors > dp_atoms[j].revistedNeighbors) {
207  return 1;
208  }
209 
210  if ((dp_atomsInPlay && (*dp_atomsInPlay)[i]) || !dp_atomsInPlay) {
211  updateAtomNeighborIndex(dp_atoms, dp_atoms[i].bonds);
212  }
213  if ((dp_atomsInPlay && (*dp_atomsInPlay)[j]) || !dp_atomsInPlay) {
214  updateAtomNeighborIndex(dp_atoms, dp_atoms[j].bonds);
215  }
216  for (unsigned int ii = 0;
217  ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size(); ++ii) {
218  int cmp =
219  bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
220  if (cmp) return cmp;
221  }
222 
223  if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
224  return -1;
225  } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
226  return 1;
227  }
228  return 0;
229  }
230 };
231 
233  unsigned int getAtomRingNbrCode(unsigned int i) const {
234  if (!dp_atoms[i].hasRingNbr) return 0;
235 
236  int *nbrs = dp_atoms[i].nbrIds;
237  unsigned int code = 0;
238  for (unsigned j = 0; j < dp_atoms[i].degree; ++j) {
239  if (dp_atoms[nbrs[j]].isRingStereoAtom) {
240  code += dp_atoms[nbrs[j]].index * 10000 + 1; // j;
241  }
242  }
243  return code;
244  }
245 
246  int basecomp(int i, int j) const {
247  PRECONDITION(dp_atoms, "no atoms");
248  unsigned int ivi, ivj;
249 
250  // always start with the current class:
251  ivi = dp_atoms[i].index;
252  ivj = dp_atoms[j].index;
253  if (ivi < ivj)
254  return -1;
255  else if (ivi > ivj)
256  return 1;
257 
258  // use the atom-mapping numbers if they were assigned
259  /* boost::any_cast FILED here:
260  std::string molAtomMapNumber_i="";
261  std::string molAtomMapNumber_j="";
262  */
263  int molAtomMapNumber_i = 0;
264  int molAtomMapNumber_j = 0;
265  dp_atoms[i].atom->getPropIfPresent(common_properties::molAtomMapNumber,
266  molAtomMapNumber_i);
267  dp_atoms[j].atom->getPropIfPresent(common_properties::molAtomMapNumber,
268  molAtomMapNumber_j);
269  if (molAtomMapNumber_i < molAtomMapNumber_j)
270  return -1;
271  else if (molAtomMapNumber_i > molAtomMapNumber_j)
272  return 1;
273 
274  // start by comparing degree
275  ivi = dp_atoms[i].degree;
276  ivj = dp_atoms[j].degree;
277  if (ivi < ivj)
278  return -1;
279  else if (ivi > ivj)
280  return 1;
281 
282  if (dp_atoms[i].p_symbol && dp_atoms[j].p_symbol) {
283  if (*(dp_atoms[i].p_symbol) < *(dp_atoms[j].p_symbol))
284  return -1;
285  else if (*(dp_atoms[i].p_symbol) > *(dp_atoms[j].p_symbol))
286  return 1;
287  else
288  return 0;
289  }
290  // move onto atomic number
291  ivi = dp_atoms[i].atom->getAtomicNum();
292  ivj = dp_atoms[j].atom->getAtomicNum();
293  if (ivi < ivj)
294  return -1;
295  else if (ivi > ivj)
296  return 1;
297 
298  // isotopes if we're using them
299  if (df_useIsotopes) {
300  ivi = dp_atoms[i].atom->getIsotope();
301  ivj = dp_atoms[j].atom->getIsotope();
302  if (ivi < ivj)
303  return -1;
304  else if (ivi > ivj)
305  return 1;
306  }
307 
308  // nHs
309  ivi = dp_atoms[i].totalNumHs;
310  ivj = dp_atoms[j].totalNumHs;
311  if (ivi < ivj)
312  return -1;
313  else if (ivi > ivj)
314  return 1;
315 
316  // charge
317  ivi = dp_atoms[i].atom->getFormalCharge();
318  ivj = dp_atoms[j].atom->getFormalCharge();
319  if (ivi < ivj)
320  return -1;
321  else if (ivi > ivj)
322  return 1;
323 
324  // chirality if we're using it
325  if (df_useChirality) {
326  // first atom stereochem:
327  ivi = 0;
328  ivj = 0;
329  std::string cipCode;
330  if (dp_atoms[i].atom->getPropIfPresent(common_properties::_CIPCode,
331  cipCode)) {
332  ivi = cipCode == "R" ? 2 : 1;
333  }
334  if (dp_atoms[j].atom->getPropIfPresent(common_properties::_CIPCode,
335  cipCode)) {
336  ivj = cipCode == "R" ? 2 : 1;
337  }
338  if (ivi < ivj)
339  return -1;
340  else if (ivi > ivj)
341  return 1;
342 
343  // can't actually use values here, because they are arbitrary
344  ivi = dp_atoms[i].atom->getChiralTag() != 0;
345  ivj = dp_atoms[j].atom->getChiralTag() != 0;
346  if (ivi < ivj)
347  return -1;
348  else if (ivi > ivj)
349  return 1;
350  }
351  if (df_useChiralityRings) {
352  // ring stereochemistry
353  ivi = getAtomRingNbrCode(i);
354  ivj = getAtomRingNbrCode(j);
355  if (ivi < ivj)
356  return -1;
357  else if (ivi > ivj)
358  return 1;
359  // bond stereo is taken care of in the neighborhood comparison
360  }
361  return 0;
362  }
363 
364  public:
366  const ROMol *dp_mol;
367  const boost::dynamic_bitset<> *dp_atomsInPlay, *dp_bondsInPlay;
372 
374  : dp_atoms(NULL),
375  dp_mol(NULL),
376  dp_atomsInPlay(NULL),
377  dp_bondsInPlay(NULL),
378  df_useNbrs(false),
379  df_useIsotopes(true),
380  df_useChirality(true),
381  df_useChiralityRings(true){};
383  const boost::dynamic_bitset<> *atomsInPlay = NULL,
384  const boost::dynamic_bitset<> *bondsInPlay = NULL)
385  : dp_atoms(atoms),
386  dp_mol(&m),
387  dp_atomsInPlay(atomsInPlay),
388  dp_bondsInPlay(bondsInPlay),
389  df_useNbrs(false),
390  df_useIsotopes(true),
391  df_useChirality(true),
392  df_useChiralityRings(true){};
393  int operator()(int i, int j) const {
394  PRECONDITION(dp_atoms, "no atoms");
395  PRECONDITION(dp_mol, "no molecule");
396  PRECONDITION(i != j, "bad call");
397  if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
398  return 0;
399  }
400 
401  int v = basecomp(i, j);
402  if (v) {
403  return v;
404  }
405 
406  if (df_useNbrs) {
407  if ((dp_atomsInPlay && (*dp_atomsInPlay)[i]) || !dp_atomsInPlay) {
408  updateAtomNeighborIndex(dp_atoms, dp_atoms[i].bonds);
409  }
410  if ((dp_atomsInPlay && (*dp_atomsInPlay)[j]) || !dp_atomsInPlay) {
411  updateAtomNeighborIndex(dp_atoms, dp_atoms[j].bonds);
412  }
413 
414  for (unsigned int ii = 0;
415  ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
416  ++ii) {
417  int cmp =
418  bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
419  if (cmp) return cmp;
420  }
421 
422  if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
423  return -1;
424  } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
425  return 1;
426  }
427  }
428  return 0;
429  }
430 };
431 
432 /*
433  * A compare function to discriminate chiral atoms, similar to the CIP rules.
434  * This functionality is currently not used.
435  */
436 
437 const unsigned int ATNUM_CLASS_OFFSET = 10000;
439  void getAtomNeighborhood(std::vector<bondholder> &nbrs) const {
440  for (unsigned j = 0; j < nbrs.size(); ++j) {
441  unsigned int nbrIdx = nbrs[j].nbrIdx;
442  if (nbrIdx == ATNUM_CLASS_OFFSET) {
443  // Ignore the Hs
444  continue;
445  }
446  const Atom *nbr = dp_atoms[nbrIdx].atom;
447  nbrs[j].nbrSymClass =
448  nbr->getAtomicNum() * ATNUM_CLASS_OFFSET + dp_atoms[nbrIdx].index + 1;
449  }
450  std::sort(nbrs.begin(), nbrs.end(), bondholder::greater);
451  // FIX: don't want to be doing this long-term
452  }
453 
454  int basecomp(int i, int j) const {
455  PRECONDITION(dp_atoms, "no atoms");
456  unsigned int ivi, ivj;
457 
458  // always start with the current class:
459  ivi = dp_atoms[i].index;
460  ivj = dp_atoms[j].index;
461  if (ivi < ivj)
462  return -1;
463  else if (ivi > ivj)
464  return 1;
465 
466  // move onto atomic number
467  ivi = dp_atoms[i].atom->getAtomicNum();
468  ivj = dp_atoms[j].atom->getAtomicNum();
469  if (ivi < ivj)
470  return -1;
471  else if (ivi > ivj)
472  return 1;
473 
474  // isotopes:
475  ivi = dp_atoms[i].atom->getIsotope();
476  ivj = dp_atoms[j].atom->getIsotope();
477  if (ivi < ivj)
478  return -1;
479  else if (ivi > ivj)
480  return 1;
481 
482  // atom stereochem:
483  ivi = 0;
484  ivj = 0;
485  std::string cipCode;
486  if (dp_atoms[i].atom->getPropIfPresent(common_properties::_CIPCode,
487  cipCode)) {
488  ivi = cipCode == "R" ? 2 : 1;
489  }
490  if (dp_atoms[j].atom->getPropIfPresent(common_properties::_CIPCode,
491  cipCode)) {
492  ivj = cipCode == "R" ? 2 : 1;
493  }
494  if (ivi < ivj)
495  return -1;
496  else if (ivi > ivj)
497  return 1;
498 
499  // bond stereo is taken care of in the neighborhood comparison
500  return 0;
501  }
502 
503  public:
505  const ROMol *dp_mol;
508  : dp_atoms(NULL), dp_mol(NULL), df_useNbrs(false){};
510  : dp_atoms(atoms), dp_mol(&m), df_useNbrs(false){};
511  int operator()(int i, int j) const {
512  PRECONDITION(dp_atoms, "no atoms");
513  PRECONDITION(dp_mol, "no molecule");
514  PRECONDITION(i != j, "bad call");
515  int v = basecomp(i, j);
516  if (v) return v;
517 
518  if (df_useNbrs) {
519  getAtomNeighborhood(dp_atoms[i].bonds);
520  getAtomNeighborhood(dp_atoms[j].bonds);
521 
522  // we do two passes through the neighbor lists. The first just uses the
523  // atomic numbers (by passing the optional 10000 to bondholder::compare),
524  // the second takes the already-computed index into account
525  for (unsigned int ii = 0;
526  ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
527  ++ii) {
528  int cmp = bondholder::compare(
529  dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii], ATNUM_CLASS_OFFSET);
530  if (cmp) return cmp;
531  }
532  for (unsigned int ii = 0;
533  ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
534  ++ii) {
535  int cmp =
536  bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
537  if (cmp) return cmp;
538  }
539  if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
540  return -1;
541  } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
542  return 1;
543  }
544  }
545  return 0;
546  }
547 };
548 
549 /*
550  * Basic canonicalization function to organize the partitions which will be
551  * sorted next.
552  * */
553 
554 template <typename CompareFunc>
555 void RefinePartitions(const ROMol &mol, canon_atom *atoms, CompareFunc compar,
556  int mode, int *order, int *count, int &activeset,
557  int *next, int *changed, char *touchedPartitions) {
558  unsigned int nAtoms = mol.getNumAtoms();
559  int partition;
560  int symclass = 0;
561  int *start;
562  int offset;
563  int index;
564  int len;
565  int i;
566  // std::vector<char> touchedPartitions(mol.getNumAtoms(),0);
567 
568  // std::cerr<<"&&&&&&&&&&&&&&&& RP"<<std::endl;
569  while (activeset != -1) {
570  // std::cerr<<"ITER: "<<activeset<<" next: "<<next[activeset]<<std::endl;
571  // std::cerr<<" next: ";
572  // for(unsigned int ii=0;ii<nAtoms;++ii){
573  // std::cerr<<ii<<":"<<next[ii]<<" ";
574  // }
575  // std::cerr<<std::endl;
576  // for(unsigned int ii=0;ii<nAtoms;++ii){
577  // std::cerr<<order[ii]<<" count: "<<count[order[ii]]<<" index:
578  // "<<atoms[order[ii]].index<<std::endl;
579  // }
580 
581  partition = activeset;
582  activeset = next[partition];
583  next[partition] = -2;
584 
585  len = count[partition];
586  offset = atoms[partition].index;
587  start = order + offset;
588  // std::cerr<<"\n\n**************************************************************"<<std::endl;
589  // std::cerr<<" sort - class:"<<atoms[partition].index<<" len: "<<len<<":";
590  // for(unsigned int ii=0;ii<len;++ii){
591  // std::cerr<<" "<<order[offset+ii]+1;
592  // }
593  // std::cerr<<std::endl;
594  // for(unsigned int ii=0;ii<nAtoms;++ii){
595  // std::cerr<<order[ii]+1<<" count: "<<count[order[ii]]<<" index:
596  // "<<atoms[order[ii]].index<<std::endl;
597  // }
598  hanoisort(start, len, count, changed, compar);
599  // std::cerr<<"*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*"<<std::endl;
600  // std::cerr<<" result:";
601  // for(unsigned int ii=0;ii<nAtoms;++ii){
602  // std::cerr<<order[ii]+1<<" count: "<<count[order[ii]]<<" index:
603  // "<<atoms[order[ii]].index<<std::endl;
604  // }
605  for (int k = 0; k < len; ++k) {
606  changed[start[k]] = 0;
607  }
608 
609  index = start[0];
610  // std::cerr<<" len:"<<len<<" index:"<<index<<"
611  // count:"<<count[index]<<std::endl;
612  for (i = count[index]; i < len; i++) {
613  index = start[i];
614  if (count[index]) symclass = offset + i;
615  atoms[index].index = symclass;
616  // std::cerr<<" "<<index+1<<"("<<symclass<<")";
617  // if(mode && (activeset<0 || count[index]>count[activeset]) ){
618  // activeset=index;
619  //}
620  for (unsigned j = 0; j < atoms[index].degree; ++j) {
621  changed[atoms[index].nbrIds[j]] = 1;
622  }
623  }
624  // std::cerr<<std::endl;
625 
626  if (mode) {
627  index = start[0];
628  for (i = count[index]; i < len; i++) {
629  index = start[i];
630  for (unsigned j = 0; j < atoms[index].degree; ++j) {
631  unsigned int nbor = atoms[index].nbrIds[j];
632  touchedPartitions[atoms[nbor].index] = 1;
633  }
634  }
635  for (unsigned int ii = 0; ii < nAtoms; ++ii) {
636  if (touchedPartitions[ii]) {
637  partition = order[ii];
638  if ((count[partition] > 1) && (next[partition] == -2)) {
639  next[partition] = activeset;
640  activeset = partition;
641  }
642  touchedPartitions[ii] = 0;
643  }
644  }
645  }
646  }
647 } // end of RefinePartitions()
648 
649 template <typename CompareFunc>
650 void BreakTies(const ROMol &mol, canon_atom *atoms, CompareFunc compar,
651  int mode, int *order, int *count, int &activeset, int *next,
652  int *changed, char *touchedPartitions) {
653  unsigned int nAtoms = mol.getNumAtoms();
654  int partition;
655  int offset;
656  int index;
657  int len;
658  int oldPart = 0;
659 
660  for (unsigned int i = 0; i < nAtoms; i++) {
661  partition = order[i];
662  oldPart = atoms[partition].index;
663  while (count[partition] > 1) {
664  len = count[partition];
665  offset = atoms[partition].index + len - 1;
666  index = order[offset];
667  atoms[index].index = offset;
668  count[partition] = len - 1;
669  count[index] = 1;
670 
671  // test for ions, water molecules with no
672  if (atoms[index].degree < 1) {
673  continue;
674  }
675  for (unsigned j = 0; j < atoms[index].degree; ++j) {
676  unsigned int nbor = atoms[index].nbrIds[j];
677  touchedPartitions[atoms[nbor].index] = 1;
678  changed[nbor] = 1;
679  }
680 
681  for (unsigned int ii = 0; ii < nAtoms; ++ii) {
682  if (touchedPartitions[ii]) {
683  int npart = order[ii];
684  if ((count[npart] > 1) && (next[npart] == -2)) {
685  next[npart] = activeset;
686  activeset = npart;
687  }
688  touchedPartitions[ii] = 0;
689  }
690  }
691  RefinePartitions(mol, atoms, compar, mode, order, count, activeset, next,
692  changed, touchedPartitions);
693  }
694  // not sure if this works each time
695  if (atoms[partition].index != oldPart) {
696  i -= 1;
697  }
698  }
699 } // end of BreakTies()
700 
701 void CreateSinglePartition(unsigned int nAtoms, int *order, int *count,
702  canon_atom *atoms);
703 
704 void ActivatePartitions(unsigned int nAtoms, int *order, int *count,
705  int &activeset, int *next, int *changed);
706 
707 void rankMolAtoms(const ROMol &mol, std::vector<unsigned int> &res,
708  bool breakTies = true, bool includeChirality = true,
709  bool includeIsotopes = true);
710 
711 void rankFragmentAtoms(const ROMol &mol, std::vector<unsigned int> &res,
712  const boost::dynamic_bitset<> &atomsInPlay,
713  const boost::dynamic_bitset<> &bondsInPlay,
714  const std::vector<std::string> *atomSymbols = NULL,
715  bool breakTies = true, bool includeChirality = true,
716  bool includeIsotopes = true);
717 
718 void chiralRankMolAtoms(const ROMol &mol, std::vector<unsigned int> &res);
719 
720 void initCanonAtoms(const ROMol &mol, std::vector<Canon::canon_atom> &atoms,
721  bool includeChirality = true);
722 
723 } // end of Canon namespace
724 } // end of RDKit namespace
int operator()(int i, int j) const
Definition: new_canon.h:511
const std::string * p_symbol
Definition: new_canon.h:82
void chiralRankMolAtoms(const ROMol &mol, std::vector< unsigned int > &res)
const boost::dynamic_bitset * dp_bondsInPlay
Definition: new_canon.h:119
Bond::BondType bondType
Definition: new_canon.h:30
std::vector< int > neighborNum
Definition: new_canon.h:83
void rankMolAtoms(const ROMol &mol, std::vector< unsigned int > &res, bool breakTies=true, bool includeChirality=true, bool includeIsotopes=true)
void updateAtomNeighborIndex(canon_atom *atoms, std::vector< bondholder > &nbrs)
unsigned int totalNumHs
Definition: new_canon.h:78
unsigned int nbrIdx
Definition: new_canon.h:33
void CreateSinglePartition(unsigned int nAtoms, int *order, int *count, canon_atom *atoms)
void ActivatePartitions(unsigned int nAtoms, int *order, int *count, int &activeset, int *next, int *changed)
const std::string molAtomMapNumber
Defines the primary molecule class ROMol as well as associated typedefs.
void hanoisort(int *base, int nel, int *count, int *changed, CompareFunc compar)
Definition: hanoiSort.h:140
BondStereo
the nature of the bond&#39;s stereochem (for cis/trans)
Definition: Bond.h:96
unsigned int getNumAtoms(bool onlyExplicit=1) const
returns our number of atoms
AtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=NULL, const boost::dynamic_bitset<> *bondsInPlay=NULL)
Definition: new_canon.h:382
bondholder(Bond::BondType bt, Bond::BondStereo bs, unsigned int ni, unsigned int nsc)
Definition: new_canon.h:39
BondType
the type of Bond
Definition: Bond.h:57
ROMol is a molecule class that is intended to have a fixed topology.
Definition: ROMol.h:103
const boost::dynamic_bitset * dp_bondsInPlay
Definition: new_canon.h:367
std::vector< bondholder > bonds
Definition: new_canon.h:85
static bool greater(const bondholder &lhs, const bondholder &rhs)
Definition: new_canon.h:53
ChiralAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m)
Definition: new_canon.h:509
const unsigned int ATNUM_CLASS_OFFSET
Definition: new_canon.h:437
SpecialSymmetryAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=NULL, const boost::dynamic_bitset<> *bondsInPlay=NULL)
Definition: new_canon.h:182
int getAtomicNum() const
returns our atomic number
Definition: Atom.h:116
bool operator<(const bondholder &o) const
Definition: new_canon.h:48
const std::string _CIPCode
Includes a bunch of functionality for handling Atom and Bond queries.
Definition: Atom.h:29
int operator()(int i, int j) const
Definition: new_canon.h:393
unsigned int nbrSymClass
Definition: new_canon.h:32
void BreakTies(const ROMol &mol, canon_atom *atoms, CompareFunc compar, int mode, int *order, int *count, int &activeset, int *next, int *changed, char *touchedPartitions)
Definition: new_canon.h:650
const boost::dynamic_bitset * dp_bondsInPlay
Definition: new_canon.h:175
class for representing a bond
Definition: Bond.h:47
static int compare(const bondholder &x, const bondholder &y, unsigned int div=1)
Definition: new_canon.h:60
void RefinePartitions(const ROMol &mol, canon_atom *atoms, CompareFunc compar, int mode, int *order, int *count, int &activeset, int *next, int *changed, char *touchedPartitions)
Definition: new_canon.h:555
#define PRECONDITION(expr, mess)
Definition: Invariant.h:107
void updateAtomNeighborNumSwaps(canon_atom *atoms, std::vector< bondholder > &nbrs, unsigned int atomIdx, std::vector< std::pair< unsigned int, unsigned int > > &result)
unsigned int degree
Definition: new_canon.h:77
void rankFragmentAtoms(const ROMol &mol, std::vector< unsigned int > &res, const boost::dynamic_bitset<> &atomsInPlay, const boost::dynamic_bitset<> &bondsInPlay, const std::vector< std::string > *atomSymbols=NULL, bool breakTies=true, bool includeChirality=true, bool includeIsotopes=true)
std::vector< int > revistedNeighbors
Definition: new_canon.h:84
bondholder(Bond::BondType bt, unsigned int bs, unsigned int ni, unsigned int nsc)
Definition: new_canon.h:45
void initCanonAtoms(const ROMol &mol, std::vector< Canon::canon_atom > &atoms, bool includeChirality=true)
SpecialChiralityAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=NULL, const boost::dynamic_bitset<> *bondsInPlay=NULL)
Definition: new_canon.h:126
unsigned int bondStereo
Definition: new_canon.h:31
The class for representing atoms.
Definition: Atom.h:68
Canon::canon_atom * dp_atoms
Definition: new_canon.h:365