RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
new_canon.h
Go to the documentation of this file.
1//
2// Copyright (C) 2014 Greg Landrum
3// Adapted from pseudo-code from Roger Sayle
4//
5// @@ All Rights Reserved @@
6// This file is part of the RDKit.
7// The contents are covered by the terms of the BSD license
8// which is included in the file license.txt, found at the root
9// of the RDKit source tree.
10//
11
12#include <RDGeneral/export.h>
13#include <RDGeneral/hanoiSort.h>
14#include <GraphMol/ROMol.h>
15#include <GraphMol/RingInfo.h>
18#include <cstdint>
19#include <boost/dynamic_bitset.hpp>
21#include <cstring>
22#include <iostream>
23#include <cassert>
24#include <cstring>
25#include <vector>
26
27// #define VERBOSE_CANON 1
28
29namespace RDKit {
30namespace Canon {
31struct canon_atom;
32
34 Bond::BondType bondType{Bond::BondType::UNSPECIFIED};
35 unsigned int bondStereo{
36 static_cast<unsigned int>(Bond::BondStereo::STEREONONE)};
37 unsigned int nbrSymClass{0};
38 unsigned int nbrIdx{0};
39 Bond::BondStereo stype{Bond::BondStereo::STEREONONE};
40 const canon_atom *controllingAtoms[4]{nullptr, nullptr, nullptr, nullptr};
41 const std::string *p_symbol{
42 nullptr}; // if provided, this is used to order bonds
43 unsigned int bondIdx{0};
44
47 unsigned int nsc, unsigned int bidx)
48 : bondType(bt),
49 bondStereo(static_cast<unsigned int>(bs)),
50 nbrSymClass(nsc),
51 nbrIdx(ni),
52 bondIdx(bidx) {}
53 bondholder(Bond::BondType bt, unsigned int bs, unsigned int ni,
54 unsigned int nsc, unsigned int bidx)
55 : bondType(bt),
56 bondStereo(bs),
57 nbrSymClass(nsc),
58 nbrIdx(ni),
59 bondIdx(bidx) {}
60
61 int compareStereo(const bondholder &o) const;
62
63 bool operator<(const bondholder &o) const { return compare(*this, o) < 0; }
64 static bool greater(const bondholder &lhs, const bondholder &rhs) {
65 return compare(lhs, rhs) > 0;
66 }
67
68 static int compare(const bondholder &x, const bondholder &y,
69 unsigned int div = 1) {
70 if (x.p_symbol && y.p_symbol) {
71 if ((*x.p_symbol) < (*y.p_symbol)) {
72 return -1;
73 } else if ((*x.p_symbol) > (*y.p_symbol)) {
74 return 1;
75 }
76 }
77 if (x.bondType < y.bondType) {
78 return -1;
79 } else if (x.bondType > y.bondType) {
80 return 1;
81 }
82 if (x.bondStereo < y.bondStereo) {
83 return -1;
84 } else if (x.bondStereo > y.bondStereo) {
85 return 1;
86 }
87 auto scdiv = x.nbrSymClass / div - y.nbrSymClass / div;
88 if (scdiv) {
89 return scdiv;
90 }
91 if (x.bondStereo && y.bondStereo) {
92 auto cs = x.compareStereo(y);
93 if (cs) {
94 return cs;
95 }
96 }
97 return 0;
98 }
99};
101 const Atom *atom{nullptr};
102 int index{-1};
103 unsigned int degree{0};
104 unsigned int totalNumHs{0};
105 bool hasRingNbr{false};
106 bool isRingStereoAtom{false};
107 unsigned int whichStereoGroup{0};
108 StereoGroupType typeOfStereoGroup{StereoGroupType::STEREO_ABSOLUTE};
109 std::unique_ptr<int[]> nbrIds;
110 const std::string *p_symbol{
111 nullptr}; // if provided, this is used to order atoms
112 std::vector<int> neighborNum;
113 std::vector<int> revistedNeighbors;
114 std::vector<bondholder> bonds;
115};
116
118 canon_atom *atoms, std::vector<bondholder> &nbrs);
119
121 canon_atom *atoms, std::vector<bondholder> &nbrs, unsigned int atomIdx,
122 std::vector<std::pair<unsigned int, unsigned int>> &result);
123
124/*
125 * Different types of atom compare functions:
126 *
127 * - SpecialChiralityAtomCompareFunctor: Allows canonizing molecules exhibiting
128 *dependent chirality
129 * - SpecialSymmetryAtomCompareFunctor: Very specialized, allows canonizing
130 *highly symmetrical graphs/molecules
131 * - AtomCompareFunctor: Basic atom compare function which also allows to
132 *include neighbors within the ranking
133 */
134
136 public:
137 Canon::canon_atom *dp_atoms{nullptr};
138 const ROMol *dp_mol{nullptr};
139 const boost::dynamic_bitset<> *dp_atomsInPlay{nullptr},
140 *dp_bondsInPlay{nullptr};
141
144 Canon::canon_atom *atoms, const ROMol &m,
145 const boost::dynamic_bitset<> *atomsInPlay = nullptr,
146 const boost::dynamic_bitset<> *bondsInPlay = nullptr)
147 : dp_atoms(atoms),
148 dp_mol(&m),
149 dp_atomsInPlay(atomsInPlay),
150 dp_bondsInPlay(bondsInPlay) {}
151 int operator()(int i, int j) const {
152 PRECONDITION(dp_atoms, "no atoms");
153 PRECONDITION(dp_mol, "no molecule");
154 PRECONDITION(i != j, "bad call");
155 if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
156 return 0;
157 }
158
159 if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
160 updateAtomNeighborIndex(dp_atoms, dp_atoms[i].bonds);
161 }
162 if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
163 updateAtomNeighborIndex(dp_atoms, dp_atoms[j].bonds);
164 }
165 for (unsigned int ii = 0;
166 ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size(); ++ii) {
167 int cmp =
168 bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
169 if (cmp) {
170 return cmp;
171 }
172 }
173
174 std::vector<std::pair<unsigned int, unsigned int>> swapsi;
175 std::vector<std::pair<unsigned int, unsigned int>> swapsj;
176 if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
177 updateAtomNeighborNumSwaps(dp_atoms, dp_atoms[i].bonds, i, swapsi);
178 }
179 if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
180 updateAtomNeighborNumSwaps(dp_atoms, dp_atoms[j].bonds, j, swapsj);
181 }
182 for (unsigned int ii = 0; ii < swapsi.size() && ii < swapsj.size(); ++ii) {
183 int cmp = swapsi[ii].second - swapsj[ii].second;
184 if (cmp) {
185 return cmp;
186 }
187 }
188 return 0;
189 }
190};
191
193 public:
194 Canon::canon_atom *dp_atoms{nullptr};
195 const ROMol *dp_mol{nullptr};
196 const boost::dynamic_bitset<> *dp_atomsInPlay{nullptr},
197 *dp_bondsInPlay{nullptr};
198
201 Canon::canon_atom *atoms, const ROMol &m,
202 const boost::dynamic_bitset<> *atomsInPlay = nullptr,
203 const boost::dynamic_bitset<> *bondsInPlay = nullptr)
204 : dp_atoms(atoms),
205 dp_mol(&m),
206 dp_atomsInPlay(atomsInPlay),
207 dp_bondsInPlay(bondsInPlay) {}
208 int operator()(int i, int j) const {
209 PRECONDITION(dp_atoms, "no atoms");
210 PRECONDITION(dp_mol, "no molecule");
211 PRECONDITION(i != j, "bad call");
212 if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
213 return 0;
214 }
215
216 if (dp_atoms[i].neighborNum < dp_atoms[j].neighborNum) {
217 return -1;
218 } else if (dp_atoms[i].neighborNum > dp_atoms[j].neighborNum) {
219 return 1;
220 }
221
222 if (dp_atoms[i].revistedNeighbors < dp_atoms[j].revistedNeighbors) {
223 return -1;
224 } else if (dp_atoms[i].revistedNeighbors > dp_atoms[j].revistedNeighbors) {
225 return 1;
226 }
227
228 if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
229 updateAtomNeighborIndex(dp_atoms, dp_atoms[i].bonds);
230 }
231 if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
232 updateAtomNeighborIndex(dp_atoms, dp_atoms[j].bonds);
233 }
234 for (unsigned int ii = 0;
235 ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size(); ++ii) {
236 int cmp =
237 bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
238 if (cmp) {
239 return cmp;
240 }
241 }
242
243 if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
244 return -1;
245 } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
246 return 1;
247 }
248 return 0;
249 }
250};
251
252namespace {
253unsigned int getChiralRank(const ROMol *dp_mol, canon_atom *dp_atoms,
254 unsigned int i) {
255 unsigned int res = 0;
256 std::vector<unsigned int> perm;
257 perm.reserve(dp_atoms[i].atom->getDegree());
258 for (const auto nbr : dp_mol->atomNeighbors(dp_atoms[i].atom)) {
259 auto rnk = dp_atoms[nbr->getIdx()].index;
260 // make sure we don't have duplicate ranks
261 if (std::find(perm.begin(), perm.end(), rnk) != perm.end()) {
262 break;
263 } else {
264 perm.push_back(rnk);
265 }
266 }
267 if (perm.size() == dp_atoms[i].atom->getDegree()) {
268 auto ctag = dp_atoms[i].atom->getChiralTag();
271 auto sortedPerm = perm;
272 std::sort(sortedPerm.begin(), sortedPerm.end());
275 if (nswaps % 2) {
276 res = res == 2 ? 1 : 2;
277 }
278 }
279 }
280 return res;
281}
282} // namespace
284 unsigned int getAtomRingNbrCode(unsigned int i) const {
285 if (!dp_atoms[i].hasRingNbr) {
286 return 0;
287 }
288
289 auto nbrs = dp_atoms[i].nbrIds.get();
290 unsigned int code = 0;
291 for (unsigned j = 0; j < dp_atoms[i].degree; ++j) {
292 if (dp_atoms[nbrs[j]].isRingStereoAtom) {
293 code += dp_atoms[nbrs[j]].index * 10000 + 1; // j;
294 }
295 }
296 return code;
297 }
298
299 int basecomp(int i, int j) const {
300 unsigned int ivi, ivj;
301
302 // always start with the current class:
303 ivi = dp_atoms[i].index;
304 ivj = dp_atoms[j].index;
305 if (ivi < ivj) {
306 return -1;
307 } else if (ivi > ivj) {
308 return 1;
309 }
310 if (df_useAtomMaps) {
311 // use the atom-mapping numbers if they were assigned
312 int molAtomMapNumber_i = 0;
313 int molAtomMapNumber_j = 0;
314 dp_atoms[i].atom->getPropIfPresent(common_properties::molAtomMapNumber,
315 molAtomMapNumber_i);
316 dp_atoms[j].atom->getPropIfPresent(common_properties::molAtomMapNumber,
317 molAtomMapNumber_j);
318 if (molAtomMapNumber_i < molAtomMapNumber_j) {
319 return -1;
320 } else if (molAtomMapNumber_i > molAtomMapNumber_j) {
321 return 1;
322 }
323 }
324 // start by comparing degree
325 ivi = dp_atoms[i].degree;
326 ivj = dp_atoms[j].degree;
327 if (ivi < ivj) {
328 return -1;
329 } else if (ivi > ivj) {
330 return 1;
331 }
332 if (dp_atoms[i].p_symbol && dp_atoms[j].p_symbol) {
333 if (*(dp_atoms[i].p_symbol) < *(dp_atoms[j].p_symbol)) {
334 return -1;
335 } else if (*(dp_atoms[i].p_symbol) > *(dp_atoms[j].p_symbol)) {
336 return 1;
337 } else {
338 return 0;
339 }
340 }
341
342 // move onto atomic number
343 ivi = dp_atoms[i].atom->getAtomicNum();
344 ivj = dp_atoms[j].atom->getAtomicNum();
345 if (ivi < ivj) {
346 return -1;
347 } else if (ivi > ivj) {
348 return 1;
349 }
350 // isotopes if we're using them
351 if (df_useIsotopes) {
352 ivi = dp_atoms[i].atom->getIsotope();
353 ivj = dp_atoms[j].atom->getIsotope();
354 if (ivi < ivj) {
355 return -1;
356 } else if (ivi > ivj) {
357 return 1;
358 }
359 }
360
361 // nHs
362 ivi = dp_atoms[i].totalNumHs;
363 ivj = dp_atoms[j].totalNumHs;
364 if (ivi < ivj) {
365 return -1;
366 } else if (ivi > ivj) {
367 return 1;
368 }
369 // charge
370 ivi = dp_atoms[i].atom->getFormalCharge();
371 ivj = dp_atoms[j].atom->getFormalCharge();
372 if (ivi < ivj) {
373 return -1;
374 } else if (ivi > ivj) {
375 return 1;
376 }
377 // chirality if we're using it
378 if (df_useChirality) {
379 // look at enhanced stereo
380 ivi = dp_atoms[i].whichStereoGroup; // can't use the index itself, but if
381 // it's set then we're in an SG
382 ivj = dp_atoms[j].whichStereoGroup;
383 if (ivi || ivj) {
384 if (ivi && !ivj) {
385 return 1;
386 } else if (ivj && !ivi) {
387 return -1;
388 } else if (ivi && ivj) {
389 ivi = static_cast<unsigned int>(dp_atoms[i].typeOfStereoGroup);
390 ivj = static_cast<unsigned int>(dp_atoms[j].typeOfStereoGroup);
391 if (ivi < ivj) {
392 return -1;
393 } else if (ivi > ivj) {
394 return 1;
395 }
396 ivi = dp_atoms[i].whichStereoGroup - 1;
397 ivj = dp_atoms[j].whichStereoGroup - 1;
398 // now check the current classes of the other members of the SG
399 std::set<unsigned int> sgi;
400 for (auto sgat : dp_mol->getStereoGroups()[ivi].getAtoms()) {
401 sgi.insert(dp_atoms[sgat->getIdx()].index);
402 }
403 std::set<unsigned int> sgj;
404 for (auto sgat : dp_mol->getStereoGroups()[ivj].getAtoms()) {
405 sgj.insert(dp_atoms[sgat->getIdx()].index);
406 }
407 if (sgi < sgj) {
408 return -1;
409 } else if (sgi > sgj) {
410 return 1;
411 }
412 }
413 } else {
414 // if there's no stereogroup, then use whatever atom stereochem is
415 // specfied:
416 ivi = 0;
417 ivj = 0;
418 // can't actually use values here, because they are arbitrary
419 ivi = dp_atoms[i].atom->getChiralTag() != 0;
420 ivj = dp_atoms[j].atom->getChiralTag() != 0;
421 if (ivi < ivj) {
422 return -1;
423 } else if (ivi > ivj) {
424 return 1;
425 }
426 // stereo set
427 if (ivi && ivj) {
428 if (ivi) {
429 ivi = getChiralRank(dp_mol, dp_atoms, i);
430 }
431 if (ivj) {
432 ivj = getChiralRank(dp_mol, dp_atoms, j);
433 }
434 if (ivi < ivj) {
435 return -1;
436 } else if (ivi > ivj) {
437 return 1;
438 }
439 }
440 }
441 }
442
443 if (df_useChiralityRings) {
444 // ring stereochemistry
445 ivi = getAtomRingNbrCode(i);
446 ivj = getAtomRingNbrCode(j);
447 if (ivi < ivj) {
448 return -1;
449 } else if (ivi > ivj) {
450 return 1;
451 } // bond stereo is taken care of in the neighborhood comparison
452 }
453 return 0;
454 }
455
456 public:
457 Canon::canon_atom *dp_atoms{nullptr};
458 const ROMol *dp_mol{nullptr};
459 const boost::dynamic_bitset<> *dp_atomsInPlay{nullptr},
460 *dp_bondsInPlay{nullptr};
461 bool df_useNbrs{false};
462 bool df_useIsotopes{true};
463 bool df_useChirality{true};
464 bool df_useChiralityRings{true};
465 bool df_useAtomMaps{true};
466
469 const boost::dynamic_bitset<> *atomsInPlay = nullptr,
470 const boost::dynamic_bitset<> *bondsInPlay = nullptr)
471 : dp_atoms(atoms),
472 dp_mol(&m),
473 dp_atomsInPlay(atomsInPlay),
474 dp_bondsInPlay(bondsInPlay),
475 df_useNbrs(false),
476 df_useIsotopes(true),
477 df_useChirality(true),
478 df_useChiralityRings(true),
479 df_useAtomMaps(true) {}
480 int operator()(int i, int j) const {
481 if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
482 return 0;
483 }
484 int v = basecomp(i, j);
485 if (v) {
486 return v;
487 }
488
489 if (df_useNbrs) {
490 if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
491 updateAtomNeighborIndex(dp_atoms, dp_atoms[i].bonds);
492 }
493 if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
494 updateAtomNeighborIndex(dp_atoms, dp_atoms[j].bonds);
495 }
496
497 for (unsigned int ii = 0;
498 ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
499 ++ii) {
500 int cmp =
501 bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
502 if (cmp) {
503 return cmp;
504 }
505 }
506
507 if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
508 return -1;
509 } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
510 return 1;
511 }
512 }
513 return 0;
514 }
515};
516
517/*
518 * A compare function to discriminate chiral atoms, similar to the CIP rules.
519 * This functionality is currently not used.
520 */
521
522const unsigned int ATNUM_CLASS_OFFSET = 10000;
524 void getAtomNeighborhood(std::vector<bondholder> &nbrs) const {
525 for (unsigned j = 0; j < nbrs.size(); ++j) {
526 unsigned int nbrIdx = nbrs[j].nbrIdx;
527 if (nbrIdx == ATNUM_CLASS_OFFSET) {
528 // Ignore the Hs
529 continue;
530 }
531 const Atom *nbr = dp_atoms[nbrIdx].atom;
532 nbrs[j].nbrSymClass =
533 nbr->getAtomicNum() * ATNUM_CLASS_OFFSET + dp_atoms[nbrIdx].index + 1;
534 }
535 std::sort(nbrs.begin(), nbrs.end(), bondholder::greater);
536 // FIX: don't want to be doing this long-term
537 }
538
539 int basecomp(int i, int j) const {
540 PRECONDITION(dp_atoms, "no atoms");
541 unsigned int ivi, ivj;
542
543 // always start with the current class:
544 ivi = dp_atoms[i].index;
545 ivj = dp_atoms[j].index;
546 if (ivi < ivj) {
547 return -1;
548 } else if (ivi > ivj) {
549 return 1;
550 }
551
552 // move onto atomic number
553 ivi = dp_atoms[i].atom->getAtomicNum();
554 ivj = dp_atoms[j].atom->getAtomicNum();
555 if (ivi < ivj) {
556 return -1;
557 } else if (ivi > ivj) {
558 return 1;
559 }
560
561 // isotopes:
562 ivi = dp_atoms[i].atom->getIsotope();
563 ivj = dp_atoms[j].atom->getIsotope();
564 if (ivi < ivj) {
565 return -1;
566 } else if (ivi > ivj) {
567 return 1;
568 }
569
570 // atom stereochem:
571 ivi = 0;
572 ivj = 0;
573 std::string cipCode;
574 if (dp_atoms[i].atom->getPropIfPresent(common_properties::_CIPCode,
575 cipCode)) {
576 ivi = cipCode == "R" ? 2 : 1;
577 }
578 if (dp_atoms[j].atom->getPropIfPresent(common_properties::_CIPCode,
579 cipCode)) {
580 ivj = cipCode == "R" ? 2 : 1;
581 }
582 if (ivi < ivj) {
583 return -1;
584 } else if (ivi > ivj) {
585 return 1;
586 }
587
588 // bond stereo is taken care of in the neighborhood comparison
589 return 0;
590 }
591
592 public:
593 Canon::canon_atom *dp_atoms{nullptr};
594 const ROMol *dp_mol{nullptr};
595 bool df_useNbrs{false};
598 : dp_atoms(atoms), dp_mol(&m), df_useNbrs(false) {}
599 int operator()(int i, int j) const {
600 PRECONDITION(dp_atoms, "no atoms");
601 PRECONDITION(dp_mol, "no molecule");
602 PRECONDITION(i != j, "bad call");
603 int v = basecomp(i, j);
604 if (v) {
605 return v;
606 }
607
608 if (df_useNbrs) {
609 getAtomNeighborhood(dp_atoms[i].bonds);
610 getAtomNeighborhood(dp_atoms[j].bonds);
611
612 // we do two passes through the neighbor lists. The first just uses the
613 // atomic numbers (by passing the optional 10000 to bondholder::compare),
614 // the second takes the already-computed index into account
615 for (unsigned int ii = 0;
616 ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
617 ++ii) {
618 int cmp = bondholder::compare(
619 dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii], ATNUM_CLASS_OFFSET);
620 if (cmp) {
621 return cmp;
622 }
623 }
624 for (unsigned int ii = 0;
625 ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
626 ++ii) {
627 int cmp =
628 bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
629 if (cmp) {
630 return cmp;
631 }
632 }
633 if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
634 return -1;
635 } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
636 return 1;
637 }
638 }
639 return 0;
640 }
641};
642
643/*
644 * Basic canonicalization function to organize the partitions which will be
645 * sorted next.
646 * */
647
648template <typename CompareFunc>
650 int mode, int *order, int *count, int &activeset,
651 int *next, int *changed, char *touchedPartitions) {
652 unsigned int nAtoms = mol.getNumAtoms();
653 int partition;
654 int symclass = 0;
655 int *start;
656 int offset;
657 int index;
658 int len;
659 int i;
660 // std::vector<char> touchedPartitions(mol.getNumAtoms(),0);
661
662 // std::cerr<<"&&&&&&&&&&&&&&&& RP"<<std::endl;
663 while (activeset != -1) {
664 // std::cerr<<"ITER: "<<activeset<<" next: "<<next[activeset]<<std::endl;
665 // std::cerr<<" next: ";
666 // for(unsigned int ii=0;ii<nAtoms;++ii){
667 // std::cerr<<ii<<":"<<next[ii]<<" ";
668 // }
669 // std::cerr<<std::endl;
670 // for(unsigned int ii=0;ii<nAtoms;++ii){
671 // std::cerr<<order[ii]<<" count: "<<count[order[ii]]<<" index:
672 // "<<atoms[order[ii]].index<<std::endl;
673 // }
674
676 activeset = next[partition];
677 next[partition] = -2;
678
680 offset = atoms[partition].index;
681 start = order + offset;
682 // std::cerr<<"\n\n**************************************************************"<<std::endl;
683 // std::cerr<<" sort - class:"<<atoms[partition].index<<" len: "<<len<<":";
684 // for(unsigned int ii=0;ii<len;++ii){
685 // std::cerr<<" "<<order[offset+ii]+1;
686 // }
687 // std::cerr<<std::endl;
688 // for(unsigned int ii=0;ii<nAtoms;++ii){
689 // std::cerr<<order[ii]+1<<" count: "<<count[order[ii]]<<" index:
690 // "<<atoms[order[ii]].index<<std::endl;
691 // }
693 // std::cerr<<"*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*"<<std::endl;
694 // std::cerr<<" result:";
695 // for(unsigned int ii=0;ii<nAtoms;++ii){
696 // std::cerr<<order[ii]+1<<" count: "<<count[order[ii]]<<" index:
697 // "<<atoms[order[ii]].index<<std::endl;
698 // }
699 for (int k = 0; k < len; ++k) {
700 changed[start[k]] = 0;
701 }
702
703 index = start[0];
704 // std::cerr<<" len:"<<len<<" index:"<<index<<"
705 // count:"<<count[index]<<std::endl;
706 for (i = count[index]; i < len; i++) {
707 index = start[i];
708 if (count[index]) {
709 symclass = offset + i;
710 }
711 atoms[index].index = symclass;
712 // std::cerr<<" "<<index+1<<"("<<symclass<<")";
713 // if(mode && (activeset<0 || count[index]>count[activeset]) ){
714 // activeset=index;
715 //}
716 for (unsigned j = 0; j < atoms[index].degree; ++j) {
717 changed[atoms[index].nbrIds[j]] = 1;
718 }
719 }
720 // std::cerr<<std::endl;
721
722 if (mode) {
723 index = start[0];
724 for (i = count[index]; i < len; i++) {
725 index = start[i];
726 for (unsigned j = 0; j < atoms[index].degree; ++j) {
727 unsigned int nbor = atoms[index].nbrIds[j];
728 touchedPartitions[atoms[nbor].index] = 1;
729 }
730 }
731 for (unsigned int ii = 0; ii < nAtoms; ++ii) {
732 if (touchedPartitions[ii]) {
733 partition = order[ii];
734 if ((count[partition] > 1) && (next[partition] == -2)) {
735 next[partition] = activeset;
737 }
739 }
740 }
741 }
742 }
743} // end of RefinePartitions()
744
745template <typename CompareFunc>
746void BreakTies(const ROMol &mol, canon_atom *atoms, CompareFunc compar,
747 int mode, int *order, int *count, int &activeset, int *next,
748 int *changed, char *touchedPartitions) {
749 unsigned int nAtoms = mol.getNumAtoms();
750 int partition;
751 int offset;
752 int index;
753 int len;
754 int oldPart = 0;
755
756 for (unsigned int i = 0; i < nAtoms; i++) {
757 partition = order[i];
758 oldPart = atoms[partition].index;
759 while (count[partition] > 1) {
761 offset = atoms[partition].index + len - 1;
762 index = order[offset];
763 atoms[index].index = offset;
764 count[partition] = len - 1;
765 count[index] = 1;
766
767 // test for ions, water molecules with no
768 if (atoms[index].degree < 1) {
769 continue;
770 }
771 for (unsigned j = 0; j < atoms[index].degree; ++j) {
772 unsigned int nbor = atoms[index].nbrIds[j];
773 touchedPartitions[atoms[nbor].index] = 1;
774 changed[nbor] = 1;
775 }
776
777 for (unsigned int ii = 0; ii < nAtoms; ++ii) {
778 if (touchedPartitions[ii]) {
779 int npart = order[ii];
780 if ((count[npart] > 1) && (next[npart] == -2)) {
781 next[npart] = activeset;
783 }
785 }
786 }
787 RefinePartitions(mol, atoms, compar, mode, order, count, activeset, next,
789 }
790 // not sure if this works each time
791 if (atoms[partition].index != oldPart) {
792 i -= 1;
793 }
794 }
795} // end of BreakTies()
796
798 int *order, int *count,
799 canon_atom *atoms);
800
801RDKIT_GRAPHMOL_EXPORT void ActivatePartitions(unsigned int nAtoms, int *order,
802 int *count, int &activeset,
803 int *next, int *changed);
804
806 std::vector<unsigned int> &res,
807 bool breakTies = true,
808 bool includeChirality = true,
809 bool includeIsotopes = true,
810 bool includeAtomMaps = true);
811
813 const ROMol &mol, std::vector<unsigned int> &res,
814 const boost::dynamic_bitset<> &atomsInPlay,
815 const boost::dynamic_bitset<> &bondsInPlay,
816 const std::vector<std::string> *atomSymbols,
817 const std::vector<std::string> *bondSymbols, bool breakTies,
819
821 const ROMol &mol, std::vector<unsigned int> &res,
822 const boost::dynamic_bitset<> &atomsInPlay,
823 const boost::dynamic_bitset<> &bondsInPlay,
824 const std::vector<std::string> *atomSymbols = nullptr,
825 bool breakTies = true, bool includeChirality = true,
826 bool includeIsotopes = true, bool includeAtomMaps = true) {
827 rankFragmentAtoms(mol, res, atomsInPlay, bondsInPlay, atomSymbols, nullptr,
830};
831
833 std::vector<unsigned int> &res);
834
836 std::vector<Canon::canon_atom> &atoms,
837 bool includeChirality = true);
838
839namespace detail {
841 std::vector<Canon::canon_atom> &atoms,
842 bool includeChirality,
843 const std::vector<std::string> *atomSymbols,
844 const std::vector<std::string> *bondSymbols,
845 const boost::dynamic_bitset<> &atomsInPlay,
846 const boost::dynamic_bitset<> &bondsInPlay,
847 bool needsInit);
848template <typename T>
849void rankWithFunctor(T &ftor, bool breakTies, int *order,
850 bool useSpecial = false, bool useChirality = false,
851 const boost::dynamic_bitset<> *atomsInPlay = nullptr,
852 const boost::dynamic_bitset<> *bondsInPlay = nullptr);
853
854} // namespace detail
855
856} // namespace Canon
857} // namespace RDKit
#define PRECONDITION(expr, mess)
Definition Invariant.h:109
Defines the primary molecule class ROMol as well as associated typedefs.
Defines the class StereoGroup which stores relationships between the absolute configurations of atoms...
The class for representing atoms.
Definition Atom.h:75
int getAtomicNum() const
returns our atomic number
Definition Atom.h:134
@ CHI_TETRAHEDRAL_CW
tetrahedral: clockwise rotation (SMILES @@)
Definition Atom.h:101
@ CHI_TETRAHEDRAL_CCW
tetrahedral: counter-clockwise rotation (SMILES
Definition Atom.h:102
BondType
the type of Bond
Definition Bond.h:56
BondStereo
the nature of the bond's stereochem (for cis/trans)
Definition Bond.h:95
AtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=nullptr, const boost::dynamic_bitset<> *bondsInPlay=nullptr)
Definition new_canon.h:468
int operator()(int i, int j) const
Definition new_canon.h:480
ChiralAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m)
Definition new_canon.h:597
int operator()(int i, int j) const
Definition new_canon.h:599
SpecialChiralityAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=nullptr, const boost::dynamic_bitset<> *bondsInPlay=nullptr)
Definition new_canon.h:143
SpecialSymmetryAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=nullptr, const boost::dynamic_bitset<> *bondsInPlay=nullptr)
Definition new_canon.h:200
const std::vector< StereoGroup > & getStereoGroups() const
Gets a reference to the groups of atoms with relative stereochemistry.
Definition ROMol.h:790
unsigned int getNumAtoms() const
returns our number of atoms
Definition ROMol.h:421
#define RDKIT_GRAPHMOL_EXPORT
Definition export.h:233
void rankWithFunctor(T &ftor, bool breakTies, int *order, bool useSpecial=false, bool useChirality=false, const boost::dynamic_bitset<> *atomsInPlay=nullptr, const boost::dynamic_bitset<> *bondsInPlay=nullptr)
void initFragmentCanonAtoms(const ROMol &mol, std::vector< Canon::canon_atom > &atoms, bool includeChirality, const std::vector< std::string > *atomSymbols, const std::vector< std::string > *bondSymbols, const boost::dynamic_bitset<> &atomsInPlay, const boost::dynamic_bitset<> &bondsInPlay, bool needsInit)
RDKIT_GRAPHMOL_EXPORT void rankFragmentAtoms(const ROMol &mol, std::vector< unsigned int > &res, const boost::dynamic_bitset<> &atomsInPlay, const boost::dynamic_bitset<> &bondsInPlay, const std::vector< std::string > *atomSymbols, const std::vector< std::string > *bondSymbols, bool breakTies, bool includeChirality, bool includeIsotope, bool includeAtomMaps)
RDKIT_GRAPHMOL_EXPORT void CreateSinglePartition(unsigned int nAtoms, int *order, int *count, canon_atom *atoms)
RDKIT_GRAPHMOL_EXPORT void initCanonAtoms(const ROMol &mol, std::vector< Canon::canon_atom > &atoms, bool includeChirality=true)
RDKIT_GRAPHMOL_EXPORT void ActivatePartitions(unsigned int nAtoms, int *order, int *count, int &activeset, int *next, int *changed)
const unsigned int ATNUM_CLASS_OFFSET
Definition new_canon.h:522
RDKIT_GRAPHMOL_EXPORT void updateAtomNeighborNumSwaps(canon_atom *atoms, std::vector< bondholder > &nbrs, unsigned int atomIdx, std::vector< std::pair< unsigned int, unsigned int > > &result)
void BreakTies(const ROMol &mol, canon_atom *atoms, CompareFunc compar, int mode, int *order, int *count, int &activeset, int *next, int *changed, char *touchedPartitions)
Definition new_canon.h:746
void RefinePartitions(const ROMol &mol, canon_atom *atoms, CompareFunc compar, int mode, int *order, int *count, int &activeset, int *next, int *changed, char *touchedPartitions)
Definition new_canon.h:649
RDKIT_GRAPHMOL_EXPORT void rankMolAtoms(const ROMol &mol, std::vector< unsigned int > &res, bool breakTies=true, bool includeChirality=true, bool includeIsotopes=true, bool includeAtomMaps=true)
RDKIT_GRAPHMOL_EXPORT void chiralRankMolAtoms(const ROMol &mol, std::vector< unsigned int > &res)
RDKIT_GRAPHMOL_EXPORT void updateAtomNeighborIndex(canon_atom *atoms, std::vector< bondholder > &nbrs)
Std stuff.
StereoGroupType
Definition StereoGroup.h:30
bool rdvalue_is(const RDValue_cast_t)
void hanoisort(int *base, int nel, int *count, int *changed, CompareFunc compar)
Definition hanoiSort.h:151
unsigned int countSwapsToInterconvert(const T &ref, T probe)
Definition utils.h:54
const std::string * p_symbol
Definition new_canon.h:41
Bond::BondType bondType
Definition new_canon.h:34
static bool greater(const bondholder &lhs, const bondholder &rhs)
Definition new_canon.h:64
bool operator<(const bondholder &o) const
Definition new_canon.h:63
int compareStereo(const bondholder &o) const
bondholder(Bond::BondType bt, unsigned int bs, unsigned int ni, unsigned int nsc, unsigned int bidx)
Definition new_canon.h:53
bondholder(Bond::BondType bt, Bond::BondStereo bs, unsigned int ni, unsigned int nsc, unsigned int bidx)
Definition new_canon.h:46
unsigned int bondStereo
Definition new_canon.h:35
static int compare(const bondholder &x, const bondholder &y, unsigned int div=1)
Definition new_canon.h:68
unsigned int nbrSymClass
Definition new_canon.h:37
std::vector< bondholder > bonds
Definition new_canon.h:114
std::unique_ptr< int[]> nbrIds
Definition new_canon.h:109
std::vector< int > revistedNeighbors
Definition new_canon.h:113
std::vector< int > neighborNum
Definition new_canon.h:112