RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
new_canon.h
Go to the documentation of this file.
1//
2// Copyright (C) 2014 Greg Landrum
3// Adapted from pseudo-code from Roger Sayle
4//
5// @@ All Rights Reserved @@
6// This file is part of the RDKit.
7// The contents are covered by the terms of the BSD license
8// which is included in the file license.txt, found at the root
9// of the RDKit source tree.
10//
11
12#include <RDGeneral/export.h>
13#include <RDGeneral/hanoiSort.h>
14#include <GraphMol/ROMol.h>
15#include <GraphMol/RingInfo.h>
18#include <cstdint>
19#include <boost/dynamic_bitset.hpp>
21#include <cstring>
22#include <iostream>
23#include <cassert>
24#include <cstring>
25#include <vector>
26
27// #define VERBOSE_CANON 1
28
29namespace RDKit {
30namespace Canon {
31struct canon_atom;
32
34 Bond::BondType bondType{Bond::BondType::UNSPECIFIED};
35 unsigned int bondStereo{
36 static_cast<unsigned int>(Bond::BondStereo::STEREONONE)};
37 unsigned int nbrSymClass{0};
38 unsigned int nbrIdx{0};
39 Bond::BondStereo stype{Bond::BondStereo::STEREONONE};
40 const canon_atom *controllingAtoms[4]{nullptr, nullptr, nullptr, nullptr};
41 const std::string *p_symbol{
42 nullptr}; // if provided, this is used to order bonds
43 unsigned int bondIdx{0};
44
47 unsigned int nsc, unsigned int bidx)
48 : bondType(bt),
49 bondStereo(static_cast<unsigned int>(bs)),
50 nbrSymClass(nsc),
51 nbrIdx(ni),
52 bondIdx(bidx) {}
53 bondholder(Bond::BondType bt, unsigned int bs, unsigned int ni,
54 unsigned int nsc, unsigned int bidx)
55 : bondType(bt),
56 bondStereo(bs),
57 nbrSymClass(nsc),
58 nbrIdx(ni),
59 bondIdx(bidx) {}
60
61 int compareStereo(const bondholder &o) const;
62
63 bool operator<(const bondholder &o) const { return compare(*this, o) < 0; }
64 static bool greater(const bondholder &lhs, const bondholder &rhs) {
65 return compare(lhs, rhs) > 0;
66 }
67
68 static int compare(const bondholder &x, const bondholder &y,
69 unsigned int div = 1) {
70 if (x.p_symbol && y.p_symbol) {
71 if ((*x.p_symbol) < (*y.p_symbol)) {
72 return -1;
73 } else if ((*x.p_symbol) > (*y.p_symbol)) {
74 return 1;
75 }
76 }
77 if (x.bondType < y.bondType) {
78 return -1;
79 } else if (x.bondType > y.bondType) {
80 return 1;
81 }
82 if (x.bondStereo < y.bondStereo) {
83 return -1;
84 } else if (x.bondStereo > y.bondStereo) {
85 return 1;
86 }
87 auto scdiv = x.nbrSymClass / div - y.nbrSymClass / div;
88 if (scdiv) {
89 return scdiv;
90 }
91 if (x.bondStereo && y.bondStereo) {
92 auto cs = x.compareStereo(y);
93 if (cs) {
94 return cs;
95 }
96 }
97 return 0;
98 }
99};
101 const Atom *atom{nullptr};
102 int index{-1};
103 unsigned int degree{0};
104 unsigned int totalNumHs{0};
105 bool hasRingNbr{false};
106 bool isRingStereoAtom{false};
107 unsigned int whichStereoGroup{0};
108 StereoGroupType typeOfStereoGroup{StereoGroupType::STEREO_ABSOLUTE};
109 std::unique_ptr<int[]> nbrIds;
110 const std::string *p_symbol{
111 nullptr}; // if provided, this is used to order atoms
112 std::vector<int> neighborNum;
113 std::vector<int> revistedNeighbors;
114 std::vector<bondholder> bonds;
115};
116
118 canon_atom *atoms, std::vector<bondholder> &nbrs);
119
121 canon_atom *atoms, std::vector<bondholder> &nbrs, unsigned int atomIdx,
122 std::vector<std::pair<unsigned int, unsigned int>> &result);
123
124/*
125 * Different types of atom compare functions:
126 *
127 * - SpecialChiralityAtomCompareFunctor: Allows canonizing molecules exhibiting
128 *dependent chirality
129 * - SpecialSymmetryAtomCompareFunctor: Very specialized, allows canonizing
130 *highly symmetrical graphs/molecules
131 * - AtomCompareFunctor: Basic atom compare function which also allows to
132 *include neighbors within the ranking
133 */
134
136 public:
137 Canon::canon_atom *dp_atoms{nullptr};
138 const ROMol *dp_mol{nullptr};
139 const boost::dynamic_bitset<> *dp_atomsInPlay{nullptr},
140 *dp_bondsInPlay{nullptr};
141
144 Canon::canon_atom *atoms, const ROMol &m,
145 const boost::dynamic_bitset<> *atomsInPlay = nullptr,
146 const boost::dynamic_bitset<> *bondsInPlay = nullptr)
147 : dp_atoms(atoms),
148 dp_mol(&m),
149 dp_atomsInPlay(atomsInPlay),
150 dp_bondsInPlay(bondsInPlay) {}
151 int operator()(int i, int j) const {
152 PRECONDITION(dp_atoms, "no atoms");
153 PRECONDITION(dp_mol, "no molecule");
154 PRECONDITION(i != j, "bad call");
155 if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
156 return 0;
157 }
158
159 if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
160 updateAtomNeighborIndex(dp_atoms, dp_atoms[i].bonds);
161 }
162 if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
163 updateAtomNeighborIndex(dp_atoms, dp_atoms[j].bonds);
164 }
165 for (unsigned int ii = 0;
166 ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size(); ++ii) {
167 int cmp =
168 bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
169 if (cmp) {
170 return cmp;
171 }
172 }
173
174 std::vector<std::pair<unsigned int, unsigned int>> swapsi;
175 std::vector<std::pair<unsigned int, unsigned int>> swapsj;
176 if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
177 updateAtomNeighborNumSwaps(dp_atoms, dp_atoms[i].bonds, i, swapsi);
178 }
179 if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
180 updateAtomNeighborNumSwaps(dp_atoms, dp_atoms[j].bonds, j, swapsj);
181 }
182 for (unsigned int ii = 0; ii < swapsi.size() && ii < swapsj.size(); ++ii) {
183 int cmp = swapsi[ii].second - swapsj[ii].second;
184 if (cmp) {
185 return cmp;
186 }
187 }
188 return 0;
189 }
190};
191
193 public:
194 Canon::canon_atom *dp_atoms{nullptr};
195 const ROMol *dp_mol{nullptr};
196 const boost::dynamic_bitset<> *dp_atomsInPlay{nullptr},
197 *dp_bondsInPlay{nullptr};
198
201 Canon::canon_atom *atoms, const ROMol &m,
202 const boost::dynamic_bitset<> *atomsInPlay = nullptr,
203 const boost::dynamic_bitset<> *bondsInPlay = nullptr)
204 : dp_atoms(atoms),
205 dp_mol(&m),
206 dp_atomsInPlay(atomsInPlay),
207 dp_bondsInPlay(bondsInPlay) {}
208 int operator()(int i, int j) const {
209 PRECONDITION(dp_atoms, "no atoms");
210 PRECONDITION(dp_mol, "no molecule");
211 PRECONDITION(i != j, "bad call");
212 if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
213 return 0;
214 }
215
216 if (dp_atoms[i].neighborNum < dp_atoms[j].neighborNum) {
217 return -1;
218 } else if (dp_atoms[i].neighborNum > dp_atoms[j].neighborNum) {
219 return 1;
220 }
221
222 if (dp_atoms[i].revistedNeighbors < dp_atoms[j].revistedNeighbors) {
223 return -1;
224 } else if (dp_atoms[i].revistedNeighbors > dp_atoms[j].revistedNeighbors) {
225 return 1;
226 }
227
228 if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
229 updateAtomNeighborIndex(dp_atoms, dp_atoms[i].bonds);
230 }
231 if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
232 updateAtomNeighborIndex(dp_atoms, dp_atoms[j].bonds);
233 }
234 for (unsigned int ii = 0;
235 ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size(); ++ii) {
236 int cmp =
237 bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
238 if (cmp) {
239 return cmp;
240 }
241 }
242
243 if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
244 return -1;
245 } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
246 return 1;
247 }
248 return 0;
249 }
250};
251
252namespace {
253unsigned int getChiralRank(const ROMol *dp_mol, canon_atom *dp_atoms,
254 unsigned int i) {
255 unsigned int res = 0;
256 std::vector<unsigned int> perm;
257 perm.reserve(dp_atoms[i].atom->getDegree());
258 for (const auto nbr : dp_mol->atomNeighbors(dp_atoms[i].atom)) {
259 auto rnk = dp_atoms[nbr->getIdx()].index;
260 // make sure we don't have duplicate ranks
261 if (std::find(perm.begin(), perm.end(), rnk) != perm.end()) {
262 break;
263 } else {
264 perm.push_back(rnk);
265 }
266 }
267 if (perm.size() == dp_atoms[i].atom->getDegree()) {
268 auto ctag = dp_atoms[i].atom->getChiralTag();
271 auto sortedPerm = perm;
272 std::sort(sortedPerm.begin(), sortedPerm.end());
275 if (nswaps % 2) {
276 res = res == 2 ? 1 : 2;
277 }
278 }
279 }
280 return res;
281}
282} // namespace
284 unsigned int getAtomRingNbrCode(unsigned int i) const {
285 if (!dp_atoms[i].hasRingNbr) {
286 return 0;
287 }
288
289 auto nbrs = dp_atoms[i].nbrIds.get();
290 unsigned int code = 0;
291 for (unsigned j = 0; j < dp_atoms[i].degree; ++j) {
292 if (dp_atoms[nbrs[j]].isRingStereoAtom) {
293 code += dp_atoms[nbrs[j]].index * 10000 + 1; // j;
294 }
295 }
296 return code;
297 }
298
299 int basecomp(int i, int j) const {
300 unsigned int ivi, ivj;
301
302 // always start with the current class:
303 ivi = dp_atoms[i].index;
304 ivj = dp_atoms[j].index;
305 if (ivi < ivj) {
306 return -1;
307 } else if (ivi > ivj) {
308 return 1;
309 }
310
311 if (df_useNonStereoRanks) {
312 // use the non-stereo ranks if they were assigned
313 int rankingNumber_i = 0;
314 int rankingNumber_j = 0;
315 dp_atoms[i].atom->getPropIfPresent(
316 common_properties::_CanonicalRankingNumber, rankingNumber_i);
317 dp_atoms[j].atom->getPropIfPresent(
318 common_properties::_CanonicalRankingNumber, rankingNumber_j);
319 if (rankingNumber_i < rankingNumber_j) {
320 return -1;
321 } else if (rankingNumber_i > rankingNumber_j) {
322 return 1;
323 }
324 }
325
326 if (df_useAtomMaps) {
327 // use the atom-mapping numbers if they were assigned
328 int molAtomMapNumber_i = 0;
329 int molAtomMapNumber_j = 0;
330 dp_atoms[i].atom->getPropIfPresent(common_properties::molAtomMapNumber,
331 molAtomMapNumber_i);
332 dp_atoms[j].atom->getPropIfPresent(common_properties::molAtomMapNumber,
333 molAtomMapNumber_j);
334 if (molAtomMapNumber_i < molAtomMapNumber_j) {
335 return -1;
336 } else if (molAtomMapNumber_i > molAtomMapNumber_j) {
337 return 1;
338 }
339 }
340 // start by comparing degree
341 ivi = dp_atoms[i].degree;
342 ivj = dp_atoms[j].degree;
343 if (ivi < ivj) {
344 return -1;
345 } else if (ivi > ivj) {
346 return 1;
347 }
348 if (dp_atoms[i].p_symbol && dp_atoms[j].p_symbol) {
349 if (*(dp_atoms[i].p_symbol) < *(dp_atoms[j].p_symbol)) {
350 return -1;
351 } else if (*(dp_atoms[i].p_symbol) > *(dp_atoms[j].p_symbol)) {
352 return 1;
353 } else {
354 return 0;
355 }
356 }
357
358 // move onto atomic number
359 ivi = dp_atoms[i].atom->getAtomicNum();
360 ivj = dp_atoms[j].atom->getAtomicNum();
361 if (ivi < ivj) {
362 return -1;
363 } else if (ivi > ivj) {
364 return 1;
365 }
366 // isotopes if we're using them
367 if (df_useIsotopes) {
368 ivi = dp_atoms[i].atom->getIsotope();
369 ivj = dp_atoms[j].atom->getIsotope();
370 if (ivi < ivj) {
371 return -1;
372 } else if (ivi > ivj) {
373 return 1;
374 }
375 }
376
377 // nHs
378 ivi = dp_atoms[i].totalNumHs;
379 ivj = dp_atoms[j].totalNumHs;
380 if (ivi < ivj) {
381 return -1;
382 } else if (ivi > ivj) {
383 return 1;
384 }
385 // charge
386 ivi = dp_atoms[i].atom->getFormalCharge();
387 ivj = dp_atoms[j].atom->getFormalCharge();
388 if (ivi < ivj) {
389 return -1;
390 } else if (ivi > ivj) {
391 return 1;
392 }
393 // presence of specified chirality if it's being used
394 if (df_useChiralPresence) {
395 ivi =
396 dp_atoms[i].atom->getChiralTag() != Atom::ChiralType::CHI_UNSPECIFIED;
397 ivj =
398 dp_atoms[j].atom->getChiralTag() != Atom::ChiralType::CHI_UNSPECIFIED;
399 if (ivi < ivj) {
400 return -1;
401 } else if (ivi > ivj) {
402 return 1;
403 }
404 }
405 // chirality if we're using it
406 if (df_useChirality) {
407 // look at enhanced stereo - whichStereoGroup == 0 means no stereo
408 ivi = dp_atoms[i].whichStereoGroup; // can't use the index itself, but if
409 // it's set then we're in an SG
410 ivj = dp_atoms[j].whichStereoGroup;
411 if (ivi || ivj) {
412 if (ivi && !ivj) {
413 return 1;
414 } else if (ivj && !ivi) {
415 return -1;
416 } else if (ivi && ivj) {
417 auto iType = dp_atoms[i].typeOfStereoGroup;
418 auto jType = dp_atoms[j].typeOfStereoGroup;
419 if (iType < jType) {
420 return -1;
421 } else if (iType > jType) {
422 return 1;
423 }
424 if (ivi != ivj) {
425 // now check the current classes of the other members of the SG
426 std::set<unsigned int> sgi;
427 for (const auto sgat :
428 dp_mol->getStereoGroups()[ivi - 1].getAtoms()) {
429 sgi.insert(dp_atoms[sgat->getIdx()].index);
430 }
431 std::set<unsigned int> sgj;
432 for (const auto sgat :
433 dp_mol->getStereoGroups()[ivj - 1].getAtoms()) {
434 sgj.insert(dp_atoms[sgat->getIdx()].index);
435 }
436 if (sgi < sgj) {
437 return -1;
438 } else if (sgi > sgj) {
439 return 1;
440 }
441 } else { // same stereo group
442 if (iType == StereoGroupType::STEREO_ABSOLUTE) {
443 ivi = getChiralRank(dp_mol, dp_atoms, i);
444 ivj = getChiralRank(dp_mol, dp_atoms, j);
445 if (ivi < ivj) {
446 return -1;
447 } else if (ivi > ivj) {
448 return 1;
449 }
450 }
451 }
452 }
453 } else {
454 // if there's no stereogroup, then use whatever atom stereochem is
455 // specfied:
456 ivi = 0;
457 ivj = 0;
458 // can't actually use values here, because they are arbitrary
459 ivi = dp_atoms[i].atom->getChiralTag() != 0;
460 ivj = dp_atoms[j].atom->getChiralTag() != 0;
461 if (ivi < ivj) {
462 return -1;
463 } else if (ivi > ivj) {
464 return 1;
465 }
466 // stereo set
467 if (ivi && ivj) {
468 if (ivi) {
469 ivi = getChiralRank(dp_mol, dp_atoms, i);
470 }
471 if (ivj) {
472 ivj = getChiralRank(dp_mol, dp_atoms, j);
473 }
474 if (ivi < ivj) {
475 return -1;
476 } else if (ivi > ivj) {
477 return 1;
478 }
479 }
480 }
481 }
482
483 if (df_useChiralityRings) {
484 // ring stereochemistry
485 ivi = getAtomRingNbrCode(i);
486 ivj = getAtomRingNbrCode(j);
487 if (ivi < ivj) {
488 return -1;
489 } else if (ivi > ivj) {
490 return 1;
491 } // bond stereo is taken care of in the neighborhood comparison
492 }
493 return 0;
494 }
495
496 public:
497 Canon::canon_atom *dp_atoms{nullptr};
498 const ROMol *dp_mol{nullptr};
499 const boost::dynamic_bitset<> *dp_atomsInPlay{nullptr},
500 *dp_bondsInPlay{nullptr};
501 bool df_useNbrs{false};
502 bool df_useIsotopes{true};
503 bool df_useChirality{true};
504 bool df_useChiralityRings{true};
505 bool df_useAtomMaps{true};
506 bool df_useNonStereoRanks{false};
507 bool df_useChiralPresence{true};
508
511 const boost::dynamic_bitset<> *atomsInPlay = nullptr,
512 const boost::dynamic_bitset<> *bondsInPlay = nullptr)
513 : dp_atoms(atoms),
514 dp_mol(&m),
515 dp_atomsInPlay(atomsInPlay),
516 dp_bondsInPlay(bondsInPlay) {}
517
518 int operator()(int i, int j) const {
519 if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
520 return 0;
521 }
522 int v = basecomp(i, j);
523 if (v) {
524 return v;
525 }
526
527 if (df_useNbrs) {
528 if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
529 updateAtomNeighborIndex(dp_atoms, dp_atoms[i].bonds);
530 }
531 if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
532 updateAtomNeighborIndex(dp_atoms, dp_atoms[j].bonds);
533 }
534
535 for (unsigned int ii = 0;
536 ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
537 ++ii) {
538 int cmp =
539 bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
540 if (cmp) {
541 return cmp;
542 }
543 }
544
545 if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
546 return -1;
547 } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
548 return 1;
549 }
550 }
551 return 0;
552 }
553};
554
555/*
556 * A compare function to discriminate chiral atoms, similar to the CIP rules.
557 * This functionality is currently not used.
558 */
559
560const unsigned int ATNUM_CLASS_OFFSET = 10000;
562 void getAtomNeighborhood(std::vector<bondholder> &nbrs) const {
563 for (unsigned j = 0; j < nbrs.size(); ++j) {
564 unsigned int nbrIdx = nbrs[j].nbrIdx;
565 if (nbrIdx == ATNUM_CLASS_OFFSET) {
566 // Ignore the Hs
567 continue;
568 }
569 const Atom *nbr = dp_atoms[nbrIdx].atom;
570 nbrs[j].nbrSymClass =
571 nbr->getAtomicNum() * ATNUM_CLASS_OFFSET + dp_atoms[nbrIdx].index + 1;
572 }
573 std::sort(nbrs.begin(), nbrs.end(), bondholder::greater);
574 // FIX: don't want to be doing this long-term
575 }
576
577 int basecomp(int i, int j) const {
578 PRECONDITION(dp_atoms, "no atoms");
579 unsigned int ivi, ivj;
580
581 // always start with the current class:
582 ivi = dp_atoms[i].index;
583 ivj = dp_atoms[j].index;
584 if (ivi < ivj) {
585 return -1;
586 } else if (ivi > ivj) {
587 return 1;
588 }
589
590 // move onto atomic number
591 ivi = dp_atoms[i].atom->getAtomicNum();
592 ivj = dp_atoms[j].atom->getAtomicNum();
593 if (ivi < ivj) {
594 return -1;
595 } else if (ivi > ivj) {
596 return 1;
597 }
598
599 // isotopes:
600 ivi = dp_atoms[i].atom->getIsotope();
601 ivj = dp_atoms[j].atom->getIsotope();
602 if (ivi < ivj) {
603 return -1;
604 } else if (ivi > ivj) {
605 return 1;
606 }
607
608 // atom stereochem:
609 ivi = 0;
610 ivj = 0;
611 std::string cipCode;
612 if (dp_atoms[i].atom->getPropIfPresent(common_properties::_CIPCode,
613 cipCode)) {
614 ivi = cipCode == "R" ? 2 : 1;
615 }
616 if (dp_atoms[j].atom->getPropIfPresent(common_properties::_CIPCode,
617 cipCode)) {
618 ivj = cipCode == "R" ? 2 : 1;
619 }
620 if (ivi < ivj) {
621 return -1;
622 } else if (ivi > ivj) {
623 return 1;
624 }
625
626 // bond stereo is taken care of in the neighborhood comparison
627 return 0;
628 }
629
630 public:
631 Canon::canon_atom *dp_atoms{nullptr};
632 const ROMol *dp_mol{nullptr};
633 bool df_useNbrs{false};
636 : dp_atoms(atoms), dp_mol(&m), df_useNbrs(false) {}
637 int operator()(int i, int j) const {
638 PRECONDITION(dp_atoms, "no atoms");
639 PRECONDITION(dp_mol, "no molecule");
640 PRECONDITION(i != j, "bad call");
641 int v = basecomp(i, j);
642 if (v) {
643 return v;
644 }
645
646 if (df_useNbrs) {
647 getAtomNeighborhood(dp_atoms[i].bonds);
648 getAtomNeighborhood(dp_atoms[j].bonds);
649
650 // we do two passes through the neighbor lists. The first just uses the
651 // atomic numbers (by passing the optional 10000 to bondholder::compare),
652 // the second takes the already-computed index into account
653 for (unsigned int ii = 0;
654 ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
655 ++ii) {
656 int cmp = bondholder::compare(
657 dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii], ATNUM_CLASS_OFFSET);
658 if (cmp) {
659 return cmp;
660 }
661 }
662 for (unsigned int ii = 0;
663 ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
664 ++ii) {
665 int cmp =
666 bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
667 if (cmp) {
668 return cmp;
669 }
670 }
671 if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
672 return -1;
673 } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
674 return 1;
675 }
676 }
677 return 0;
678 }
679};
680
681/*
682 * Basic canonicalization function to organize the partitions which will be
683 * sorted next.
684 * */
685
686template <typename CompareFunc>
688 int mode, int *order, int *count, int &activeset,
689 int *next, int *changed, char *touchedPartitions) {
690 unsigned int nAtoms = mol.getNumAtoms();
691 int partition;
692 int symclass = 0;
693 int *start;
694 int offset;
695 int index;
696 int len;
697 int i;
698 // std::vector<char> touchedPartitions(mol.getNumAtoms(),0);
699
700 // std::cerr<<"&&&&&&&&&&&&&&&& RP"<<std::endl;
701 while (activeset != -1) {
702 // std::cerr<<"ITER: "<<activeset<<" next: "<<next[activeset]<<std::endl;
703 // std::cerr<<" next: ";
704 // for(unsigned int ii=0;ii<nAtoms;++ii){
705 // std::cerr<<ii<<":"<<next[ii]<<" ";
706 // }
707 // std::cerr<<std::endl;
708 // for(unsigned int ii=0;ii<nAtoms;++ii){
709 // std::cerr<<order[ii]<<" count: "<<count[order[ii]]<<" index:
710 // "<<atoms[order[ii]].index<<std::endl;
711 // }
712
714 activeset = next[partition];
715 next[partition] = -2;
716
718 offset = atoms[partition].index;
719 start = order + offset;
720 // std::cerr<<"\n\n**************************************************************"<<std::endl;
721 // std::cerr<<" sort - class:"<<atoms[partition].index<<" len: "<<len<<":";
722 // for(unsigned int ii=0;ii<len;++ii){
723 // std::cerr<<" "<<order[offset+ii]+1;
724 // }
725 // std::cerr<<std::endl;
726 // for(unsigned int ii=0;ii<nAtoms;++ii){
727 // std::cerr<<order[ii]+1<<" count: "<<count[order[ii]]<<" index:
728 // "<<atoms[order[ii]].index<<std::endl;
729 // }
731 // std::cerr<<"*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*"<<std::endl;
732 // std::cerr<<" result:";
733 // for(unsigned int ii=0;ii<nAtoms;++ii){
734 // std::cerr<<order[ii]+1<<" count: "<<count[order[ii]]<<" index:
735 // "<<atoms[order[ii]].index<<std::endl;
736 // }
737 for (int k = 0; k < len; ++k) {
738 changed[start[k]] = 0;
739 }
740
741 index = start[0];
742 // std::cerr<<" len:"<<len<<" index:"<<index<<"
743 // count:"<<count[index]<<std::endl;
744 for (i = count[index]; i < len; i++) {
745 index = start[i];
746 if (count[index]) {
747 symclass = offset + i;
748 }
749 atoms[index].index = symclass;
750 // std::cerr<<" "<<index+1<<"("<<symclass<<")";
751 // if(mode && (activeset<0 || count[index]>count[activeset]) ){
752 // activeset=index;
753 //}
754 for (unsigned j = 0; j < atoms[index].degree; ++j) {
755 changed[atoms[index].nbrIds[j]] = 1;
756 }
757 }
758 // std::cerr<<std::endl;
759
760 if (mode) {
761 index = start[0];
762 for (i = count[index]; i < len; i++) {
763 index = start[i];
764 for (unsigned j = 0; j < atoms[index].degree; ++j) {
765 unsigned int nbor = atoms[index].nbrIds[j];
766 touchedPartitions[atoms[nbor].index] = 1;
767 }
768 }
769 for (unsigned int ii = 0; ii < nAtoms; ++ii) {
770 if (touchedPartitions[ii]) {
771 partition = order[ii];
772 if ((count[partition] > 1) && (next[partition] == -2)) {
773 next[partition] = activeset;
775 }
777 }
778 }
779 }
780 }
781} // end of RefinePartitions()
782
783template <typename CompareFunc>
784void BreakTies(const ROMol &mol, canon_atom *atoms, CompareFunc compar,
785 int mode, int *order, int *count, int &activeset, int *next,
786 int *changed, char *touchedPartitions) {
787 unsigned int nAtoms = mol.getNumAtoms();
788 int partition;
789 int offset;
790 int index;
791 int len;
792 int oldPart = 0;
793
794 for (unsigned int i = 0; i < nAtoms; i++) {
795 partition = order[i];
796 oldPart = atoms[partition].index;
797 while (count[partition] > 1) {
799 offset = atoms[partition].index + len - 1;
800 index = order[offset];
801 atoms[index].index = offset;
802 count[partition] = len - 1;
803 count[index] = 1;
804
805 // test for ions, water molecules with no
806 if (atoms[index].degree < 1) {
807 continue;
808 }
809 for (unsigned j = 0; j < atoms[index].degree; ++j) {
810 unsigned int nbor = atoms[index].nbrIds[j];
811 touchedPartitions[atoms[nbor].index] = 1;
812 changed[nbor] = 1;
813 }
814
815 for (unsigned int ii = 0; ii < nAtoms; ++ii) {
816 if (touchedPartitions[ii]) {
817 int npart = order[ii];
818 if ((count[npart] > 1) && (next[npart] == -2)) {
819 next[npart] = activeset;
821 }
823 }
824 }
825 RefinePartitions(mol, atoms, compar, mode, order, count, activeset, next,
827 }
828 // not sure if this works each time
829 if (atoms[partition].index != oldPart) {
830 i -= 1;
831 }
832 }
833} // end of BreakTies()
834
836 int *order, int *count,
837 canon_atom *atoms);
838
839RDKIT_GRAPHMOL_EXPORT void ActivatePartitions(unsigned int nAtoms, int *order,
840 int *count, int &activeset,
841 int *next, int *changed);
842
844 const ROMol &mol, std::vector<unsigned int> &res, bool breakTies = true,
845 bool includeChirality = true, bool includeIsotopes = true,
846 bool includeAtomMaps = true, bool includeChiralPresence = false,
847 bool includeStereoGroups = true, bool useNonStereoRanks = false);
848
850 const ROMol &mol, std::vector<unsigned int> &res,
851 const boost::dynamic_bitset<> &atomsInPlay,
852 const boost::dynamic_bitset<> &bondsInPlay,
853 const std::vector<std::string> *atomSymbols,
854 const std::vector<std::string> *bondSymbols, bool breakTies,
857
859 const ROMol &mol, std::vector<unsigned int> &res,
860 const boost::dynamic_bitset<> &atomsInPlay,
861 const boost::dynamic_bitset<> &bondsInPlay,
862 const std::vector<std::string> *atomSymbols = nullptr,
863 bool breakTies = true, bool includeChirality = true,
864 bool includeIsotopes = true, bool includeAtomMaps = true,
865 bool includeChiralPresence = false) {
866 rankFragmentAtoms(mol, res, atomsInPlay, bondsInPlay, atomSymbols, nullptr,
869};
870
872 std::vector<unsigned int> &res);
873
875 std::vector<Canon::canon_atom> &atoms,
876 bool includeChirality = true,
877 bool includeStereoGroups = true);
878
879namespace detail {
881 std::vector<Canon::canon_atom> &atoms,
882 bool includeChirality,
883 const std::vector<std::string> *atomSymbols,
884 const std::vector<std::string> *bondSymbols,
885 const boost::dynamic_bitset<> &atomsInPlay,
886 const boost::dynamic_bitset<> &bondsInPlay,
887 bool needsInit);
888template <typename T>
889void rankWithFunctor(T &ftor, bool breakTies, int *order,
890 bool useSpecial = false, bool useChirality = false,
891 const boost::dynamic_bitset<> *atomsInPlay = nullptr,
892 const boost::dynamic_bitset<> *bondsInPlay = nullptr);
893
894} // namespace detail
895
896} // namespace Canon
897} // namespace RDKit
#define PRECONDITION(expr, mess)
Definition Invariant.h:109
Defines the primary molecule class ROMol as well as associated typedefs.
Defines the class StereoGroup which stores relationships between the absolute configurations of atoms...
The class for representing atoms.
Definition Atom.h:75
int getAtomicNum() const
returns our atomic number
Definition Atom.h:134
@ CHI_TETRAHEDRAL_CW
tetrahedral: clockwise rotation (SMILES @@)
Definition Atom.h:101
@ CHI_TETRAHEDRAL_CCW
tetrahedral: counter-clockwise rotation (SMILES
Definition Atom.h:102
BondType
the type of Bond
Definition Bond.h:56
BondStereo
the nature of the bond's stereochem (for cis/trans)
Definition Bond.h:95
AtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=nullptr, const boost::dynamic_bitset<> *bondsInPlay=nullptr)
Definition new_canon.h:510
int operator()(int i, int j) const
Definition new_canon.h:518
ChiralAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m)
Definition new_canon.h:635
int operator()(int i, int j) const
Definition new_canon.h:637
SpecialChiralityAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=nullptr, const boost::dynamic_bitset<> *bondsInPlay=nullptr)
Definition new_canon.h:143
SpecialSymmetryAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=nullptr, const boost::dynamic_bitset<> *bondsInPlay=nullptr)
Definition new_canon.h:200
const std::vector< StereoGroup > & getStereoGroups() const
Gets a reference to the groups of atoms with relative stereochemistry.
Definition ROMol.h:790
unsigned int getNumAtoms() const
returns our number of atoms
Definition ROMol.h:421
#define RDKIT_GRAPHMOL_EXPORT
Definition export.h:233
void rankWithFunctor(T &ftor, bool breakTies, int *order, bool useSpecial=false, bool useChirality=false, const boost::dynamic_bitset<> *atomsInPlay=nullptr, const boost::dynamic_bitset<> *bondsInPlay=nullptr)
void initFragmentCanonAtoms(const ROMol &mol, std::vector< Canon::canon_atom > &atoms, bool includeChirality, const std::vector< std::string > *atomSymbols, const std::vector< std::string > *bondSymbols, const boost::dynamic_bitset<> &atomsInPlay, const boost::dynamic_bitset<> &bondsInPlay, bool needsInit)
RDKIT_GRAPHMOL_EXPORT void initCanonAtoms(const ROMol &mol, std::vector< Canon::canon_atom > &atoms, bool includeChirality=true, bool includeStereoGroups=true)
RDKIT_GRAPHMOL_EXPORT void CreateSinglePartition(unsigned int nAtoms, int *order, int *count, canon_atom *atoms)
RDKIT_GRAPHMOL_EXPORT void rankFragmentAtoms(const ROMol &mol, std::vector< unsigned int > &res, const boost::dynamic_bitset<> &atomsInPlay, const boost::dynamic_bitset<> &bondsInPlay, const std::vector< std::string > *atomSymbols, const std::vector< std::string > *bondSymbols, bool breakTies, bool includeChirality, bool includeIsotope, bool includeAtomMaps, bool includeChiralPresence)
RDKIT_GRAPHMOL_EXPORT void ActivatePartitions(unsigned int nAtoms, int *order, int *count, int &activeset, int *next, int *changed)
const unsigned int ATNUM_CLASS_OFFSET
Definition new_canon.h:560
RDKIT_GRAPHMOL_EXPORT void updateAtomNeighborNumSwaps(canon_atom *atoms, std::vector< bondholder > &nbrs, unsigned int atomIdx, std::vector< std::pair< unsigned int, unsigned int > > &result)
void BreakTies(const ROMol &mol, canon_atom *atoms, CompareFunc compar, int mode, int *order, int *count, int &activeset, int *next, int *changed, char *touchedPartitions)
Definition new_canon.h:784
RDKIT_GRAPHMOL_EXPORT void rankMolAtoms(const ROMol &mol, std::vector< unsigned int > &res, bool breakTies=true, bool includeChirality=true, bool includeIsotopes=true, bool includeAtomMaps=true, bool includeChiralPresence=false, bool includeStereoGroups=true, bool useNonStereoRanks=false)
void RefinePartitions(const ROMol &mol, canon_atom *atoms, CompareFunc compar, int mode, int *order, int *count, int &activeset, int *next, int *changed, char *touchedPartitions)
Definition new_canon.h:687
RDKIT_GRAPHMOL_EXPORT void chiralRankMolAtoms(const ROMol &mol, std::vector< unsigned int > &res)
RDKIT_GRAPHMOL_EXPORT void updateAtomNeighborIndex(canon_atom *atoms, std::vector< bondholder > &nbrs)
Std stuff.
StereoGroupType
Definition StereoGroup.h:31
bool rdvalue_is(const RDValue_cast_t)
void hanoisort(int *base, int nel, int *count, int *changed, CompareFunc compar)
Definition hanoiSort.h:151
unsigned int countSwapsToInterconvert(const T &ref, T probe)
Definition utils.h:54
const std::string * p_symbol
Definition new_canon.h:41
Bond::BondType bondType
Definition new_canon.h:34
static bool greater(const bondholder &lhs, const bondholder &rhs)
Definition new_canon.h:64
bool operator<(const bondholder &o) const
Definition new_canon.h:63
int compareStereo(const bondholder &o) const
bondholder(Bond::BondType bt, unsigned int bs, unsigned int ni, unsigned int nsc, unsigned int bidx)
Definition new_canon.h:53
bondholder(Bond::BondType bt, Bond::BondStereo bs, unsigned int ni, unsigned int nsc, unsigned int bidx)
Definition new_canon.h:46
unsigned int bondStereo
Definition new_canon.h:35
static int compare(const bondholder &x, const bondholder &y, unsigned int div=1)
Definition new_canon.h:68
unsigned int nbrSymClass
Definition new_canon.h:37
std::vector< bondholder > bonds
Definition new_canon.h:114
std::unique_ptr< int[]> nbrIds
Definition new_canon.h:109
std::vector< int > revistedNeighbors
Definition new_canon.h:113
std::vector< int > neighborNum
Definition new_canon.h:112