RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
DuplicatedSeedCache.h
Go to the documentation of this file.
1//
2// Copyright (C) 2014 Novartis Institutes for BioMedical Research
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#include <RDGeneral/export.h>
11#pragma once
12#include <map>
13#include <vector>
14#include <stdexcept>
15#include <algorithm>
16
17namespace RDKit {
18namespace FMCS {
20 public:
21 typedef bool TValue;
/// Cache key: the atom indices and bond indices that make up a seed.
///
/// Both index lists are kept in ascending order by addAtom()/addBond(), so
/// two keys built from the same indices compare equal regardless of the
/// order in which the indices were added. Duplicate indices are NOT
/// filtered out: adding the same index twice stores it twice.
class TKey {
  std::vector<unsigned int> AtomIdx;  // ascending, maintained by addAtom()
  std::vector<unsigned int> BondIdx;  // ascending, maintained by addBond()

 public:
  /// Number of atom indices stored in the key.
  size_t getNumAtoms() const { return AtomIdx.size(); }
  /// Number of bond indices stored in the key.
  size_t getNumBonds() const { return BondIdx.size(); }

  /// Inserts atom index \c i at its sorted position.
  void addAtom(unsigned int i) {
    auto it = std::lower_bound(AtomIdx.begin(), AtomIdx.end(), i);
    AtomIdx.insert(it, i);
  }
  /// Inserts bond index \c i at its sorted position.
  void addBond(unsigned int i) {
    auto it = std::lower_bound(BondIdx.begin(), BondIdx.end(), i);
    BondIdx.insert(it, i);
  }

  /// Two keys are equal when they hold the same atom and bond indices.
  ///
  /// Uses the vector comparison rather than memcmp on &v[0]: taking the
  /// address of element 0 of an empty vector is undefined behaviour, and a
  /// key may legitimately have no bonds (or no atoms) yet.
  bool operator==(const TKey& right) const {
    return AtomIdx == right.AtomIdx && BondIdx == right.BondIdx;
  }

  /// Strict weak ordering used by the cache's std::map.
  ///
  /// Keys are ordered by atom count first, then by bond count; keys with
  /// identical counts are ordered element-wise on the atom indices and then
  /// on the bond indices. The element-wise comparison is numeric, so it is
  /// well defined for empty index lists and independent of byte order.
  bool operator<(const TKey& right) const {
    if (AtomIdx.size() != right.AtomIdx.size()) {
      return AtomIdx.size() < right.AtomIdx.size();
    }
    if (BondIdx.size() != right.BondIdx.size()) {
      return BondIdx.size() < right.BondIdx.size();
    }

    // sizes match -> fall back to comparing the contents
    if (AtomIdx != right.AtomIdx) {
      return AtomIdx < right.AtomIdx;
    }
    return BondIdx < right.BondIdx;
  }
};
76
77 private:
78 std::map<TKey, TValue> Index;
79 size_t MaxAtoms{0}; // max key in the cache for fast failed find
80 public:
82 void clear() {
83 Index.clear();
84 MaxAtoms = 0;
85 }
86
87 bool find(const TKey& key, TValue& value) const {
88 value = false;
89 if (key.getNumAtoms() > MaxAtoms) {
90 return false; // fast check if key greater then max key in the cache
91 }
92
93 const auto entryit = Index.find(key);
94 if (Index.end() != entryit) {
95 value = entryit->second;
96 }
97 return Index.end() != entryit;
98 }
99
100 void add(const TKey& key, TValue found = true) {
101 if (key.getNumAtoms() > MaxAtoms) {
102 MaxAtoms = key.getNumAtoms();
103 }
104
105 Index.insert(std::pair<TKey, bool>(key, found));
106 }
107
108 size_t size() const {
109 return Index.size(); // for statistics only
110 }
111};
112} // namespace FMCS
113} // namespace RDKit
bool operator==(const TKey &right) const
bool operator<(const TKey &right) const
void add(const TKey &key, TValue found=true)
bool find(const TKey &key, TValue &value) const
Std stuff.
bool rdvalue_is(const RDValue_cast_t)