RDKit
Open-source cheminformatics and machine learning.
DuplicatedSeedCache.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2014 Novartis Institutes for BioMedical Research
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #pragma once
11 #include <map>
12 #include <vector>
13 #include <stdexcept>
14 #include <algorithm>
15 
16 namespace RDKit {
17 namespace FMCS {
19  public:
20  typedef bool TValue;
// Identifier of a seed: the atom indices and the bond indices it consists of.
// Both index lists are kept sorted ascending by addAtom()/addBond(), so two
// keys built from the same indices compare equal regardless of the order in
// which the indices were added.
class TKey {
  std::vector<unsigned> AtomIdx;  // sorted
  std::vector<unsigned> BondIdx;  // sorted

 public:
  // Number of atom indices stored in this key.
  size_t getNumAtoms() const { return AtomIdx.size(); }
  // Number of bond indices stored in this key.
  size_t getNumBonds() const { return BondIdx.size(); }

  // Inserts atom index i at its sorted position (duplicates are not removed).
  void addAtom(unsigned i) {
    AtomIdx.insert(std::lower_bound(AtomIdx.begin(), AtomIdx.end(), i), i);
  }

  // Inserts bond index i at its sorted position (duplicates are not removed).
  void addBond(unsigned i) {
    BondIdx.insert(std::lower_bound(BondIdx.begin(), BondIdx.end(), i), i);
  }

  // True when both keys hold exactly the same atom and bond indices.
  // std::vector's operator== checks the sizes first and is well defined for
  // empty vectors, unlike taking &v[0] for a memcmp.
  bool operator==(const TKey& right) const {
    return AtomIdx == right.AtomIdx && BondIdx == right.BondIdx;
  }

  // Strict weak ordering used by the std::map index: fewer atoms first, then
  // fewer bonds, then lexicographic comparison of the atom indices and
  // finally of the bond indices. The comparison is numeric, so the resulting
  // order does not depend on the byte order of the platform.
  bool operator<(const TKey& right) const {
    if (AtomIdx.size() != right.AtomIdx.size()) {
      return AtomIdx.size() < right.AtomIdx.size();
    }
    if (BondIdx.size() != right.BondIdx.size()) {
      return BondIdx.size() < right.BondIdx.size();
    }
    // same sizes -> compare the contents
    if (AtomIdx != right.AtomIdx) {
      return AtomIdx < right.AtomIdx;
    }
    return BondIdx < right.BondIdx;
  }
};
65 
66  private:
67  std::map<TKey, TValue> Index;
68  size_t MaxAtoms; // largest atom count among the cached keys; lets find() fail fast for larger keys
69  public:
70  DuplicatedSeedCache() : MaxAtoms(0) {}
71  void clear() {
72  Index.clear();
73  MaxAtoms = 0;
74  }
75 
76  bool find(const TKey& key, TValue& value) const {
77  value = false;
78  if (key.getNumAtoms() > MaxAtoms)
79  return false; // fast check if key greater then max key in the cache
80 
81  std::map<TKey, TValue>::const_iterator entryit = Index.find(key);
82  if (Index.end() != entryit) value = entryit->second;
83  return Index.end() != entryit;
84  }
85 
86  void add(const TKey& key, TValue found = true) {
87  if (key.getNumAtoms() > MaxAtoms) MaxAtoms = key.getNumAtoms();
88 
89  Index.insert(std::pair<TKey, bool>(key, found));
90  }
91 
92  size_t size() const {
93  return Index.size(); // for statistics only
94  }
95 };
96 }
97 }
bool operator==(const TKey &right) const
Includes a bunch of functionality for handling Atom and Bond queries.
Definition: Atom.h:29
void add(const TKey &key, TValue found=true)
bool operator<(const TKey &right) const
bool find(const TKey &key, TValue &value) const