RDKit
Open-source cheminformatics and machine learning.
SubstructureCache.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2014 Novartis Institutes for BioMedical Research
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #pragma once
11 #include <list>
12 #include <vector>
13 #include <string>
14 #include <stdexcept>
15 #include "../RDKitBase.h"
16 //#include "../Fingerprints/MorganFingerprints.h"
17 #include "Graph.h"
18 #include "Seed.h"
19 #include "DebugTrace.h" // algorithm filter definitions
20 
21 namespace RDKit {
22 namespace FMCS {
24  public:
25 #pragma pack(push, 1)
27  typedef unsigned long long TValue;
28  TValue Value;
29 
30  public:
31  KeyNumericMetrics() : Value(0) {}
32  };
33 #pragma pack(pop)
34 
35  struct HashKey {
37 
38  public:
39  void computeKey(const Seed& seed,
40  const std::vector<unsigned>& queryAtomLabels,
41  const std::vector<unsigned>& queryBondLabels) {
42  computeMorganCodeHash(seed, queryAtomLabels, queryBondLabels);
43  }
44 
45  private:
46  void computeMorganCodeHash(const Seed& seed,
47  const std::vector<unsigned>& queryAtomLabels,
48  const std::vector<unsigned>& queryBondLabels) {
49  size_t nv = seed.getNumAtoms();
50  size_t ne = seed.getNumBonds();
51  std::vector<unsigned long> currCodes(nv);
52  std::vector<unsigned long> prevCodes(nv);
53  size_t nIterations = seed.getNumBonds();
54  if (nIterations > 5) nIterations = 5;
55 
56  for (unsigned seedAtomIdx = 0; seedAtomIdx < seed.getNumAtoms();
57  seedAtomIdx++)
58  currCodes[seedAtomIdx] =
59  queryAtomLabels[seed.MoleculeFragment.AtomsIdx[seedAtomIdx]];
60 
61  for (size_t iter = 0; iter < nIterations; iter++) {
62  for (size_t i = 0; i < nv; i++) prevCodes[i] = currCodes[i];
63 
64  for (size_t seedBondIdx = 0; seedBondIdx < ne; seedBondIdx++) {
65  const Bond* bond = seed.MoleculeFragment.Bonds[seedBondIdx];
66  unsigned order =
67  queryBondLabels[seed.MoleculeFragment.BondsIdx[seedBondIdx]];
68  unsigned atom1 =
70  ->second;
71  unsigned atom2 =
73  ->second;
74  unsigned v1 = prevCodes[atom1];
75  unsigned v2 = prevCodes[atom2];
76 
77  currCodes[atom1] += v2 * v2 + (v2 + 23) * (order + 1721);
78  currCodes[atom2] += v1 * v1 + (v1 + 23) * (order + 1721);
79  }
80  }
81 
82  KeyNumericMetrics::TValue result = 0;
83  for (unsigned seedAtomIdx = 0; seedAtomIdx < nv; seedAtomIdx++) {
84  unsigned long code = currCodes[seedAtomIdx];
85  result += code * (code + 6849) + 29;
86  }
87 
88  NumericMetrics.Value = result;
89  }
90 
91  // not implemented yet:
92  /*
93  void computeFingerprint(const Seed& seed)
94  {
95  unsigned int radius = seed.getNumBonds();
96  if (radius > 5)
97  radius = 5;
98  ExplicitBitVect *mf =
99  RDKit::MorganFingerprints::getFingerprintAsBitVect(seed.GraphTopology,
100  radius); //SLOW !!!
101  // ...
102  delete mf;
103  NumericMetrics.Field.hasFingerprint = 1;
104  }
105  */
106  };
107 
108  typedef HashKey TKey;
109  typedef std::list<FMCS::Graph> TIndexEntry; // hash-key is not unique key
110  private:
111  std::vector<TIndexEntry> ValueStorage;
112  std::map<KeyNumericMetrics::TValue, size_t> NumericIndex; // TIndexEntry
113  public:
114  // returns computed key, and pointer to index entry with a set of subgraphs
115  // corresponding to the key if found.
116  // then caller must find exactly matched subgraph in the result set with own
117  // search algorithm,
118  // including a resolving of collisions of hash key
119  TIndexEntry* find(const Seed& seed,
120  const std::vector<unsigned>& queryAtomLabels,
121  const std::vector<unsigned>& queryBondLabels,
122  TKey& key) { // compute key and find entry
123  key.computeKey(seed, queryAtomLabels, queryBondLabels);
124  std::map<KeyNumericMetrics::TValue, size_t>::const_iterator entryit =
125  NumericIndex.find(key.NumericMetrics.Value);
126  if (NumericIndex.end() != entryit) return &ValueStorage[entryit->second];
127  return NULL; // not found
128  }
129 
130  // if find() did not found any entry for this key of seed a new entry will be
131  // created
132  void add(const Seed& seed, TKey& key,
133  TIndexEntry* entry) { // "compute" value and store it in NEW entry
134  // if not found
135  if (!entry) {
136  try {
137  ValueStorage.push_back(TIndexEntry());
138  } catch (...) {
139  return; // not enought memory room to add the item, but it's just a
140  // cache
141  }
142  entry = &ValueStorage.back();
143  }
144  entry->push_back(seed.Topology);
145 
146  if (!NumericIndex.insert(std::pair<KeyNumericMetrics::TValue, size_t>(
147  key.NumericMetrics.Value,
148  ValueStorage.size() - 1)).second)
149  return; // not enought memory room to add the item, but it is just cache
150  }
151 
152  size_t keyssize() const { // for statistics only
153  return ValueStorage.size();
154  }
155 
156  size_t fullsize() const { // for statistics only
157  size_t n = 0;
158  for (std::vector<TIndexEntry>::const_iterator e = ValueStorage.begin();
159  e != ValueStorage.end(); e++)
160  n += e->size();
161  return n;
162  }
163 };
164 }
165 }
void computeKey(const Seed &seed, const std::vector< unsigned > &queryAtomLabels, const std::vector< unsigned > &queryBondLabels)
std::vector< unsigned > BondsIdx
Definition: Seed.h:27
std::map< unsigned, unsigned > SeedAtomIdxMap
Definition: Seed.h:28
void add(const Seed &seed, TKey &key, TIndexEntry *entry)
unsigned int getBeginAtomIdx() const
returns the index of our begin Atom
Definition: Bond.h:178
unsigned getNumBonds() const
Definition: Seed.h:126
Graph Topology
Definition: Seed.h:70
Std stuff.
Definition: Atom.h:29
unsigned int getEndAtomIdx() const
returns the index of our end Atom
Definition: Bond.h:185
class for representing a bond
Definition: Bond.h:47
std::list< FMCS::Graph > TIndexEntry
MolFragment MoleculeFragment
Definition: Seed.h:69
std::vector< const Bond * > Bonds
Definition: Seed.h:25
TIndexEntry * find(const Seed &seed, const std::vector< unsigned > &queryAtomLabels, const std::vector< unsigned > &queryBondLabels, TKey &key)
unsigned getNumAtoms() const
Definition: Seed.h:125
std::vector< unsigned > AtomsIdx
Definition: Seed.h:26