RDKit
Open-source cheminformatics and machine learning.
MaximumCommonSubgraph.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2014 Novartis Institutes for BioMedical Research
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #pragma once
11 #include <vector>
12 #include <string>
13 #include <stdexcept>
14 #include "../RDKitBase.h"
15 #include "FMCS.h"
16 #include "DebugTrace.h" // algorithm filter definitions
17 #include "SeedSet.h"
18 #include "Target.h"
19 #include "SubstructureCache.h"
20 #include "DuplicatedSeedCache.h"
21 #include "MatchTable.h"
22 #include "TargetMatch.h"
23 #include "RingMatchTableSet.h"
24 
25 namespace RDKit {
26 
// Declaration only; the definition is not part of this header.
// Takes two arrays of unsigned short (c1, c2), two molecules each
// accompanied by an FMCS::Graph (mol1/query, mol2/target, in parameter
// order) and a pointer to the MCS parameters; returns a bool.
// NOTE(review): presumably c1/c2 hold the matched atom indices of the
// query and target graphs and the result tells whether the match is
// acceptable with respect to chirality - confirm against the definition.
27 bool FinalChiralityCheckFunction(const short unsigned c1[],
28  const short unsigned c2[], const ROMol& mol1,
29  const FMCS::Graph& query, const ROMol& mol2,
30  const FMCS::Graph& target,
31  const MCSParameters* p);
32 
33 namespace FMCS {
// NOTE(review): listing line 34 is absent from this extract; the constructor
// and destructor names further down suggest it opened
// `class MaximumCommonSubgraph {` - confirm against the original header.
35  struct MCS { // Current result: refers to a fragment of the source molecule
36  std::vector<const Atom*> Atoms; // atoms of the fragment, as raw pointers
37  std::vector<const Bond*> Bonds; // bonds of the fragment, as raw pointers
38  std::vector<unsigned> AtomsIdx; // atom indices; size() is reported by getMaxNumberAtoms()
39  std::vector<unsigned> BondsIdx; // needed for results and size() only!
40  const ROMol* QueryMolecule; // NOTE(review): presumably the molecule the pointers/indices above refer to - confirm
41  std::vector<Target> Targets; // NOTE(review): presumably the targets this result was matched against - confirm
42  };
43 
// Data members of the search object. Members inside the #ifdef sections
// exist only when the corresponding macro is defined at compile time.
44  unsigned long long To; // clock value; clear() assigns nanoClock() to it (NOTE(review): presumably the timeout reference point - confirm)
45  MCSProgressData Stat; // NOTE(review): presumably progress statistics of the running search - confirm
46  MCSParameters Parameters; // search parameters, held by value
47  unsigned ThresholdCount; // minimum number of matches required (NOTE(review): presumably counted in target molecules - confirm)
48  std::vector<const ROMol*> Molecules; // raw pointers; clear() empties the vector without deleting the pointees
49 #ifdef FAST_SUBSTRUCT_CACHE
50  std::vector<unsigned> QueryAtomLabels; // for Morgan code. Value depends on
51  // the current functor and parameters
52  std::vector<unsigned> QueryBondLabels; // for Morgan code. Value depends on
53  // the current functor and parameters
54  SubstructureCache HashCache;
55  MatchTable QueryAtomMatchTable;
56  MatchTable QueryBondMatchTable;
57  RingMatchTableSet RingMatchTables;
58 #endif
59 #ifdef DUP_SUBSTRUCT_CACHE
60  DuplicatedSeedCache DuplicateCache;
61 #endif
62  const ROMol* QueryMolecule; // dereferenced without a null check by getQueryMolecule()
63  unsigned QueryMoleculeMatchedBonds; // NOTE(review): presumably the number of query bonds that matched - confirm
64  unsigned QueryMoleculeMatchedAtoms; // NOTE(review): presumably the number of query atoms that matched - confirm
65  std::vector<Target> Targets; // emptied by clear()
66  SeedSet Seeds;
67  MCS McsIdx; // its AtomsIdx/BondsIdx sizes are what getMaxNumberAtoms()/getMaxNumberBonds() return
68 
69  public:
70 #ifdef VERBOSE_STATISTICS_ON
72 #endif
73 
// Public interface. Only the inline bodies below are visible in this header;
// the constructor, find() and checkIfMatchAndAppend() are defined elsewhere.
74  MaximumCommonSubgraph(const MCSParameters* params); // NOTE(review): whether params may be null is not visible here - confirm
75  ~MaximumCommonSubgraph() { clear(); } // releases state through clear()
76  MCSResult find(const std::vector<ROMOL_SPTR>& mols); // NOTE(review): presumably runs the MCS search over mols - confirm
77  const ROMol& getQueryMolecule() const { return *QueryMolecule; } // no null check: QueryMolecule must be set before calling
78  unsigned getMaxNumberBonds() const { return McsIdx.BondsIdx.size(); } // size() is converted to unsigned
79 
80  unsigned getMaxNumberAtoms() const { return McsIdx.AtomsIdx.size(); } // size() is converted to unsigned
81  // internal:
82  bool checkIfMatchAndAppend(Seed& seed); // public, but marked as internal by the comment above
83 
84  private:
// Empties Targets and Molecules and stores the current nanoClock() value
// in To. Also invoked by the destructor. The other members are left as is.
85  void clear() {
86  Targets.clear();
87  Molecules.clear();
88  To = nanoClock(); // NOTE(review): presumably restarts the timing reference - confirm
89  }
// Private helpers; declarations only, the definitions are not in this header.
// NOTE(review): the per-method purposes are not documented here - see the
// implementation file before relying on any behavior implied by the names.
90  void init();
91  void makeInitialSeeds();
92  bool createSeedFromMCS(size_t newQueryTarget, Seed& seed);
93  bool growSeeds(); // returns false if canceled
94  std::string generateResultSMARTS(const MCS& McsIdx) const; // parameter name shadows the McsIdx member
95 
96  bool match(Seed& seed);
97  bool matchIncrementalFast(Seed& seed, unsigned itarget);
98 };
99 }
100 } // namespace RDKit
MCSResult find(const std::vector< ROMOL_SPTR > &mols)
ROMol is a molecule class that is intended to have a fixed topology.
Definition: ROMol.h:106
bool FinalChiralityCheckFunction(const short unsigned c1[], const short unsigned c2[], const ROMol &mol1, const FMCS::Graph &query, const ROMol &mol2, const FMCS::Graph &target, const MCSParameters *p)
static unsigned long long nanoClock(void)
Definition: DebugTrace.h:95
Includes a bunch of functionality for handling Atom and Bond queries.
Definition: Atom.h:29
MaximumCommonSubgraph(const MCSParameters *params)
bool checkIfMatchAndAppend(Seed &seed)