RDKit
Open-source cheminformatics and machine learning.
Seed.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2014 Novartis Institutes for BioMedical Research
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #pragma once
11 #include <map>
12 #include "../RDKitBase.h"
13 #include "DebugTrace.h" // algorithm optimisation definitions
14 #include "Graph.h"
15 #include "DuplicatedSeedCache.h"
16 #include "SubstructMatchCustom.h"
17 
18 namespace RDKit {
19 namespace FMCS {
20 class MaximumCommonSubgraph;
21 struct TargetMatch;
22 
23 struct MolFragment { // Reference to a fragment of source molecule
24  std::vector<const Atom*> Atoms;
25  std::vector<const Bond*> Bonds;
26  std::vector<unsigned> AtomsIdx;
27  std::vector<unsigned> BondsIdx; // need for results and size() only !
28  std::map<unsigned, unsigned> SeedAtomIdxMap; // Full Query Molecule to Seed
29  // indeces backward conversion
30  // map
31 };
32 
33 struct NewBond {
34  unsigned SourceAtomIdx; // index in the seed. Atom is already in the seed
35  unsigned BondIdx; // index in qmol of new bond scheduled to be added into
36  // seed. This is outgoing bond from SourceAtomIdx
37  unsigned NewAtomIdx; // index in qmol of new atom scheduled to be added into
38  // seed. Another end of new bond
39  const Atom* NewAtom; // pointer to qmol's new atom scheduled to be added into
40  // seed. Another end of new bond
41  unsigned EndAtomIdx; // index in the seed. RING. "New" Atom on the another
42  // end of new bond is already exists in the seed.
43 
45  : SourceAtomIdx(-1),
46  BondIdx(-1),
47  NewAtomIdx(-1),
48  NewAtom(0),
49  EndAtomIdx(-1) {}
50 
51  NewBond(unsigned from_atom, unsigned bond_idx, unsigned new_atom,
52  unsigned to_atom, const Atom* a)
53  : SourceAtomIdx(from_atom),
54  BondIdx(bond_idx),
55  NewAtomIdx(new_atom),
56  NewAtom(a),
57  EndAtomIdx(to_atom) {}
58 };
59 
60 class Seed {
61  private:
62  mutable std::vector<NewBond> NewBonds; // for multistage growing. all
63  // directly connected outgoing bonds
64  public:
65  bool CopyComplete; // this seed has been completely copied into list.
66  // postponed non0locked copy for MULTI_THREAD
67  mutable unsigned GrowingStage; // 0 new seed; -1 finished; n>0 in progress,
68  // exact stage of growing for SDF
69  MolFragment MoleculeFragment; // Reference to a fragment of source molecule
70  Graph Topology; // seed topology with references to source molecule
71 
72  std::vector<bool> ExcludedBonds;
73  unsigned LastAddedAtomsBeginIdx; // in this subgraph for improving
74  // performance of future growing
75  unsigned LastAddedBondsBeginIdx; // in this subgraph for DEBUG ONLY
76  unsigned RemainingBonds;
77  unsigned RemainingAtoms;
78 #ifdef DUP_SUBSTRUCT_CACHE
80 #endif
81  std::vector<TargetMatch> MatchResult; // for each target
82  public:
83  Seed()
84  : CopyComplete(false),
85  GrowingStage(0),
86  LastAddedAtomsBeginIdx(0),
87  LastAddedBondsBeginIdx(0),
88  RemainingBonds(-1),
89  RemainingAtoms(-1) {}
90 
91  void setMoleculeFragment(const Seed& src) {
92  MoleculeFragment = src.MoleculeFragment;
93  }
94  Seed& operator=(const Seed& src) {
95  NewBonds = src.NewBonds;
96  GrowingStage = src.GrowingStage;
97  MoleculeFragment = src.MoleculeFragment;
98  Topology = src.Topology;
99  ExcludedBonds = src.ExcludedBonds;
100  LastAddedAtomsBeginIdx = src.LastAddedAtomsBeginIdx;
101  LastAddedBondsBeginIdx = src.LastAddedBondsBeginIdx;
102  RemainingBonds = src.RemainingBonds;
103  RemainingAtoms = src.RemainingAtoms;
104 #ifdef DUP_SUBSTRUCT_CACHE
105  DupCacheKey = src.DupCacheKey;
106 #endif
107  MatchResult = src.MatchResult;
108  CopyComplete = true; // LAST
109  return *this;
110  }
111  void createFromParent(const Seed* parent) {
112  MoleculeFragment = parent->MoleculeFragment;
113  Topology = parent->Topology;
114  ExcludedBonds = parent->ExcludedBonds;
115  RemainingBonds = parent->RemainingBonds;
116  RemainingAtoms = parent->RemainingAtoms;
117 #ifdef DUP_SUBSTRUCT_CACHE
118  DupCacheKey = parent->DupCacheKey;
119 #endif
120  LastAddedAtomsBeginIdx = getNumAtoms(); // previous size
121  LastAddedBondsBeginIdx = getNumBonds(); // previous size
122  GrowingStage = 0;
123  }
124 
125  unsigned getNumAtoms() const { return MoleculeFragment.AtomsIdx.size(); }
126  unsigned getNumBonds() const { return MoleculeFragment.BondsIdx.size(); }
127 
128  void grow(MaximumCommonSubgraph& mcs) const;
129  bool canGrowBiggerThan(unsigned maxBonds,
130  unsigned maxAtoms) const { // prune()
131  return RemainingBonds + getNumBonds() > maxBonds ||
132  (RemainingBonds + getNumBonds() == maxBonds &&
133  RemainingAtoms + getNumAtoms() > maxAtoms);
134  }
135  void computeRemainingSize(const ROMol& qmol);
136 
137  unsigned addAtom(const Atom* atom);
138  unsigned addBond(const Bond* bond);
139  void fillNewBonds(const ROMol& qmol);
140 };
141 }
142 }
unsigned BondIdx
Definition: Seed.h:35
std::vector< TargetMatch > MatchResult
Definition: Seed.h:81
std::vector< unsigned > BondsIdx
Definition: Seed.h:27
std::map< unsigned, unsigned > SeedAtomIdxMap
Definition: Seed.h:28
void setMoleculeFragment(const Seed &src)
Definition: Seed.h:91
unsigned SourceAtomIdx
Definition: Seed.h:34
ROMol is a molecule class that is intended to have a fixed topology.
Definition: ROMol.h:106
unsigned RemainingAtoms
Definition: Seed.h:77
unsigned RemainingBonds
Definition: Seed.h:76
unsigned NewAtomIdx
Definition: Seed.h:37
std::vector< bool > ExcludedBonds
Definition: Seed.h:72
unsigned getNumBonds() const
Definition: Seed.h:126
unsigned LastAddedAtomsBeginIdx
Definition: Seed.h:73
unsigned GrowingStage
Definition: Seed.h:67
Graph Topology
Definition: Seed.h:70
Includes a bunch of functionality for handling Atom and Bond queries.
Definition: Atom.h:29
unsigned EndAtomIdx
Definition: Seed.h:41
class for representing a bond
Definition: Bond.h:47
bool CopyComplete
Definition: Seed.h:65
bool canGrowBiggerThan(unsigned maxBonds, unsigned maxAtoms) const
Definition: Seed.h:129
MolFragment MoleculeFragment
Definition: Seed.h:69
DuplicatedSeedCache::TKey DupCacheKey
Definition: Seed.h:79
std::vector< const Bond * > Bonds
Definition: Seed.h:25
NewBond(unsigned from_atom, unsigned bond_idx, unsigned new_atom, unsigned to_atom, const Atom *a)
Definition: Seed.h:51
void createFromParent(const Seed *parent)
Definition: Seed.h:111
Seed & operator=(const Seed &src)
Definition: Seed.h:94
unsigned LastAddedBondsBeginIdx
Definition: Seed.h:75
unsigned getNumAtoms() const
Definition: Seed.h:125
const Atom * NewAtom
Definition: Seed.h:39
std::vector< const Atom * > Atoms
Definition: Seed.h:24
The class for representing atoms.
Definition: Atom.h:68
std::vector< unsigned > AtomsIdx
Definition: Seed.h:26