RDKit
Open-source cheminformatics and machine learning.
MolHash.h
Go to the documentation of this file.
1 // $Id$
2 //
3 // Copyright (C) 2014 Novartis Institutes for BioMedical Research
4 //
5 // @@ All Rights Reserved @@
6 // This file is part of the RDKit.
7 // The contents are covered by the terms of the BSD license
8 // which is included in the file license.txt, found at the root
9 // of the RDKit source tree.
10 //
11 #pragma once
12 #include <map>
13 #include <vector>
14 #include <string>
15 #include <stdexcept>
16 #include <boost/cstdint.hpp>
17 #include "../RDKitBase.h"
18 
19 namespace RDKit {
20 namespace MolHash {
21 typedef boost::uint32_t HashCodeType;
22 
23 HashCodeType generateMoleculeHashCode(
24  const ROMol &mol, const std::vector<unsigned> *atomsToUse = 0,
25  const std::vector<unsigned> *bondsToUse =
26  0, // ?? listed bonds between/to/from excluded atom(s) ??
27  const std::vector<boost::uint32_t> *atomCodes = 0,
28  const std::vector<boost::uint32_t> *bondCodes = 0);
29 
/// Bitwise flags to combine and compute atom/bond codes.
///
/// Atom-related flags live in the low byte (mask CF_ATOM_ALL), bond-related
/// flags in the next byte (mask CF_BOND_ALL).
///
/// NOTE(review): several enumerator names were absent from this listing and
/// have been restored. CF_BOND_ORDER and CF_BOND_AROMATIZATION are named by
/// the CF_BOND_TYPE_EXACT expression; CF_ATOM_CHIRALITY, CF_BOND_TYPE_EXACT,
/// CF_BOND_CHIRALITY and the values 0x0010 / 0x0200 are inferred from the
/// gaps in the bit sequence and the surviving comments - confirm against the
/// upstream header before relying on them.
enum CodeFlags {
  CF_NO_LABELS = 0x0000,

  // ---- atom flags ----
  CF_ELEMENT = 0x0001,
  CF_CHARGE = 0x0002,
  CF_VALENCE = 0x0004,
  CF_ISOTOPE = 0x0008,
  CF_ATOM_CHIRALITY = 0x0010,
  CF_ATOM_AROMATIC = 0x0020,
  CF_ATOM_ALL = 0x00FF,

  // ---- bond flags ----
  // ignore AROMATIZATION if corresponding flag is not specified
  CF_BOND_ORDER = 0x0100,
  CF_BOND_AROMATIZATION = 0x0200,
  // exact type value with aromatic
  CF_BOND_TYPE_EXACT = CF_BOND_ORDER | CF_BOND_AROMATIZATION,
  // include bond chirality information into bond code
  CF_BOND_CHIRALITY = 0x0400,
  CF_BOND_IN_RING = 0x0800,
  CF_BOND_ALL = 0xFF00,

  CF_ALL = 0xFFFF
};
50 
52  const ROMol &mol, boost::uint64_t flags // CodeFlags constants combination
53  ,
54  std::vector<boost::uint32_t> *atomCodes // NULL is allowed
55  ,
56  std::vector<boost::uint32_t> *bondCodes); // NULL is allowed
57 
58 #pragma pack(push, 1)
59 struct HashSet {
60  boost::uint16_t Version;
61  boost::uint16_t Reserved;
62  boost::uint16_t NumAtoms;
63  boost::uint16_t NumBonds;
64  boost::uint32_t FormulaCRC32;
65  HashCodeType NonChiralAtomsHash;
66  HashCodeType NonChiralBondsHash;
67  HashCodeType ChiralAtomsHash;
68  HashCodeType ChiralBondsHash;
69  HashCodeType ChiralityHash;
70 
71  public:
72  HashSet() { memset(this, 0, sizeof(*this)); }
73 };
74 #pragma pack(pop)
75 
76 void generateMoleculeHashSet(const ROMol &mol, HashSet &res,
77  const std::vector<unsigned> *atomsToUse = 0,
78  const std::vector<unsigned> *bondsToUse = 0);
79 
80 std::string generateMoleculeHashSet(
81  const ROMol &mol, const std::vector<unsigned> *atomsToUse = 0,
82  const std::vector<unsigned> *bondsToUse = 0);
83 
/// Converts binary data to a Base64 encoded string.
///
/// \param bin   pointer to the binary data
/// \param size  size of the data; presumably a byte count - TODO confirm
std::string encode(const void *bin, size_t size);
86 }
87 }
HashCodeType NonChiralBondsHash
Definition: MolHash.h:66
HashCodeType ChiralBondsHash
Definition: MolHash.h:68
boost::uint16_t NumBonds
Definition: MolHash.h:63
boost::uint32_t FormulaCRC32
Definition: MolHash.h:64
ROMol is a molecule class that is intended to have a fixed topology.
Definition: ROMol.h:106
HashCodeType ChiralityHash
Definition: MolHash.h:69
HashCodeType NonChiralAtomsHash
Definition: MolHash.h:65
boost::uint16_t Reserved
Definition: MolHash.h:61
void generateMoleculeHashSet(const ROMol &mol, HashSet &res, const std::vector< unsigned > *atomsToUse=0, const std::vector< unsigned > *bondsToUse=0)
HashCodeType ChiralAtomsHash
Definition: MolHash.h:67
Includes a bunch of functionality for handling Atom and Bond queries.
Definition: Atom.h:29
boost::uint16_t NumAtoms
Definition: MolHash.h:62
boost::uint32_t HashCodeType
Definition: MolHash.h:21
HashCodeType generateMoleculeHashCode(const ROMol &mol, const std::vector< unsigned > *atomsToUse=0, const std::vector< unsigned > *bondsToUse=0, const std::vector< boost::uint32_t > *atomCodes=0, const std::vector< boost::uint32_t > *bondCodes=0)
void fillAtomBondCodes(const ROMol &mol, boost::uint64_t flags, std::vector< boost::uint32_t > *atomCodes, std::vector< boost::uint32_t > *bondCodes)
boost::uint16_t Version
Definition: MolHash.h:60
std::string encode(const void *bin, size_t size)