RDKit
Open-source cheminformatics and machine learning.
MultiFPBReader.h
Go to the documentation of this file.
1 //
2 // Copyright (c) 2016 Greg Landrum
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #ifndef RD_MULTIFPBREADER_H_APR2016
11 #define RD_MULTIFPBREADER_H_APR2016
12 /*! \file MultiFPBReader.h
13 
14  \brief contains a class for reading and searching collections of FPB files
15 
16  \b Note that this functionality is experimental and the API may change
17  in future releases.
18 */
19 
20 #include <RDGeneral/Exceptions.h>
22 #include <DataStructs/FPBReader.h>
23 #include <boost/tuple/tuple.hpp>
24 #include <boost/foreach.hpp>
25 
26 namespace RDKit {
27 
28 //! class for reading and searching multiple FPB files
29 /*!
30  basic usage:
31  \code
32  FPBReader r1("foo1.fpb"),r2("foo2.fpb");
33  std::vector<FPBReader *> readers;
34  readers.push_back(&r1);
35  readers.push_back(&r2);
36  MultiFPBReader fpbs(readers);
37  fpbs.init();
38  boost::shared_ptr<ExplicitBitVect> ebv = fpbs.getReader(0)->getFP(95);
39  std::vector<boost::tuple<double,unsigned int, unsigned int> > nbrs =
40  fpbs.getTanimotoNeighbors(*ebv.get(), 0.70);
41  \endcode
42 
43  \b Note: this functionality is experimental and the API may change
44  in future releases.
45 
46  <b>Note on thread safety</b>
47  Operations that involve reading from FPB files are not thread safe.
48  This means that the \c init() method is not thread safe and none of the
49  search operations are thread safe when an \c FPBReader is initialized in
50  \c lazyRead mode.
51 
52 */
54  public:
55  typedef boost::tuple<double, unsigned int, unsigned int> ResultTuple;
57  : df_init(false), df_initOnSearch(false), df_takeOwnership(false){};
58 
59  /*!
60  \param initOnSearch: if this is true, the \c init() method on child readers
61  will not be called until the first search is done. This is useful with large
62  FPB readers.
63  */
64  MultiFPBReader(bool initOnSearch)
65  : df_init(false),  // nothing has been initialized at construction time
66  df_initOnSearch(initOnSearch),  // remember the caller's request
67  df_takeOwnership(false){};  // this constructor always leaves the ownership flag off
68  /*!
69  \param readers: the set of FPBReader objects to use.
70  \param takeOwnership: if true, we own the memory for the FPBReaders
71  \param initOnSearch: if this is true, the \c init() method on child readers
72  will not be called until the first search is done. This is useful with large
73  FPB readers.
74  */
75  MultiFPBReader(std::vector<FPBReader *> &readers, bool takeOwnership = false,
76  bool initOnSearch = false);
77 
79  df_init = false;
80  if (df_takeOwnership) {
81  BOOST_FOREACH (FPBReader *rdr, d_readers) { delete rdr; };
82  d_readers.clear();
83  }
84  };
85 
86  //! Read the data from the file and initialize internal data structures
87  /*!
88  This must be called before most of the other methods of this class.
89  It calls the \c init() method on each of the child FPBReaders
90 
91  */
92  void init();
93 
94  //! returns the number of readers (i.e. the size of the internal reader list)
95  unsigned int length() const { return d_readers.size(); };  // vector size is converted to unsigned int
96  //! returns the number of bits in our fingerprints (all readers are expected
97  //! to have the same length)
98  unsigned int nBits() const;
99 
100  //! returns a particular reader
101  /*!
102 
103  \param which: the reader to return
104 
105  */
106  FPBReader *getReader(unsigned int which);
107 
108  //! adds a new FPBReader to our list
109  /*!
110 
111  This does no error checking on the reader, so be careful.
112 
113  If this MultiFPBReader was constructed with \c takeOwnership set to \c true, then it takes ownership of the reader's memory.
114 
115  \param rdr: the reader to add. If we have already been initialized, the
116  reader's \c init() method will be called
117 
118  \returns a count of the current number of readers
119  */
120  unsigned int addReader(FPBReader *rdr) {
121  PRECONDITION(rdr, "no reader provided");  // precondition: rdr must be non-null
122  d_readers.push_back(rdr);  // the pointer itself is stored; no copy of the reader is made
123  if (df_init) rdr->init();  // we have already been initialized, so initialize the new reader too
124  return d_readers.size();  // the count includes the reader that was just added
125  };
126 
127  //! returns tanimoto neighbors that are within a similarity threshold
128  /*!
129  The result vector of (similarity,index,reader) tuples is sorted in order
130  of decreasing similarity
131 
132  \param bv the query fingerprint
133  \param threshold the minimum similarity to return
134  \param numThreads Sets the number of threads to use (more than one thread
135  will only be used if the RDKit was built with multithread support). If set to
136  zero, the max supported by the system will be used.
137 
138  */
139  std::vector<ResultTuple> getTanimotoNeighbors(const boost::uint8_t *bv,
140  double threshold = 0.7,
141  int numThreads = 1) const;
142  //! \overload
143  std::vector<ResultTuple> getTanimotoNeighbors(
144  boost::shared_array<boost::uint8_t> bv, double threshold = 0.7,
145  int numThreads = 1) const {
146  return getTanimotoNeighbors(bv.get(), threshold, numThreads);  // forwards to the raw-pointer overload
147  };
148  //! \overload
149  std::vector<ResultTuple> getTanimotoNeighbors(const ExplicitBitVect &ebv,
150  double threshold = 0.7,
151  int numThreads = 1) const;
152 
153  //! returns Tversky neighbors that are within a similarity threshold
154  /*!
155  The result vector of (similarity,index,reader) tuples is sorted in order
156  of decreasing similarity
157 
158  \param bv the query fingerprint
159  \param ca the Tversky a coefficient
160  \param cb the Tversky b coefficient
161  \param threshold the minimum similarity to return
162  \param numThreads Sets the number of threads to use (more than one thread
163  will only be used if the RDKit was built with multithread support). If set to
164  zero, the max supported by the system will be used.
165 
166  */
167  std::vector<ResultTuple> getTverskyNeighbors(const boost::uint8_t *bv,
168  double ca, double cb,
169  double threshold = 0.7,
170  int numThreads = 1) const;
171  //! \overload
172  std::vector<ResultTuple> getTverskyNeighbors(
173  boost::shared_array<boost::uint8_t> bv, double ca, double cb,
174  double threshold = 0.7, int numThreads = 1) const {
175  return getTverskyNeighbors(bv.get(), ca, cb, threshold, numThreads);  // forwards to the raw-pointer overload
176  };
177  //! \overload
178  std::vector<ResultTuple> getTverskyNeighbors(const ExplicitBitVect &ebv,
179  double ca, double cb,
180  double threshold = 0.7,
181  int numThreads = 1) const;
182 
183  //! returns indices of all fingerprints that completely contain this one
184  /*! (i.e. where all the bits set in the query are also set in the db
185  molecule)
186  */
187  std::vector<std::pair<unsigned int, unsigned int> > getContainingNeighbors(
188  const boost::uint8_t *bv, int numThreads = 1) const;
189  //! \overload
190  std::vector<std::pair<unsigned int, unsigned int> > getContainingNeighbors(
191  boost::shared_array<boost::uint8_t> bv, int numThreads = 1) const {
192  return getContainingNeighbors(bv.get(), numThreads);  // forwards to the raw-pointer overload
193  };
194  //! \overload
195  std::vector<std::pair<unsigned int, unsigned int> > getContainingNeighbors(
196  const ExplicitBitVect &ebv, int numThreads = 1) const;
197 
198  private:
199  std::vector<FPBReader *> d_readers;
200  bool df_init, df_initOnSearch, df_takeOwnership;
201 
202  // disable automatic copy constructors and assignment operators
203  // for this class and its subclasses. They will likely be
204  // carrying around stream pointers and copying those is a recipe
205  // for disaster.
207  MultiFPBReader &operator=(const MultiFPBReader &);
208 };
209 }
210 #endif
std::vector< ResultTuple > getTanimotoNeighbors(const boost::uint8_t *bv, double threshold=0.7, int numThreads=1) const
returns tanimoto neighbors that are within a similarity threshold
unsigned int length() const
returns the number of readers
class for reading and searching FPB files
Definition: FPBReader.h:57
boost::tuple< double, unsigned int, unsigned int > ResultTuple
void init()
Read the data from the file and initialize internal data structures.
unsigned int nBits() const
FPBReader * getReader(unsigned int which)
returns a particular reader
std::vector< std::pair< unsigned int, unsigned int > > getContainingNeighbors(const boost::uint8_t *bv, int numThreads=1) const
returns indices of all fingerprints that completely contain this one
class for reading and searching multiple FPB files
std::vector< ResultTuple > getTanimotoNeighbors(boost::shared_array< boost::uint8_t > bv, double threshold=0.7, int numThreads=1) const
Includes a bunch of functionality for handling Atom and Bond queries.
Definition: Atom.h:29
std::vector< ResultTuple > getTverskyNeighbors(boost::shared_array< boost::uint8_t > bv, double ca, double cb, double threshold=0.7, int numThreads=1) const
void init()
Read the data from the file and initialize internal data structures.
MultiFPBReader(bool initOnSearch)
#define PRECONDITION(expr, mess)
Definition: Invariant.h:107
a class for bit vectors that are densely occupied
std::vector< std::pair< unsigned int, unsigned int > > getContainingNeighbors(boost::shared_array< boost::uint8_t > bv, int numThreads=1) const
std::vector< ResultTuple > getTverskyNeighbors(const boost::uint8_t *bv, double ca, double cb, double threshold=0.7, int numThreads=1) const
returns Tversky neighbors that are within a similarity threshold
unsigned int addReader(FPBReader *rdr)
adds a new FPBReader to our list
contains a simple class for reading and searching FPB files