Catalog.h

Go to the documentation of this file.
00001 //
00002 //  Copyright (C) 2003-2006 Rational Discovery LLC
00003 //
00004 //  @@ All Rights Reserved @@
00005 //
00006 
00007 #ifndef __RD_CATALOG_H__
00008 #define __RD_CATALOG_H__
00009 
00010 // Boost graph stuff
00011 #include <boost/graph/graph_traits.hpp>
00012 #include <boost/graph/adjacency_list.hpp>
00013 #include <boost/property_map.hpp>
00014 
00015 // for some typedefs
00016 #include <RDGeneral/types.h>
00017 #include <RDGeneral/StreamOps.h>
00018 
00019 namespace RDCatalog {
00020   const int versionMajor=1;
00021   const int versionMinor=0;
00022   const int versionPatch=0;
00023   const int endianId=0xDEADBEEF;
00024   
00025   //-----------------------------------------------------------------------------
00026   //! abstract base class for a catalog object
00027   template <class entryType, class paramType>
00028   class Catalog {
00029   public:
00030     //------------------------------------
00031     Catalog() : d_fpLength(0), dp_cParams(0) {};
00032 
00033     //------------------------------------
00034     virtual ~Catalog(){
00035       if (dp_cParams) {
00036         delete dp_cParams;
00037       }
00038     }
00039     
00040     //------------------------------------
00041     //! return a serialized form of the Catalog as an std::string
00042     virtual std::string Serialize() const = 0;
00043     
00044     //------------------------------------
00045     //! adds an entry to the catalog
00046     /*!
00047 
00048       \param entry          the entry to be added
00049       \param updateFPLength (optional) if this is true, our internal
00050       fingerprint length will also be updated.
00051       
00052     */
00053     virtual unsigned int addEntry(entryType *entry, bool updateFPLength = true) = 0;
00054     
00055     //------------------------------------
00056     //! returns a particular entry in the Catalog
00057     virtual const entryType* getEntryWithIdx(unsigned int idx) const = 0;
00058     
00059     //------------------------------------
00060     //! returns the number of entries
00061     virtual unsigned int getNumEntries() const = 0;
00062     
00063     //------------------------------------
00064     //! returns the length of our fingerprint
00065     unsigned int getFPLength() const {return d_fpLength;}
00066     
00067     //------------------------------------
00068     //! sets our fingerprint length
00069     void setFPLength(unsigned int val) {d_fpLength = val;}
00070     
00071     //------------------------------------
00072     //! sets our parameters by copying the \c params argument
00073     void setCatalogParams(paramType *params) {
00074       PRECONDITION(params,"bad parameter object");
00075       //if we already have a paramter object throw an exception
00076       PRECONDITION(!dp_cParams,"A parameter object already exists on the catalog" );
00077       /*
00078         if (dp_cParams) {
00079         // we already have parameter object on the catalog
00080         // can't overwrite it
00081         PRECONDITION(0, "A parameter object already exist on the catalog");
00082         }*/
00083       dp_cParams = new paramType(*params);
00084     }
00085     
00086     //------------------------------------
00087     //! returns a pointer to our parameters
00088     const paramType *getCatalogParams() const { return dp_cParams;}
00089 
00090   protected:
00091     // this is the ID that will be assigned to the next entry 
00092     // added to the catalog - need not be same as the number of entries 
00093     // in the catalog and does not correspond with the
00094     // id of the entry in the catalog.
00095     // this is more along the lines of bitId
00096     unsigned int d_fpLength; //!< the length of our fingerprint
00097     paramType *dp_cParams;   //!< our params object
00098 
00099   };
00100 
00101 
00102 
00103   //-----------------------------------------------------------------------------
00104   //! A Catalog with a hierarchical structure
00105   /*!
00106 
00107     The entries of a HierarchCatalog are arranged in a directed graph
00108 
00109     <b>The difference between <i>Indices</i> and <i>Bit Ids</i></b>
00110     
00111     A HierarchCatalog may contain more entries than the user is actually
00112     interested in.  For example a HierarchCatalog constructed to contain
00113     orders 5 through 8 may well contain information about orders 1-5,
00114     in order to facilitate some search optimizations.
00115 
00116     - <i>Bit Ids</i> refer to the "interesting" bits.  
00117     So, in the above example, Bit Id \c 0 will be the first entry
00118     with order 5.
00119     - <i>Indices</i> refer to the underlying structure of the catalog.
00120     So, in the above example, the entry with index \c 0 will be
00121     the first entry with order 1.
00122 
00123   */
00124   template <class entryType, class paramType, class orderType> 
00125   class HierarchCatalog : public Catalog <entryType, paramType> {
00126     // the entries in the catalog can be traversed using the edges
00127     // in a desired order
00128   public:
00129     //! used by the BGL to set up the node properties in our graph
00130     struct vertex_entry_t {
00131       enum { num=1003 };
00132       typedef boost::vertex_property_tag kind;
00133     };
00134     typedef boost::property<vertex_entry_t, entryType *> EntryProperty;
00135     
00136     //! the type of the graph itself:
00137     typedef boost::adjacency_list<boost::vecS,
00138                                          boost::vecS, // FIX: should be using setS for edges so that parallel edges are never added (page 225 BGL book)
00139       // but that seems result in compile errors
00140       boost::bidirectionalS,
00141              EntryProperty> CatalogGraph;
00142     
00143     typedef boost::graph_traits<CatalogGraph> CAT_GRAPH_TRAITS;
00144     typedef typename CAT_GRAPH_TRAITS::vertex_iterator VER_ITER;
00145     typedef std::pair<VER_ITER, VER_ITER> ENT_ITER_PAIR;
00146     typedef typename CAT_GRAPH_TRAITS::adjacency_iterator DOWN_ENT_ITER;
00147     typedef std::pair<DOWN_ENT_ITER, DOWN_ENT_ITER> DOWN_ENT_ITER_PAIR;
00148     
00149     //------------------------------------
00150     HierarchCatalog<entryType, paramType, orderType>() {};
00151     
00152     //------------------------------------
00153     //! Construct by making a copy of the input \c params object
00154     HierarchCatalog<entryType, paramType, orderType>(paramType *params) : Catalog<entryType,paramType>() {
00155       this->setCatalogParams(params);
00156     }
00157 
00158     //------------------------------------
00159     //! Construct from a \c pickle (a serialized form of the HierarchCatalog)
00160     HierarchCatalog<entryType, paramType, orderType>(const std::string &pickle) {
00161       this->initFromString(pickle);
00162     }
00163     
00164     //------------------------------------
00165     ~HierarchCatalog() {
00166       destroy();
00167     }
00168 
00169     //------------------------------------
00170     //! serializes this object to a stream
00171     void toStream(std::ostream &ss) const {
00172       PRECONDITION(this->getCatalogParams(),"NULL parameter object");
00173       
00174       // the i/o header:
00175       RDKit::streamWrite(ss,endianId);
00176       RDKit::streamWrite(ss,versionMajor);
00177       RDKit::streamWrite(ss,versionMinor);
00178       RDKit::streamWrite(ss,versionPatch);
00179 
00180       // information about the catalog itself:
00181       int tmpUInt;
00182       tmpUInt = this->getFPLength();
00183       RDKit::streamWrite(ss,tmpUInt);
00184       tmpUInt = this->getNumEntries();
00185       RDKit::streamWrite(ss,tmpUInt);
00186 
00187       //std::cout << ">>>>-------------------------------" << std::endl;
00188       //std::cout << "\tlength: " << getFPLength() << " " << getNumEntries() << std::endl;
00189 
00190       // add the params object:
00191       this->getCatalogParams()->toStream(ss);
00192       //std::cout << "\tparams: " << getCatalogParams()->getLowerFragLength();
00193       //std::cout << " " << getCatalogParams()->getUpperFragLength();
00194       //std::cout << " " << getCatalogParams()->getNumFuncGroups();
00195       //std::cout << std::endl;
00196       
00197       // write the entries in order:
00198       for(unsigned int i=0;i<getNumEntries();i++){
00199         this->getEntryWithIdx(i)->toStream(ss);
00200       }
00201 
00202       // finally the adjacency list:
00203       for(unsigned int i=0;i<getNumEntries();i++){
00204         RDKit::INT_VECT children=this->getDownEntryList(i);
00205         tmpUInt = children.size();
00206         RDKit::streamWrite(ss,tmpUInt);
00207         for(RDKit::INT_VECT::const_iterator ivci=children.begin();
00208             ivci!=children.end();
00209             ivci++){
00210           RDKit::streamWrite(ss,*ivci);
00211         }
00212       }
00213     }
00214 
00215     //------------------------------------
00216     //! serializes this object and returns the resulting \c pickle
00217     std::string Serialize() const {
00218       std::stringstream ss(std::ios_base::binary|std::ios_base::out|std::ios_base::in);
00219       this->toStream(ss);
00220       return ss.str();
00221     }
00222 
00223     //------------------------------------
00224     //! fills the contents of this object from a stream containing a \c pickle
00225     void initFromStream(std::istream &ss) {
00226       int tmpInt;
00227       // FIX: at the moment we ignore the header info:
00228       RDKit::streamRead(ss,tmpInt);
00229       RDKit::streamRead(ss,tmpInt);
00230       RDKit::streamRead(ss,tmpInt);
00231       RDKit::streamRead(ss,tmpInt);
00232       
00233       unsigned int tmpUInt;
00234       RDKit::streamRead(ss,tmpUInt);// fp length
00235       this->setFPLength(tmpUInt);
00236       
00237       unsigned int numEntries;
00238       RDKit::streamRead(ss,numEntries);
00239       //std::cout << "<<<-------------------------------" << std::endl;
00240       //std::cout << "\tlength: " << getFPLength() << " " << numEntries << std::endl;
00241 
00242 
00243       // grab the params:
00244       paramType *params = new paramType();
00245       params->initFromStream(ss);
00246       this->setCatalogParams(params);
00247 
00248       //std::cout << "\tparams: " << getCatalogParams()->getLowerFragLength();
00249       //std::cout << " " << getCatalogParams()->getUpperFragLength();
00250       //std::cout << " " << getCatalogParams()->getNumFuncGroups();
00251       //std::cout << std::endl;
00252 
00253       // now all of the entries:
00254       for(unsigned int i=0;i<numEntries;i++){
00255         entryType *entry = new entryType();
00256         entry->initFromStream(ss);
00257         this->addEntry(entry,false);
00258       }
00259 
00260       // and, finally, the adjacency list:
00261       for(unsigned int i=0;i<numEntries;i++){
00262         unsigned int nNeighbors;
00263         RDKit::streamRead(ss,nNeighbors);
00264         for(unsigned int j=0;j<nNeighbors;j++){
00265           RDKit::streamRead(ss,tmpInt);
00266           this->addEdge(i,tmpInt);
00267         }
00268       }
00269     }
00270     
00271     //------------------------------------
00272     unsigned int getNumEntries() const {
00273       return boost::num_vertices(d_graph);
00274     }
00275 
00276     //------------------------------------
00277     //! fills the contents of this object from a string containing a \c pickle
00278     void initFromString(const std::string &text){
00279       std::stringstream ss(std::ios_base::binary|std::ios_base::out|std::ios_base::in);
00280       // initialize the stream:
00281       ss.write(text.c_str(),text.length());
00282       // now start reading out values:
00283       this->initFromStream(ss);
00284     }
00285 
00286     //------------------------------------
00287     //! add a new entry to the catalog
00288     /*!
00289 
00290       \param entry          the entry to be added
00291       \param updateFPLength (optional) if this is true, our internal
00292       fingerprint length will also be updated.
00293 
00294     */
00295     unsigned int addEntry(entryType *entry, bool updateFPLength = true){ 
00296       PRECONDITION(entry,"bad arguments");
00297       if (updateFPLength) {
00298         unsigned int fpl = this->getFPLength();
00299         entry->setBitId(fpl);
00300         fpl++;
00301         this->setFPLength(fpl);
00302       }
00303       unsigned int eid = boost::add_vertex(EntryProperty(entry), d_graph);
00304       orderType etype = entry->getOrder();
00305       // REVIEW: this initialization is not required: the STL map, in
00306       // theory, will create a new object when operator[] is called
00307       // for a new item
00308       if (d_orderMap.find(etype) == d_orderMap.end()) {
00309         RDKit::INT_VECT nets;
00310         d_orderMap[etype] = nets;
00311       }
00312       d_orderMap[etype].push_back(eid);
00313       return eid;
00314     }
00315 
00316     //------------------------------------
00317     //! adds an edge between two entries in the catalog
00318     /*!
00319       Since we are using a bidirectional graph - the order in 
00320       which the ids are supplied here makes a difference
00321 
00322       \param id1 index of the edge's beginning
00323       \param id2 index of the edge's end
00324 
00325     */
00326     void addEdge(unsigned int id1, unsigned int id2) {
00327       unsigned int nents = getNumEntries();
00328       RANGE_CHECK(0, id1, nents-1);
00329       RANGE_CHECK(0, id2, nents-1);
00330       // FIX: if we boost::setS for the edgeList BGL will
00331       // do the checking for duplicity (parallel edges)
00332       // But for reasons unknown setS results in compile
00333       // errors while using adjacent_vertices.
00334       typename CAT_GRAPH_TRAITS::edge_descriptor edge;
00335       bool found;
00336       boost::tie(edge,found) = boost::edge(boost::vertex(id1,d_graph),
00337                                            boost::vertex(id2,d_graph),
00338                                            d_graph);
00339       if (!found) {
00340         boost::add_edge(id1, id2, d_graph);
00341       }
00342     }
00343 
00344     //------------------------------------
00345     //! returns a pointer to our entry with a particular index 
00346     const entryType *getEntryWithIdx(unsigned int idx) const {
00347       RANGE_CHECK(0,idx,getNumEntries()-1);
00348       int vd = boost::vertex(idx, d_graph);
00349       typename boost::property_map < CatalogGraph, vertex_entry_t>::const_type 
00350         pMap = boost::get(vertex_entry_t(), d_graph);
00351       return pMap[vd];
00352     }
00353 
00354     //------------------------------------
00355     //! returns a pointer to our entry with a particular bit ID
00356     const entryType *getEntryWithBitId(unsigned int idx) const {
00357       RANGE_CHECK(0,idx,this->getFPLength()-1);
00358       typename boost::property_map < CatalogGraph, vertex_entry_t>::const_type 
00359         pMap = boost::get(vertex_entry_t(), d_graph);
00360       const entryType *res=NULL;
00361       for(unsigned int i=idx;i<this->getNumEntries();i++){
00362         const entryType *e=pMap[i];
00363         if(e->getBitId()==idx){
00364           res=e;
00365           break;
00366         }
00367       }
00368       return res;
00369     }
00370 
00371     //------------------------------------
00372     //! returns the index of the entry with a particular bit ID
00373     int getIdOfEntryWithBitId(unsigned int idx) const {
00374       RANGE_CHECK(0,idx,this->getFPLength()-1);
00375       typename boost::property_map < CatalogGraph, vertex_entry_t>::const_type 
00376         pMap = boost::get(vertex_entry_t(), d_graph);
00377       int res=-1;
00378       for(unsigned int i=idx;i<this->getNumEntries();i++){
00379         const entryType *e=pMap[i];
00380         if(static_cast<unsigned int>(e->getBitId())==idx){
00381           res=i;
00382           break;
00383         }
00384       }
00385       return res;
00386     }
00387 
00388     //------------------------------------
00389     //! returns a list of the indices of entries below the one passed in
00390     RDKit::INT_VECT getDownEntryList(unsigned int idx) const {
00391       RDKit::INT_VECT res;
00392       DOWN_ENT_ITER nbrIdx, endIdx;
00393       boost::tie(nbrIdx, endIdx) = boost::adjacent_vertices(idx, d_graph);
00394       while (nbrIdx != endIdx) {
00395         res.push_back(*nbrIdx);
00396         nbrIdx++;
00397       }
00398       //std::cout << res.size() << "\n";
00399       return res;
00400     }
00401 
00402     //------------------------------------
00403     //! returns a list of the indices that have a particular order 
00404     const RDKit::INT_VECT &getEntriesOfOrder(orderType ord) {
00405       return d_orderMap[ord];
00406     }
00407 
00408     //------------------------------------
00409     //! returns a list of the indices that have a particular order
00410     /*!
00411       \overload
00412     */
00413     const RDKit::INT_VECT &getEntriesOfOrder(orderType ord) const {
00414       typename std::map<orderType, RDKit::INT_VECT>::const_iterator elem;
00415       elem = d_orderMap.find(ord);
00416       CHECK_INVARIANT(elem!=d_orderMap.end()," catalog does not contain any entries of the order specified");
00417       return elem->second;
00418     }
00419 
00420     
00421   private:
00422     // graphs that store the entries in the catalog in a hierachical manner
00423     CatalogGraph d_graph;
00424     // a  map that maps the order type of entries in the catalog to 
00425     // a vector of vertex indices in the graphs above
00426     // e.g.  for a catalog with molecular fragments, the order of a fragment can 
00427     // simply be the number of bond in it. The list this oder maps to is all the
00428     // vertex ids of these fragment in the catalog that have this many bonds in them
00429     std::map<orderType, RDKit::INT_VECT> d_orderMap;
00430 
00431     //------------------------------------
00432     //! clear any memory that we've used
00433     void destroy() {
00434       ENT_ITER_PAIR entItP = boost::vertices(d_graph);
00435       typename boost::property_map < CatalogGraph, vertex_entry_t>::type 
00436         pMap = boost::get(vertex_entry_t(), d_graph);
00437       while (entItP.first != entItP.second) {
00438         delete pMap[*(entItP.first++)];
00439       }
00440     }
00441 
00442 
00443     
00444   };
00445 
00446 
00447   //-----------------------------------------------------------------------------
00448   //! a linear Catalog (analogous to an std::vector)
00449   /*!
00450     Here there is no particular hierarchy, simply a
00451     collection of entries.
00452   */
00453   template <class entryType, class orderType> 
00454   class LinearCatalog : public Catalog <entryType, orderType> {
00455     // here there is no particular hierarchy of entries
00456     // we simply model it as a vector of entries
00457     // FIX: for retrieval purposes a better model map be std::map
00458 
00459   public:
00460     std::string Serialize();
00461     
00462     unsigned int addEntry(entryType *entry, bool updateFPLength = true);
00463     
00464     const entryType *getEntryWithIdx(unsigned int idx) const;
00465 
00466   private:
00467     std::vector<entryType*> d_vector;
00468   };
00469 }
00470 
00471 #endif

Generated on Sat May 24 08:36:32 2008 for RDCode by  doxygen 1.5.3