BitOps.h

Go to the documentation of this file.
00001 //
00002 //  Copyright (C) 2003-2006 greg Landrum and Rational Discovery LLC
00003 //
00004 //  @@ All Rights Reserved @@
00005 //
00006 #ifndef __RD_BITOPS_H__
00007 #define __RD_BITOPS_H__
00008 /*! \file BitOps.h
00009 
00010   \brief Contains general bit-comparison and similarity operations.
00011 
00012   The notation used to document the similarity metrics is:
00013     - \c V1_n: number of bits in vector 1
00014     - \c V1_o: number of on bits in vector 1
00015     - <tt>(V1&V2)_o</tt>: number of on bits in the intersection of vectors 1 and 2
00016   
00017  */
00018 
00019 #include "BitVects.h"
00020 #include <string>
00021 
00022 
//! general purpose wrapper for calculating the similarity between two bvs
//! that may be of unequal size (will automatically fold as appropriate)
/*!
  \param bv1            the first bit vector
  \param bv2            the second bit vector
  \param metric         the similarity function applied to the (possibly
                        folded) pair of vectors
  \param returnDistance (optional) if true, <tt>1 - similarity</tt> is
                        returned instead of the similarity itself

  \return the value computed by \c metric, or one minus that value when
     \c returnDistance is set.

  When the two vectors report different sizes via \c GetNumBits(), the larger
  one is folded with FoldFingerprint() by the ratio of the two bit counts
  before \c metric is called; the inputs themselves are never modified.

  <b>Note:</b> the fold factor is obtained by dividing by the smaller vector's
     bit count, so that count must be non-zero when the sizes differ.
 */
template <typename T>
double SimilarityWrapper(const T &bv1,const T &bv2,
                         const double (*metric)(const T &,const T &),
                         bool returnDistance=false){
  // Scope guard for the temporary returned by FoldFingerprint(): the folded
  // vector is released on every exit path, including an exception escaping
  // from metric(). Instances are never copied.
  struct FoldedOwner {
    T *ptr;
    explicit FoldedOwner(T *p) : ptr(p) {}
    ~FoldedOwner() { delete ptr; }
  };

  double res=0.0;
  if(bv1.GetNumBits()>bv2.GetNumBits()){
    // bv1 is the longer vector: fold it before comparing
    FoldedOwner folded(FoldFingerprint(bv1,bv1.GetNumBits()/bv2.GetNumBits()));
    res = metric(*folded.ptr,bv2);
  } else if(bv2.GetNumBits()>bv1.GetNumBits()){
    // bv2 is the longer vector: fold it before comparing
    FoldedOwner folded(FoldFingerprint(bv2,bv2.GetNumBits()/bv1.GetNumBits()));
    res = metric(bv1,*folded.ptr);
  } else {
    // equal sizes: compare directly
    res = metric(bv1,bv2);
  }
  if(returnDistance) res = 1.0-res;
  return res;
}
00044 
00045 
00046 bool AllProbeBitsMatch(const char *probe,const char *ref);
00047 bool AllProbeBitsMatch(const std::string &probe,const std::string &ref);
00048 
00049   
00050 template <typename T1>
00051 bool AllProbeBitsMatch(const T1 &probe,const std::string &pkl);
00052 
00053 
00054 //! returns the number of on bits in common between two bit vectors
00055 /*!
00056   \return (bv1&bv2)_o
00057 */
00058 template <typename T1, typename T2>
00059 int
00060 NumOnBitsInCommon(const T1& bv1,const T2& bv2);
00061 
00062 int
00063 NumOnBitsInCommon(const ExplicitBitVect & bv1,const ExplicitBitVect & bv2);
00064 
00065 //! returns the Tanimoto similarity between two bit vects
00066 /*!
00067   \return <tt>(bv1&bv2)_o / [bv1_o + bv2_o - (bv1&bv2)_o]</tt>
00068 */
00069 template <typename T1, typename T2>
00070 const double
00071 TanimotoSimilarity(const T1& bv1,const T2& bv2);
00072 
00073 //! returns the Cosine similarity between two bit vects
00074 /*!
00075   \return <tt>(bv1&bv2)_o / sqrt(bv1_o * bv2_o)</tt>
00076 */
00077 template <typename T1, typename T2>
00078 const double
00079 CosineSimilarity(const T1& bv1,
00080                  const T2& bv2);
00081 
00082 //! returns the Kulczynski similarity between two bit vects
00083 /*!
00084   \return <tt>(bv1&bv2)_o * [bv1_o + bv2_o] / [2 * bv1_o * bv2_o]</tt>
00085 */
00086 template <typename T1, typename T2>
00087 const double
00088 KulczynskiSimilarity(const T1& bv1,
00089                      const T2& bv2);
00090 
00091 //! returns the Dice similarity between two bit vects
00092 /*!
00093   \return <tt>2*(bv1&bv2)_o / [bv1_o + bv2_o]</tt>
00094 */
00095 template <typename T1, typename T2>
00096 const double
00097 DiceSimilarity(const T1& bv1,
00098                const T2& bv2);
00099 
00100 //! returns the Sokal similarity between two bit vects
00101 /*!
00102   \return <tt>(bv1&bv2)_o / [2*bv1_o + 2*bv2_o - 3*(bv1&bv2)_o]</tt>
00103 */
00104 template <typename T1, typename T2>
00105 const double
00106 SokalSimilarity(const T1& bv1,
00107                 const T2& bv2);
00108 
00109 //! returns the McConnaughey similarity between two bit vects
00110 /*!
00111   \return <tt>[(bv1&bv2)_o * (bv1_o + bv2_o) - (bv1_o * bv2_o)] / (bv1_o * bv2_o)</tt>
00112 */
00113 template <typename T1, typename T2>
00114 const double
00115 McConnaugheySimilarity(const T1& bv1,
00116                        const T2& bv2);
00117 
00118 //! returns the Asymmetric similarity between two bit vects
00119 /*!
00120   \return <tt>(bv1&bv2)_o / min(bv1_o,bv2_o)</tt>
00121 */
00122 template <typename T1, typename T2>
00123 const double
00124 AsymmetricSimilarity(const T1& bv1,
00125                      const T2& bv2);
00126 
00127 //! returns the Braun-Blanquet similarity between two bit vects
00128 /*!
00129   \return <tt>(bv1&bv2)_o / max(bv1_o,bv2_o)</tt>
00130 */
00131 template <typename T1, typename T2>
00132 const double
00133 BraunBlanquetSimilarity(const T1& bv1,
00134                         const T2& bv2);
00135 
00136 //! returns the Russel similarity between two bit vects
00137 /*!
00138   \return <tt>(bv1&bv2)_o / bv1_o</tt>
00139 
00140   <b>Note:</b> this operation is non-commutative:
00141     RusselSimilarity(bv1,bv2) != RusselSimilarity(bv2,bv1)
00142 
00143 */
00144 template <typename T1, typename T2>
00145 const double
00146 RusselSimilarity(const T1& bv1,
00147                  const T2& bv2);
00148 
00149 
00150 //! returns the on bit similarity between two bit vects
00151 /*!
00152   \return <tt>(bv1&bv2)_o / (bv1|bv2)_o </tt>
00153 */
00154 template <typename T1, typename T2>
00155 const double
00156 OnBitSimilarity(const T1& bv1,const T2& bv2);
00157 
00158 //! returns the number of common bits (on and off) between two bit vects
00159 /*!
00160   \return <tt>bv1_n - (bv1^bv2)_o</tt>
00161 */
00162 template <typename T1, typename T2>
00163 const int
00164 NumBitsInCommon(const T1& bv1,const T2& bv2);
00165 
00166 //! returns the common-bit similarity (on and off) between two bit vects
00167 /*!
00168   \return <tt>[bv1_n - (bv1^bv2)_o] / bv1_n</tt>
00169 */
00170 template <typename T1, typename T2>
00171 const double
00172 AllBitSimilarity(const T1& bv1,const T2& bv2);
00173 
00174 //! returns an IntVect with indices of all on bits in common between two bit vects
00175 template <typename T1, typename T2>
00176 IntVect
00177 OnBitsInCommon(const T1& bv1,const T2& bv2);
00178 
00179 //! returns an IntVect with indices of all off bits in common between two bit vects
00180 template <typename T1, typename T2>
00181 IntVect
00182 OffBitsInCommon(const T1& bv1,const T2& bv2);
00183 
00184 //! returns the on-bit projected similarities between two bit vects
00185 /*!
00186   \return two values, as a DoubleVect:
00187       - <tt>(bv1&bv2)_o / bv1_o</tt> 
00188       - <tt>(bv1&bv2)_o / bv2_o</tt> 
00189 */
00190 template <typename T1, typename T2>
00191 DoubleVect
00192 OnBitProjSimilarity(const T1& bv1,const T2& bv2);
00193 
00194 //! returns the off-bit projected similarities between two bit vects
00195 /*!
00196   \return two values, as a DoubleVect:
00197      - <tt>[bv1_n - (bv1|bv2)_o] / [bv1_n - bv1_o]</tt> 
00198      - <tt>[bv2_n - (bv1|bv2)_o] / [bv2_n - bv2_o]</tt> 
00199 
00200    <b>Note:</b> <tt>bv1_n = bv2_n</tt>
00201       
00202 */
00203 template <typename T1, typename T2>
00204 DoubleVect
00205 OffBitProjSimilarity(const T1& bv1,const T2& bv2);
00206 
00207 
00208 //! folds a bit vector \c factor times and returns the result
00209 /*!
00210   \param bv1    the vector to be folded
00211   \param factor (optional) the number of times to fold it
00212   
00213   \return a pointer to the folded fingerprint, which is
00214      <tt>bv1_n/factor</tt> long.
00215      
00216    <b>Note:</b> The caller is responsible for <tt>delete</tt>ing the result.
00217  */
00218 template <typename T1>
00219 T1 *
00220 FoldFingerprint(const T1& bv1,unsigned int factor=2);
00221 
00222 //! returns a text representation of a bit vector (a string of 0s and 1s)
00223 /*!
00224   \param bv1    the vector to be converted to text
00225   
00226   \return an std::string
00227 
00228  */
00229 template <typename T1>
00230 std::string
00231 BitVectToText(const T1& bv1);
00232 
00233 
00234 
00235 #endif

Generated on Sat May 24 08:36:32 2008 for RDCode by  doxygen 1.5.3