docs/cppapi/InfoGainFuncs_8h_source.html

// $Id$

//

//  Copyright (C) 2003 Rational Discovery LLC

//


#include <RDGeneral/export.h>

#ifndef INFOGAINFUNC_H

#define INFOGAINFUNC_H


#include <RDGeneral/types.h>


namespace RDInfoTheory {


template <class T>


double ChiSquare(T *dMat, long int dim1, long int dim2) {

  // For a contingency matrix with each column corresponding to a class and each

  // row to a

  // the descriptor (or variable) state, the matrix looks something like for 3x3

  // problem

  //

  //            1    2    3   Totals

  //      1 |  N11  N12  N13    R1

  //      2 |  N21  N22  N23    R2

  //      3 |  N31  N32  N33    R3

  // Totals |   C1   C2   C3    N

  //

  //  Th chi squere formula is

  //  chi = sum((N/Ri)*sum(Nij^2/Cj) ) -N

  T *rowSums, *colSums;

  int i, j, tSum;

  // find the row sum

  tSum = 0;

  rowSums = new T[dim1];

  for (i = 0; i < dim1; i++) {

    int idx1 = i * dim2;

    rowSums[i] = (T)0.0;

    for (j = 0; j < dim2; j++) {

      rowSums[i] += dMat[idx1 + j];

    }

    tSum += (int)rowSums[i];

  }


  // find the column sums

  colSums = new T[dim2];

  for (i = 0; i < dim2; i++) {

    colSums[i] = (T)0.0;

    for (j = 0; j < dim1; j++) {

      colSums[i] += dMat[j * dim2 + i];

    }

  }


  double chi = 0.0;

  for (i = 0; i < dim1; i++) {

    double rchi = 0.0;

    for (j = 0; j < dim2; j++) {

      rchi += (pow((double)dMat[i * dim2 + j], 2) / colSums[j]);

    }

    chi += (((double)tSum / rowSums[i]) * rchi);

  }

  chi -= tSum;

  delete[] rowSums;

  delete[] colSums;


  return chi;

}


template <class T>


double InfoEntropy(T *tPtr, long int dim) {

  int i;

  T nInstances = 0;

  double accum = 0.0, d;


  for (i = 0; i < dim; i++) {

    nInstances += tPtr[i];

  }


  if (nInstances != 0) {

    for (i = 0; i < dim; i++) {

      d = (double)tPtr[i] / nInstances;

      if (d != 0) {

        accum += -d * log(d);

      }

    }

  }

  return accum / log(2.0);

}


template <class T>


double InfoEntropyGain(T *dMat, long int dim1, long int dim2) {

  T *variableRes, *overallRes;

  double gain, term2;

  int tSum;


  // std::cerr<<" --------\n    ieg: "<<dim1<<" "<<dim2<<std::endl;

  variableRes = new T[dim1];

  for (long int i = 0; i < dim1; i++) {

    long int idx1 = i * dim2;

    variableRes[i] = (T)0.0;

    for (long int j = 0; j < dim2; j++) {

      variableRes[i] += dMat[idx1 + j];

      // std::cerr<<"  "<<i<<" "<<j<<" "<<dMat[idx1+j]<<std::endl;

    }

  }


  overallRes = new T[dim2];

  // do the col sums

  for (long int i = 0; i < dim2; i++) {

    overallRes[i] = (T)0.0;

    for (long int j = 0; j < dim1; j++) {

      overallRes[i] += dMat[j * dim2 + i];

      // std::cerr<<"  "<<i<<" "<<j<<" "<<dMat[j*dim2+i]<<std::endl;

    }

  }


  term2 = 0.0;

  for (long int i = 0; i < dim1; i++) {

    T *tPtr;

    tPtr = dMat + i * dim2;

    term2 += variableRes[i] * InfoEntropy(tPtr, dim2);

  }

  tSum = 0;

  for (long int i = 0; i < dim2; i++) {

    tSum += static_cast<int>(overallRes[i]);

  }


  if (tSum != 0) {

    term2 /= tSum;

    gain = InfoEntropy(overallRes, dim2) - term2;

  } else {

    gain = 0.0;

  }

  // std::cerr<<"  >gain> "<<gain<<std::endl;


  delete[] overallRes;

  delete[] variableRes;

  return gain;

}


}  // namespace RDInfoTheory

#endif

export.h

RDInfoTheory
Class used to rank bits based on a specified measure of information.
Definition CorrMatGenerator.h:18

RDInfoTheory::InfoEntropyGain
double InfoEntropyGain(T *dMat, long int dim1, long int dim2)
Definition InfoGainFuncs.h:89

RDInfoTheory::ChiSquare
double ChiSquare(T *dMat, long int dim1, long int dim2)
Definition InfoGainFuncs.h:15

RDInfoTheory::InfoEntropy
double InfoEntropy(T *tPtr, long int dim)
Definition InfoGainFuncs.h:68

types.h