InfoGainFuncs.h
Go to the documentation of this file.
00001
00002
00003
00004
00005
00006 #ifndef INFOGAINFUNC_H
00007 #define INFOGAINFUNC_H
00008
#include <cmath>
#include <vector>

#include <RDGeneral/types.h>
00010
00011 namespace RDInfoTheory {
00012
//! Computes the chi-square statistic of a contingency table.
/*!
  The table is stored row-major in a flat array: the entry at row i and
  column j is dMat[i*dim2 + j]. With row totals R_i, column totals C_j and
  grand total N, the value returned is

      chi = sum_i( (N / R_i) * sum_j( N_ij^2 / C_j ) ) - N

  All sums are accumulated in double precision, so tables holding
  non-integer values are not truncated. Rows and columns whose total is
  zero are left out of the sum; evaluating them would be 0/0 and turn the
  whole result into NaN.

  \param dMat  pointer to the dim1*dim2 table entries (read only, not owned)
  \param dim1  number of rows
  \param dim2  number of columns

  \return the chi-square value; 0.0 if either dimension is not positive or
          every entry is zero
*/
template<class T> double ChiSquare(T *dMat, long int dim1,long int dim2) {
  if (dim1 <= 0 || dim2 <= 0) {
    return 0.0;
  }

  typedef std::vector<double>::size_type size_type;
  std::vector<double> rowSums(static_cast<size_type>(dim1), 0.0);
  std::vector<double> colSums(static_cast<size_type>(dim2), 0.0);
  double total = 0.0;

  // One pass over the table gathers the row totals, the column totals and
  // the grand total.
  for (long int i = 0; i < dim1; ++i) {
    const T *row = dMat + i * dim2;
    for (long int j = 0; j < dim2; ++j) {
      const double cell = static_cast<double>(row[j]);
      rowSums[i] += cell;
      colSums[j] += cell;
      total += cell;
    }
  }

  double chi = 0.0;
  for (long int i = 0; i < dim1; ++i) {
    if (rowSums[i] == 0.0) {
      continue;  // empty row: contributes nothing
    }
    const T *row = dMat + i * dim2;
    double rchi = 0.0;
    for (long int j = 0; j < dim2; ++j) {
      if (colSums[j] == 0.0) {
        continue;  // empty column: contributes nothing
      }
      const double cell = static_cast<double>(row[j]);
      rchi += cell * cell / colSums[j];
    }
    chi += (total / rowSums[i]) * rchi;
  }
  return chi - total;
}
00062
//! Computes the entropy, in bits, of an array of values.
/*!
  Each entry is divided by the sum of all entries to give a fraction d_i,
  and the result is -sum_i( d_i * log2(d_i) ). Entries whose fraction is
  zero are skipped.

  \param tPtr  pointer to the dim values (read only, not owned)
  \param dim   number of values

  \return the entropy in bits; 0.0 if the values sum to zero (which
          includes dim <= 0)
*/
template<class T> double InfoEntropy(T *tPtr, long int dim) {
  // The index type matches dim so that long arrays cannot overflow it.
  T nInstances = 0;
  for (long int i = 0; i < dim; ++i) {
    nInstances += tPtr[i];
  }
  if (nInstances == 0) {
    return 0.0;
  }

  double accum = 0.0;
  for (long int i = 0; i < dim; ++i) {
    const double d = static_cast<double>(tPtr[i]) / nInstances;
    if (d != 0) {
      accum -= d * std::log(d);
    }
  }
  // accum was gathered with natural logs; convert to base 2.
  return accum / std::log(2.0);
}
00082
//! Computes the information gain of a table of values.
/*!
  The table is stored row-major in a flat array: the entry at row i and
  column j is dMat[i*dim2 + j]. The gain is the entropy of the column
  totals minus the average of the per-row entropies, each row weighted by
  its row total and the sum normalised by the grand total:

      gain = InfoEntropy(column totals)
             - sum_i( R_i * InfoEntropy(row i) ) / N

  The grand total N is accumulated in double precision, so tables holding
  non-integer values are not truncated.

  \param dMat  pointer to the dim1*dim2 table entries (read only, not owned)
  \param dim1  number of rows
  \param dim2  number of columns

  \return the information gain; 0.0 if either dimension is not positive or
          the entries sum to zero
*/
template<class T> double InfoEntropyGain(T *dMat, long int dim1,long int dim2) {
  if (dim1 <= 0 || dim2 <= 0) {
    return 0.0;
  }

  typedef typename std::vector<T>::size_type size_type;
  // Value-initialised, i.e. all zeros, and released automatically on every
  // exit path.
  std::vector<T> variableRes(static_cast<size_type>(dim1));
  std::vector<T> overallRes(static_cast<size_type>(dim2));

  // Row totals and column totals in a single pass over the table.
  for (long int i = 0; i < dim1; ++i) {
    const T *row = dMat + i * dim2;
    for (long int j = 0; j < dim2; ++j) {
      variableRes[i] += row[j];
      overallRes[j] += row[j];
    }
  }

  double total = 0.0;
  for (long int j = 0; j < dim2; ++j) {
    total += static_cast<double>(overallRes[j]);
  }
  if (total == 0.0) {
    return 0.0;
  }

  // Sum of the row entropies, each weighted by its row total.
  double term2 = 0.0;
  for (long int i = 0; i < dim1; ++i) {
    term2 += variableRes[i] * InfoEntropy(dMat + i * dim2, dim2);
  }
  term2 /= total;

  // dim2 > 0 here, so overallRes is non-empty and &overallRes[0] is valid.
  return InfoEntropy(&overallRes[0], dim2) - term2;
}
00133
00134
00135 }
00136 #endif
00137
00138