Package ML :: Package FeatureSelect :: Module CMIM
[hide private]
[frames | no frames]

Source Code for Module ML.FeatureSelect.CMIM

 1  # $Id: CMIM.py 2 2006-05-06 22:54:39Z glandrum $ 
 2  # 
 3  #  Copyright (C) 2005  Rational Discovery LLC 
 4  #    All Rights Reserved 
 5  # 
 6  """ for the moment this is using Francois Fleuret's cmim library 
 7   to do the feature selection 
 8   
 9   Reference: F. Fleuret "Fast Binary Feature Selection with Conditional 
10              Mutual Information", J. Machine Learn. Res. 5, 1531-1555 
11              (2004) 
12     
13  """ 
14  import RDConfig 
15  import DataStructs 
16  import tempfile 
17  import os 
18  import rdFeatSelect 
19   
def SelectFeatures(examples,nFeatsToPick,bvCol=1):
    """ Picks features by delegating to rdFeatSelect.selectCMIM.

    Arguments:
      - examples: passed straight through to rdFeatSelect.selectCMIM
      - nFeatsToPick: number of features requested from selectCMIM
      - bvCol: accepted for signature compatibility; this function does
        not use it

    Returns:
      the value returned by selectCMIM when it contains no -1; otherwise
      a tuple holding only the entries that precede the first -1
      (presumably -1 marks slots that could not be filled -- confirm
      against rdFeatSelect).
    """
    picked = rdFeatSelect.selectCMIM(examples,nFeatsToPick)
    # nothing to trim: hand back the library's result untouched
    if -1 not in picked:
        return picked
    asList = list(picked)
    firstMissing = asList.index(-1)
    return tuple(asList[:firstMissing])
26
def _SelectFeatures(examples,nFeatsToPick,bvCol=1):
    """ Selects features by running the external cmim executable.

    The examples are written to a temporary input file, the executable
    found under RDConfig.RDBaseDir/External/cmim-1.0/cmim.exe is run on
    it, and the selected feature indices are parsed from the output file
    the executable is asked to write.

    Input file layout (as produced here):
      - one header line: "<number of examples> <number of bits>"
      - then, for each example, the bit string from row[bvCol] on one
        line followed by row[-1] on the next line

    Arguments:
      - examples: non-empty sequence of rows; row[bvCol] must provide
        GetNumBits() and ToBitString()
      - nFeatsToPick: number of features passed to --nb-features and the
        number of integers read back from the output
      - bvCol: index of the bit-vector column within each row

    Returns:
      a list of nFeatsToPick integers taken from the second line of the
      output file

    Raises:
      - ValueError: if the cmim executable does not exist, or if the
        output cannot be parsed as integers
      - IndexError: if examples is empty or the output holds fewer than
        nFeatsToPick entries

    Both temporary files are removed whether or not the run succeeds.
    """
    nPts = len(examples)
    nFeats = examples[0][bvCol].GetNumBits()

    exe = os.path.join(RDConfig.RDBaseDir,'External','cmim-1.0','cmim.exe')
    if not os.path.exists(exe):
        raise ValueError('could not find cmim executable %s'%exe)

    # mkstemp creates the file itself, which avoids the window between
    # picking a name and opening it that tempfile.mktemp leaves open
    fd,inFname = tempfile.mkstemp('.dat')
    outFname = inFname + '.out'
    try:
        inF = os.fdopen(fd,'w')
        try:
            inF.write('%s %s\n'%(nPts,nFeats))
            for row in examples:
                inF.write(str(row[bvCol].ToBitString())+'\n')
                inF.write(str(row[-1])+'\n')
        finally:
            inF.close()

        os.spawnlp(os.P_WAIT,exe,exe,'--nb-features',str(nFeatsToPick),'--train',
                   inFname,outFname)

        inD = open(outFname,'r')
        try:
            # the first line is an integer count; it is parsed only so
            # that malformed output is rejected, the value is not needed
            int(inD.readline())
            splitL = inD.readline().split(' ')
        finally:
            inD.close()

        res = [int(splitL[i]) for i in range(nFeatsToPick)]
    finally:
        # always clean up, including when the executable or parsing fails
        for fName in (inFname,outFname):
            if os.path.exists(fName):
                os.unlink(fName)

    return res
63