Package rdkit :: Package ML :: Module CompositeRun
[hide private]
[frames | no frames]

Source Code for Module rdkit.ML.CompositeRun

  1  # $Id: CompositeRun.py 997 2009-02-25 06:12:43Z glandrum $ 
  2  # 
  3  #  Copyright (C) 2002-2006  greg Landrum and Rational Discovery LLC 
  4  # 
  5  #   @@ All Rights Reserved  @@ 
  6  # 
  7  """ contains a class to store parameters for and results from 
  8  Composite building 
  9   
 10  """ 
 11  from rdkit import RDConfig 
 12  from rdkit.Dbase.DbConnection import DbConnect 
 13  from rdkit import DataStructs 
 14  from rdkit.Dbase import DbModule 
 15  ##from rdkit.ML.SVM import SVMClassificationModel as SVM 
 16   
def SetDefaults(runDetails):
    """ initializes a details object with default values

    **Arguments**

      - runDetails: the object to initialize (typically a
        _CompositeRun.CompositeRun_ instance). Every default listed
        below is assigned to it as an attribute, overwriting any
        value that is already there.

    **Returns**

      the same _runDetails_ object that was passed in.

    """
    # The table is rebuilt on every call so that the mutable defaults
    # (the empty lists) are never shared between two details objects.
    defaults = (
        # general run parameters
        ('nRuns', 1),
        ('nModels', 10),
        ('outName', ''),
        ('badName', ''),
        ('splitRun', 0),
        ('splitFrac', 0.7),
        ('lockRandom', 0),
        ('randomActivities', 0),
        ('shuffleActivities', 0),
        ('replacementSelection', 0),

        # Tree Parameters
        ('useTrees', 1),
        ('pruneIt', 0),
        ('lessGreedy', 0),
        ('limitDepth', -1),
        ('recycleVars', 0),
        ('randomDescriptors', 0),  # toggles growing of random forests

        # KNN Parameters
        ('useKNN', 0),
        ('knnDistFunc', ''),
        ('knnNeighs', 0),

        # SigTree Parameters
        ('useSigTrees', 0),
        ('useCMIM', 0),
        ('allowCollections', False),

        # Naive Bayes Classifier Parameters
        ('useNaiveBayes', 0),
        ('mEstimateVal', -1.0),
        ('useSigBayes', 0),

        # database and bookkeeping parameters
        ('bayesModel', 0),
        ('dbName', ''),
        ('dbUser', RDConfig.defaultDBUser),
        ('dbPassword', RDConfig.defaultDBPassword),
        ('dbWhat', '*'),
        ('dbWhere', ''),
        ('dbJoin', ''),
        ('qTableName', ''),
        ('qBounds', []),
        ('qBoundCount', ''),
        ('activityBounds', []),
        ('activityBoundsVals', ''),
        ('detailedRes', 0),
        ('noScreen', 0),
        ('threshold', 0.0),
        ('filterFrac', 0.0),
        ('filterVal', 0.0),
        ('modelFilterVal', 0.0),
        ('modelFilterFrac', 0.0),
        ('internalHoldoutFrac', 0.3),
        ('pickleDataFileName', ''),
        ('startAt', None),
        ('persistTblName', ''),
        ('randomSeed', (23, 42)),
        ('note', ''),
    )
    for attrName, attrValue in defaults:
        setattr(runDetails, attrName, attrValue)

    ## #
    ## # SVM Parameters (disabled, kept for reference)
    ## #
    ## runDetails.useSVM = 0
    ## runDetails.svmKernel = SVM.radialKernel
    ## runDetails.svmType = SVM.cSVCType
    ## runDetails.svmGamma = None
    ## runDetails.svmCost = None
    ## runDetails.svmWeights = None
    ## runDetails.svmDataType = 'float'
    ## runDetails.svmDegree = 3
    ## runDetails.svmCoeff = 0.0
    ## runDetails.svmEps = 0.001
    ## runDetails.svmNu = 0.5
    ## runDetails.svmCache = 40
    ## runDetails.svmShrink = 1
    ## runDetails.svmDataType='float'

    return runDetails
120 121
class CompositeRun:
    """ class to store parameters for and results from Composite building

    This class has a default set of fields which are added to the database.

    By default these fields are stored in a tuple, so they are immutable.  This
    is probably what you want.

    Each entry of _fields_ is a (column name, SQL column type) pair.  The
    column name is also the name of the instance attribute whose value is
    written to that column by _Store()_.

    """
    fields = (
        ("rundate", "varchar(32)"),
        ("dbName", "varchar(200)"),
        ("dbWhat", "varchar(200)"),
        ("dbWhere", "varchar(200)"),
        ("dbJoin", "varchar(200)"),
        ("tableName", "varchar(80)"),
        ("note", "varchar(120)"),
        ("shuffled", "smallint"),
        ("randomized", "smallint"),
        ("overall_error", "float"),
        ("holdout_error", "float"),
        ("overall_fraction_dropped", "float"),
        ("holdout_fraction_dropped", "float"),
        ("overall_correct_conf", "float"),
        ("overall_incorrect_conf", "float"),
        ("holdout_correct_conf", "float"),
        ("holdout_incorrect_conf", "float"),
        ("overall_result_matrix", "varchar(256)"),
        ("holdout_result_matrix", "varchar(256)"),
        ("threshold", "float"),
        ("splitFrac", "float"),
        ("filterFrac", "float"),
        ("filterVal", "float"),
        ("modelFilterVal", "float"),
        ("modelFilterFrac", "float"),
        ("nModels", "int"),
        ("limitDepth", "int"),
        ("bayesModels", "int"),
        ("qBoundCount", "varchar(3000)"),
        ("activityBoundsVals", "varchar(200)"),
        ("cmd", "varchar(500)"),
        ("model", DbModule.binaryTypeName),
    )

    def _CreateTable(self, cn, tblName):
        """ *Internal Use only*

        Makes sure the results table exists and has a column for every
        entry in _fields_.

        **Arguments**

          - cn: the database connection object to work through

          - tblName: name of the table to create or extend

        **Notes**

          - if no table named _tblName_ exists (case-insensitive
            comparison), it is created with all of the columns in _fields_.

          - otherwise any column from _fields_ that the connection does not
            report is added with an 'alter table' statement.

          - the changes are committed before returning.

        """
        # A real list (not a lazy map object) so the membership test below
        # behaves the same on every Python version.
        names = [x.strip().upper() for x in cn.GetTableNames()]
        if tblName.upper() not in names:
            curs = cn.GetCursor()
            colDefs = ','.join(['%s %s' % (name, typ) for name, typ in self.fields])
            curs.execute('create table %s (%s)' % (tblName, colDefs))
            cn.Commit()
        else:
            heads = [x.upper() for x in cn.GetColumnNames()]
            curs = cn.GetCursor()
            for name, typ in self.fields:
                if name.upper() not in heads:
                    curs.execute('alter table %s add %s %s' % (tblName, name, typ))
            cn.Commit()

    def Store(self, db='models.gdb', table='results',
              user='sysdba', password='masterkey'):
        """ adds the result to a database

        **Arguments**

          - db: name of the database to use

          - table: name of the table to use

          - user&password: connection information

        **Notes**

          - the table is created (or extended with any missing columns)
            before the row is inserted.

          - only the entries of _fields_ that exist as attributes on this
            object are written; the others are left out of the insert
            statement.

        """
        cn = DbConnect(db, table, user, password)
        curs = cn.GetCursor()
        self._CreateTable(cn, table)

        cols = []
        vals = []
        for name, typ in self.fields:
            try:
                v = getattr(self, name)
            except AttributeError:
                # this field was never set on the run; skip its column
                pass
            else:
                cols.append(name)
                vals.append(v)

        # The values go through the driver's placeholders; only the table
        # and column names are formatted into the statement text.
        qs = ','.join([DbModule.placeHolder] * len(vals))
        cmd = 'insert into %s (%s) values (%s)' % (table, ','.join(cols), qs)
        curs.execute(cmd, tuple(vals))
        cn.Commit()

    def GetDataSet(self, **kwargs):
        """ Returns a MLDataSet pulled from a database using our stored
        values.

        **Arguments**

          - any keyword arguments are passed on to _DataUtils.DBToData()_

        **Notes**

          - the database, table, credentials and query pieces are read from
            the _dbName_, _tableName_, _dbUser_, _dbPassword_, _dbWhat_,
            _dbWhere_ and _dbJoin_ attributes of this object.

        """
        from rdkit.ML.Data import DataUtils
        data = DataUtils.DBToData(self.dbName, self.tableName,
                                  user=self.dbUser, password=self.dbPassword,
                                  what=self.dbWhat, where=self.dbWhere,
                                  join=self.dbJoin, **kwargs)
        return data

    def GetDataSetInfo(self, **kwargs):
        """ Returns the column names and types of the data that would be
        pulled from the database using our stored values.

        **Arguments**

          - keyword arguments are accepted for symmetry with _GetDataSet()_
            but are currently not used.

        **Returns**

          the result of the connection's _GetColumnNamesAndTypes()_ call for
          the stored _dbJoin_, _dbWhat_ and _dbWhere_ values.

        """
        # Use the stored credentials when both are present so that this
        # method talks to the database the same way GetDataSet() does;
        # otherwise fall back to DbConnect's own defaults, as before.
        user = getattr(self, 'dbUser', None)
        password = getattr(self, 'dbPassword', None)
        if user is not None and password is not None:
            conn = DbConnect(self.dbName, self.tableName, user, password)
        else:
            conn = DbConnect(self.dbName, self.tableName)
        res = conn.GetColumnNamesAndTypes(join=self.dbJoin, what=self.dbWhat,
                                          where=self.dbWhere)
        return res
248