Package rdkit :: Package ML :: Package ModelPackage :: Module PackageUtils
[hide private]
[frames | no frames]

Source Code for Module rdkit.ML.ModelPackage.PackageUtils

  1  # 
  2  # Copyright (C) 2003 Rational Discovery LLC 
  3  # All rights are reserved. 
  4  # 
  5  from elementtree.ElementTree import ElementTree,Element,SubElement 
  6  import time 
  7   
  8   
def _ConvertModelPerformance(perf, modelPerf):
    """Append the XML children describing one performance record to *perf*.

    Arguments:
      - perf: the parent element; children are added to it in place with
        SubElement().
      - modelPerf: a sequence laid out as
          ( accuracy, avgCorrectConf, avgIncorrectConf,
            confusionMatrix, thresh, avgSkipConf, nSkipped )
        The first three entries are always read.  Each later entry is used
        only when the sequence is long enough to contain it.

    Children are emitted in this fixed order (optional ones are skipped when
    their value is not available):
      ScreenThreshold, NumScreened, NumSkipped, Accuracy, AvgCorrectConf,
      AvgIncorrectConf, AvgSkipConf, ConfusionMatrix

    Nothing is returned.
    """
    nFields = len(modelPerf)

    def _addText(tag, value):
        # every child here is a leaf that carries only text
        SubElement(perf, tag).text = value

    if nFields > 3:
        confMat = modelPerf[3]
        # the number of screened points is the sum of all matrix entries
        total = 0
        for row in confMat:
            for entry in row:
                total += entry
        numScreened = str(total)
    else:
        confMat = None
        numScreened = 'N/A'

    if nFields > 4:
        _addText("ScreenThreshold", str(modelPerf[4]))
    _addText("NumScreened", numScreened)
    # Each optional field is guarded by its own index, so a record that stops
    # after the threshold (or after avgSkipConf) no longer raises IndexError.
    if nFields > 6:
        _addText("NumSkipped", str(modelPerf[6]))
    _addText("Accuracy", str(modelPerf[0]))
    _addText("AvgCorrectConf", str(modelPerf[1]))
    _addText("AvgIncorrectConf", str(modelPerf[2]))
    if nFields > 5:
        _addText("AvgSkipConf", str(modelPerf[5]))
    # Explicit None/length test: an empty matrix is still skipped, and
    # array-like matrices whose truth value is ambiguous are accepted.
    if confMat is not None and len(confMat) > 0:
        _addText("ConfusionMatrix", str(confMat))
41
def PackageToXml(pkg, summary="N/A", trainingDataId='N/A',
                 dataPerformance=[],
                 recommendedThreshold=None,
                 classDescriptions=[],
                 modelType=None,
                 modelOrganism=None):
    """ generates XML for a package that follows the RD_Model.dtd

    Arguments:
      - pkg: the model package; must provide GetNotes() and GetCalculator().
        The calculator must provide GetDescriptorNames(),
        GetDescriptorSummaries() and GetDescriptorFuncs().
      - summary: text for the ModelSummary element
      - trainingDataId: text for the TrainingDataId element
      - dataPerformance: if provided, a sequence of 2-tuples:
          ( note, performance )
        where performance is of the form:
          ( accuracy, avgCorrectConf, avgIncorrectConf,
            confusionMatrix, thresh, avgSkipConf, nSkipped )
        the last four elements are optional
      - recommendedThreshold: if not None, written (via str()) as the
        RecommendedThreshold element.  A threshold of 0 is written too.
      - classDescriptions: if non-empty, a sequence of ( value, text ) pairs
      - modelType, modelOrganism: optional text; each element is omitted
        when its value is empty or None

    The list defaults are only read, never modified.

    Returns an ElementTree whose root is the RDModelInfo element.

    Raises ValueError if a descriptor function has a `version` attribute
    that is not of the form 'major.minor.patch'.
    """
    def _addText(parent, tag, text):
        # create a leaf child carrying only text and hand it back
        child = SubElement(parent, tag)
        child.text = text
        return child

    head = Element("RDModelInfo")
    _addText(head, "ModelName", pkg.GetNotes() or "Unnamed model")
    _addText(head, "ModelSummary", summary)

    calc = pkg.GetCalculator()
    descrs = SubElement(head, "ModelDescriptors")
    # loop variables are named so that they do not rebind the `summary`
    # argument
    for descrName, descrSummary, func in zip(calc.GetDescriptorNames(),
                                             calc.GetDescriptorSummaries(),
                                             calc.GetDescriptorFuncs()):
        descr = SubElement(descrs, "Descriptor")
        _addText(descr, "DescriptorName", descrName)
        _addText(descr, "DescriptorDetail", descrSummary)
        if hasattr(func, 'version'):
            vers = SubElement(descr, "DescriptorVersion")
            # exactly three dot-separated fields are expected here
            major, minor, patch = func.version.split('.')
            _addText(vers, "VersionMajor", major)
            _addText(vers, "VersionMinor", minor)
            _addText(vers, "VersionPatch", patch)

    _addText(head, "TrainingDataId", trainingDataId)

    for description, perfData in dataPerformance:
        dataNode = SubElement(head, "ValidationData")
        _addText(dataNode, 'ScreenNote', description)
        perf = SubElement(dataNode, "PerformanceData")
        _ConvertModelPerformance(perf, perfData)

    # compare against None so that a threshold of 0 / 0.0 is not dropped
    if recommendedThreshold is not None:
        _addText(head, "RecommendedThreshold", str(recommendedThreshold))

    if classDescriptions:
        classElem = SubElement(head, "ClassDescriptions")
        for val, text in classDescriptions:
            descr = SubElement(classElem, 'ClassDescription')
            _addText(descr, 'ClassVal', str(val))
            _addText(descr, 'ClassText', str(text))

    if modelType:
        _addText(head, "ModelType", modelType)
    if modelOrganism:
        _addText(head, "ModelOrganism", modelOrganism)

    # a single "Created" revision stamped with the local date
    hist = SubElement(head, "ModelHistory")
    revision = SubElement(hist, "Revision")
    tm = time.localtime()
    date = SubElement(revision, "RevisionDate")
    _addText(date, "Year", str(tm[0]))
    _addText(date, "Month", str(tm[1]))
    _addText(date, "Day", str(tm[2]))
    _addText(revision, "RevisionNote", "Created")

    return ElementTree(head)
if __name__ == '__main__':
    # Demo driver: load the pickled model package named on the command line
    # and print the XML generated for it, using a made-up performance record.
    import sys
    import cPickle
    from cStringIO import StringIO

    if len(sys.argv) < 2:
        sys.exit('Usage: %s <pickled-package-file>' % sys.argv[0])

    # NOTE(review): unpickling can execute arbitrary code; only run this on
    # package files from a trusted source.
    inF = open(sys.argv[1], 'rb')
    try:
        pkg = cPickle.load(inF)
    finally:
        # close the file even if unpickling fails
        inF.close()

    perf = (.80, .95, .70, [[4, 1], [1, 4]])
    tree = PackageToXml(pkg, dataPerformance=[('training data performance', perf)])
    buf = StringIO()
    tree.write(buf)
    txt = buf.getvalue()
    header = """<?xml version="1.0"?>
<!DOCTYPE RDModelInfo PUBLIC "-//RD//DTD RDModelInfo //EN" "RD_Model.dtd">
"""
    # a single parenthesised argument prints identically under Python 2
    print(header)
    # put each element on its own line for readability
    print(txt.replace('><', '>\n<'))