1
2
3
4
5
6
7 import exceptions
9 """ used to signal problems generating descriptor values """
10 pass
12 """ used to signal problems generating predictions """
13 pass
14
16 """ a container class to package a composite model with a descriptor
17 calculator so that objects needing predictions (compounds, molecules, etc.)
18 can be passed directly in without worrying about generating descriptors
19
20 """
21 - def __init__(self,descCalc=None,model=None,dataSet=None,notes=''):
22 self._descCalc = descCalc
23 self._model = model
24 self._notes = notes
25 self._dataSet = dataSet
26 self._initialized = 0
27 self._supplementalData = []
28
33
38
43
48
50 self._supplementalData = suppD
52 if not hasattr(self,'_supplementalData'):
53 self._supplementalData = []
54 return self._supplementalData
56 if not hasattr(self,'_supplementalData'):
57 self._supplementalData = []
58 self._supplementalData.append(data)
59
def Classify(self, obj, label='', threshold=0):
    """ generates descriptors for an object and returns the model's prediction

      **Arguments**

        - obj: the object to be classified; it is handed unchanged to the
          descriptor calculator's CalcDescriptors() method.

        - label: value placed in the first slot of the example vector.

        - threshold: forwarded to the model's ClassifyExample() method.

      **Returns**

        whatever the model's ClassifyExample() returns.

      **Raises**

        - DescriptorCalculationError: if descriptor generation fails.

        - ClassificationError: if the model fails to produce a prediction.

      **Notes**

        - Init() is run first if the package has not been initialized yet.

        - only Exception subclasses are translated into the two error types
          above; KeyboardInterrupt and SystemExit propagate untouched so the
          caller can still be interrupted.

    """
    if not self._initialized:
        self.Init()

    try:
        descs = self._descCalc.CalcDescriptors(obj)
    except Exception:
        raise DescriptorCalculationError('problems encountered generating descriptors')

    # example layout: [label, descriptor values..., placeholder for the last slot]
    argVect = [label] + list(descs) + [0]
    try:
        res = self._model.ClassifyExample(argVect, threshold=threshold,
                                          appendExample=0)
    except Exception:
        raise ClassificationError('problems encountered generating prediction')

    return res
75
# NOTE(review): the `def` line for this body is missing from this listing.
# The `self.Init()` call in Classify and `pkg.Init()` in the self-test
# suggest it is the body of Init(self) -- confirm against the real file.
#
# Purpose: push an input order to the model that matches the order in which
# the descriptor calculator reports its descriptor names.
#
# Nothing is done (and _initialized stays unset) until both a model and a
# calculator are present.
77 if self._model is None or self._descCalc is None:
78 return
79
# Keep the model's own first and last names and put the calculator's
# descriptor names between them.  This mirrors the example vector built in
# Classify: [label] + descriptors + [trailing slot].
# presumably `lbl` is the label column and `act` the activity/result column
# -- TODO confirm against the model class.
80 nms = self._model.GetDescriptorNames()
81 lbl = nms[0]
82 act = nms[-1]
83 descs = self._descCalc.GetDescriptorNames()
84 order = [lbl] + list(descs) + [act]
85 self._model.SetInputOrder(order)
86
# Mark the package ready so Classify does not call this again.
87 self._initialized = 1
88
89 if __name__=='__main__':
90 from Chem import *
91 import cPickle
92 from ML.ModelPackage import Packager
93
94 calc = cPickle.load(open('test_data/Jan9_build3_calc.dsc','rb'))
95 model = cPickle.load(open('test_data/Jan9_build3_model.pkl','rb'))
96 pkg = Packager.ModelPackage(descCalc=calc,model=model)
97 pkg.SetNotes('General purpose model built from PhysProp data')
98 testD = [
99 ('Fc1ccc(NC(=O)c2cccnc2Oc3cccc(c3)C(F)(F)F)c(F)c1',0,1.0 ),
100 (r'CN/1(=C\C=C(/C=C1)\C\2=C\C=N(C)(Cl)\C=C2)Cl',0,0.70),
101 (r'NS(=O)(=O)c1cc(ccc1Cl)C2(O)NC(=O)c3ccccc32',1,0.70),
102 ]
103
104 for smi,pred,conf in testD:
105 m = MolFromSmiles(smi)
106 p,c = pkg.Classify(m)
107 if pred!=p or conf!=c:
108 raise ValueError,'Bad Prediction: %s'%(repr((smi,pred,conf,p,c)))
109 cPickle.dump(pkg,open('test_data/Jan9_build3_pkg.pkl','wb+'))
110 from Numeric import *
111 import RandomArray
112
113 names = calc.GetDescriptorNames()
114 perm = [names[x] for x in RandomArray.permutation(len(names))]
115 calc.simpleList = perm
116 calc.descriptorNames = perm
117 pkg.Init()
118 for smi,pred,conf in testD:
119 m = MolFromSmiles(smi)
120 p,c = pkg.Classify(m)
121 if pred!=p or conf!=c:
122 raise ValueError,'Bad Prediction: %s'%(repr((smi,pred,conf,p,c)))
123