Package rdkit :: Package Chem :: Package Fingerprints :: Module DbFpSupplier
[hide private]
[frames | no frames]

Source Code for Module rdkit.Chem.Fingerprints.DbFpSupplier

  1  # $Id: DbFpSupplier.py 997 2009-02-25 06:12:43Z glandrum $ 
  2  # 
  3  # Copyright (C) 2003-2006 greg Landrum and Rational Discovery LLC 
  4  # 
  5  #   @@ All Rights Reserved  @@ 
  6  # 
  7  """ Supplies a class for working with fingerprints from databases 
  8  #DOC  
  9   
 10  """ 
 11  from rdkit import RDConfig 
 12  from rdkit.VLib.Node import VLibNode 
 13  from rdkit import DataStructs 
 14  import cPickle 
 15  import sys 
def warning(msg, dest=None):
    """Write a warning message to a stream.

    Arguments:
      - msg: the text to write; it is handed to ``dest.write`` unchanged
        (no newline is appended).
      - dest: any object with a ``write`` method.  When omitted (or None)
        the current ``sys.stderr`` is used.

    """
    # Resolve sys.stderr at call time instead of in the signature: a default
    # of ``dest=sys.stderr`` is evaluated only once, when the module is
    # imported, so a later redirection of sys.stderr would be ignored.
    if dest is None:
        dest = sys.stderr
    dest.write(msg)
18
class DbFpSupplier(VLibNode):
    """Base class for nodes that supply fingerprints read from a database.

    new fps come back with all additional fields from the
    database set in a "_fieldsFromDb" data member

    This class does not define NextItem() itself; next() relies on it, and
    the concrete suppliers in this module implement it.

    """

    def __init__(self, dbResults, fpColName='AutoFragmentFp', usePickles=True):
        """

        DbResults should be a subclass of Dbase.DbResultSet.DbResultBase

        Arguments:
          - dbResults: the result set; it must provide GetColumnNames().
          - fpColName: name of the column holding the fingerprint.  The
            comparison with the result set's column names is
            case-insensitive (both sides are upper-cased).
          - usePickles: if true, the fingerprint column is unpickled;
            otherwise its contents are passed to the
            DataStructs.ExplicitBitVect constructor.

        Raises ValueError if the fingerprint column is not in the result set.

        """
        VLibNode.__init__(self)
        self._usePickles = usePickles
        self._data = dbResults
        self._fpColName = fpColName.upper()
        self._colNames = [x.upper() for x in self._data.GetColumnNames()]
        if self._fpColName not in self._colNames:
            raise ValueError('fp column name "%s" not found in result set: %s' %
                             (self._fpColName, str(self._colNames)))
        # position of the fingerprint within each row of the result set
        self.fpCol = self._colNames.index(self._fpColName)
        # the fingerprint column itself is not reported by GetColumnNames()
        del self._colNames[self.fpCol]
        self._colNames = tuple(self._colNames)
        # number of rows that have been handed to _BuildFp()
        self._numProcessed = 0

    def GetColumnNames(self):
        """Return the upper-cased column names, minus the fingerprint column,
        as a tuple.

        """
        return self._colNames

    def _BuildFp(self, data):
        """Build a fingerprint from one row of the result set.

        Arguments:
          - data: a sequence of field values; it is copied, so the caller's
            row is not modified.

        Returns the fingerprint with the remaining fields of the row stored
        in its "_fieldsFromDb" attribute, or None if the fingerprint could
        not be constructed (the traceback is printed in that case).

        """
        data = list(data)
        pkl = str(data[self.fpCol])
        del data[self.fpCol]
        self._numProcessed += 1
        try:
            if self._usePickles:
                # NOTE(review): unpickling executes arbitrary code if the
                # stored data is malicious; only use this with databases
                # whose contents are trusted.
                newFp = cPickle.loads(pkl)
            else:
                newFp = DataStructs.ExplicitBitVect(pkl)
        except Exception:
            # Deliberately best-effort: report the problem and carry on.
            # Catching Exception (rather than everything) lets
            # KeyboardInterrupt and SystemExit propagate.
            import traceback
            traceback.print_exc()
            newFp = None
        # Compare against None explicitly so that a successfully built
        # fingerprint that happens to evaluate as false still gets its fields.
        if newFp is not None:
            newFp._fieldsFromDb = data
        return newFp

    def next(self):
        """Return the next fingerprint; part of the iterator protocol.

        Raises StopIteration when NextItem() returns None.

        NOTE(review): _BuildFp() also returns None when a fingerprint cannot
        be constructed, so a NextItem() implementation that passes that value
        through makes the iteration stop at the first bad row.

        """
        itm = self.NextItem()
        if itm is None:
            raise StopIteration
        return itm
70
class ForwardDbFpSupplier(DbFpSupplier):
    """ DbFp supplier supporting only forward iteration

    >>> import os.path
    >>> from rdkit.Dbase.DbConnection import DbConnect
    >>> fName = RDConfig.RDTestDatabase
    >>> conn = DbConnect(fName,'simple_combined')
    >>> suppl = ForwardDbFpSupplier(conn.GetData())

    we can loop over the supplied fingerprints:
    >>> fps = []
    >>> for fp in suppl:
    ...   fps.append(fp)
    >>> len(fps)
    12

    """

    def __init__(self, *args, **kwargs):
        """Pass every argument on to DbFpSupplier, then call reset()."""
        DbFpSupplier.__init__(self, *args, **kwargs)
        self.reset()

    def reset(self):
        """Run the inherited reset() and start a new iterator over the data."""
        DbFpSupplier.reset(self)
        self._dataIter = iter(self._data)

    def NextItem(self):
        """Return the fingerprint built from the next row, or None when the
        underlying iterator is exhausted (or yields None).

        NOTE: this has side effects

        """
        try:
            row = self._dataIter.next()
        except StopIteration:
            return None
        if row is None:
            return None
        return self._BuildFp(row)
111
class RandomAccessDbFpSupplier(DbFpSupplier):
    """ DbFp supplier supporting random access:
    >>> import os.path
    >>> from rdkit.Dbase.DbConnection import DbConnect
    >>> fName = RDConfig.RDTestDatabase
    >>> conn = DbConnect(fName,'simple_combined')
    >>> suppl = RandomAccessDbFpSupplier(conn.GetData())
    >>> len(suppl)
    12

    we can pull individual fingerprints:
    >>> fp = suppl[5]
    >>> fp.GetNumBits()
    128
    >>> fp.GetNumOnBits()
    54

    a standard loop over the fingerprints:
    >>> fps = []
    >>> for fp in suppl:
    ...   fps.append(fp)
    >>> len(fps)
    12

    or we can use an indexed loop:
    >>> fps = [None]*len(suppl)
    >>> for i in range(len(suppl)):
    ...   fps[i] = suppl[i]
    >>> len(fps)
    12

    """

    def __init__(self, *args, **kwargs):
        """Pass every argument on to DbFpSupplier, then call reset()."""
        DbFpSupplier.__init__(self, *args, **kwargs)
        self.reset()

    def __len__(self):
        """Return the length of the underlying result set."""
        return len(self._data)

    def __getitem__(self, idx):
        """Return the fingerprint built from entry idx of the result set."""
        return self._BuildFp(self._data[idx])

    def reset(self):
        """Move the cursor to just before the first entry."""
        self._pos = -1

    def NextItem(self):
        """Advance the cursor and return the fingerprint found there, or
        None once the cursor has moved past the last entry.

        """
        self._pos += 1
        if self._pos >= len(self):
            return None
        return self[self._pos]
164 165 166 167 #------------------------------------ 168 # 169 # doctest boilerplate 170 #
def _test():
    """Run the doctests found in the ``__main__`` module.

    Returns the result of doctest.testmod(), a (failed, attempted) pair.

    """
    import doctest
    import sys
    main_module = sys.modules["__main__"]
    return doctest.testmod(main_module)
if __name__ == '__main__':
    import sys
    # Run the doctests; the process exit status is the number of failures.
    num_failed, num_tried = _test()
    sys.exit(num_failed)