Package rdkit :: Package Chem :: Package Suppliers :: Module DbMolSupplier
[hide private]
[frames | no frames]

Source Code for Module rdkit.Chem.Suppliers.DbMolSupplier

  1  # $Id: DbMolSupplier.py 997 2009-02-25 06:12:43Z glandrum $ 
  2  # 
  3  # Copyright (C) 2003-2006 greg Landrum and Rational Discovery LLC 
  4  # 
  5  #   @@ All Rights Reserved  @@ 
  6  # 
  7  """ Supplies a class for working with molecules from databases 
  8  #DOC  
  9   
 10  """ 
 11  from rdkit import Chem 
 12  from rdkit.Chem.Suppliers.MolSupplier import MolSupplier 
 13  import sys 
def warning(msg, dest=None):
    """Write a warning message to a stream.

    Arguments:
      - msg: the text to write; it is written verbatim (no newline is added).
      - dest: a file-like object with a write() method.  When omitted (or
        None) the current sys.stderr is used.

    """
    # Resolve the stream at call time rather than binding sys.stderr as a
    # default argument: a default is evaluated once, when the function is
    # defined, so a later redirection of sys.stderr would be ignored.
    if dest is None:
        dest = sys.stderr
    dest.write(msg)
16
class DbMolSupplier(MolSupplier):
    """Base class for suppliers that build molecules from database rows.

    New molecules come back with all additional fields from the
    database set in a "_fieldsFromDb" data member.

    """

    def __init__(self, dbResults,
                 molColumnFormats={'SMILES': 'SMI',
                                   'SMI': 'SMI',
                                   'MOLPKL': 'PKL'},
                 nameCol='',
                 transformFunc=None,
                 **kwargs):
        """Locate the molecule (and optional name) column in a result set.

        Arguments:
          - dbResults: should be a subclass of
            Dbase.DbResultSet.DbResultBase; it must provide
            GetColumnNames().
          - molColumnFormats: maps candidate molecule column names
            (matched case-insensitively) to a format code, 'SMI' or 'PKL'.
            The first candidate found among the columns is used.  The
            mapping is only read, never modified.
          - nameCol: name of the column (case-insensitive) used to set
            each molecule's '_Name' property; no name is set if the
            column is not present.
          - transformFunc: optional callable invoked as
            transformFunc(mol, data) on each new molecule; its return
            value replaces the molecule.
          - kwargs: accepted and ignored.

        Raises ValueError if none of the molColumnFormats columns is
        present in the result set.

        """
        self._data = dbResults
        self._colNames = [x.upper() for x in self._data.GetColumnNames()]
        self.transformFunc = transformFunc
        self.molCol = -1
        # nameCol is an index into the *full* column list, i.e. before the
        # molecule column is removed below.
        try:
            self.nameCol = self._colNames.index(nameCol.upper())
        except ValueError:
            self.nameCol = -1
        # Iterate over items so the format is taken from the original key;
        # looking it up again with the upper-cased name raised KeyError for
        # keys that were not already upper case.
        for name, fmt in molColumnFormats.items():
            try:
                idx = self._colNames.index(name.upper())
            except ValueError:
                continue
            self.molCol = idx
            self.molFmt = fmt
            break
        if self.molCol < 0:
            raise ValueError('DbResultSet has no recognizable molecule column')
        # The reported column names exclude the molecule column.
        del self._colNames[self.molCol]
        self._colNames = tuple(self._colNames)
        self._numProcessed = 0

    def GetColumnNames(self):
        """Return the upper-cased column names, molecule column excluded."""
        return self._colNames

    def _BuildMol(self, data):
        """Build a molecule from one database row.

        Arguments:
          - data: a sequence of column values for one row, in the column
            order of the result set (molecule column included).

        Returns the new molecule, or None if it could not be constructed
        or if transformFunc failed.  On success every non-molecule column
        is set as a string property on the molecule, the remaining row
        values are stored in its "_fieldsFromDb" member and, if a name
        column was found, '_Name' is set from it.

        """
        data = list(data)
        molD = data[self.molCol]
        del data[self.molCol]
        self._numProcessed += 1
        # Start from None so an unrecognized molFmt yields None instead of
        # an UnboundLocalError further down.
        newM = None
        try:
            if self.molFmt == 'SMI':
                newM = Chem.MolFromSmiles(str(molD))
                if not newM:
                    warning('Problems processing mol %d, smiles: %s\n' %
                            (self._numProcessed, molD))
            elif self.molFmt == 'PKL':
                # NOTE(review): str() on a bytes pickle would not round-trip
                # under Python 3 -- confirm what the database layer returns.
                newM = Chem.Mol(str(molD))
        except Exception:
            # Best effort: report the failure and hand back None.
            import traceback
            traceback.print_exc()
            newM = None
        else:
            if newM and self.transformFunc:
                try:
                    newM = self.transformFunc(newM, data)
                except Exception:
                    import traceback
                    traceback.print_exc()
                    newM = None
            if newM:
                newM._fieldsFromDb = data
                for i, value in enumerate(data):
                    newM.SetProp(self._colNames[i], str(value))
                if self.nameCol >= 0:
                    # self.nameCol indexes the full row, but the molecule
                    # column has been removed from data: columns after it
                    # have shifted down by one.
                    nameIdx = self.nameCol
                    if nameIdx > self.molCol:
                        nameIdx -= 1
                    newM.SetProp('_Name', str(data[nameIdx]))
        return newM
94
class ForwardDbMolSupplier(DbMolSupplier):
    """DbMol supplier supporting only forward iteration.

    New molecules come back with all additional fields from the
    database set in a "_fieldsFromDb" data member.

    """

    def __init__(self, dbResults, **kwargs):
        """Set up the supplier and position it at the first row.

        DbResults should be an iterator for Dbase.DbResultSet.DbResultBase;
        all keyword arguments are passed on to DbMolSupplier.__init__.

        """
        DbMolSupplier.__init__(self, dbResults, **kwargs)
        self.Reset()

    def Reset(self):
        """Restart iteration by requesting a fresh iterator over the data."""
        self._dataIter = iter(self._data)

    def NextMol(self):
        """Return the molecule built from the next row, or None.

        None is returned when the rows are exhausted, when the row itself
        is None, or when the molecule could not be built.

        NOTE: this has side effects (the underlying iterator advances).

        """
        # The builtin next() works with both the Python 2 (.next) and
        # Python 3 (.__next__) iterator protocols; the default replaces the
        # explicit StopIteration handler.
        d = next(self._dataIter, None)
        if d is None:
            return None
        return self._BuildMol(d)
131
class RandomAccessDbMolSupplier(DbMolSupplier):
    """DbMol supplier supporting indexed access as well as iteration."""

    def __init__(self, dbResults, **kwargs):
        """Set up the supplier with the cursor before the first row.

        DbResults should be a Dbase.DbResultSet.RandomAccessDbResultSet;
        all keyword arguments are passed on to DbMolSupplier.__init__.

        """
        DbMolSupplier.__init__(self, dbResults, **kwargs)
        self._pos = -1

    def __len__(self):
        """Return the number of rows in the underlying result set."""
        return len(self._data)

    def __getitem__(self, idx):
        """Return the molecule built from row idx of the result set."""
        return self._BuildMol(self._data[idx])

    def Reset(self):
        """Move the cursor back to just before the first row."""
        self._pos = -1

    def NextMol(self):
        """Advance the cursor and return the molecule at the new position.

        Returns None once the cursor has moved past the last row.

        """
        self._pos += 1
        if self._pos >= len(self):
            return None
        return self[self._pos]
157