Package rdkit :: Package Chem :: Module FastSDMolSupplier
[hide private]
[frames | no frames]

Source Code for Module rdkit.Chem.FastSDMolSupplier

 1  # $Id: FastSDMolSupplier.py 997 2009-02-25 06:12:43Z glandrum $ 
 2  # 
 3  #  Copyright (C) 2007 Greg Landrum 
 4  #   @@ All Rights Reserved @@ 
 5  # 
 6  import sys 
 7  from rdkit import Chem 
 8   
class FastSDMolSupplier(object):
  """ A wrapper around an SDMolSupplier that precomputes and stores
  molecular indices (via text processing) to allow quick length
  calculations and random access.

  NOTE that this class needs to have the entire SD data in memory,
  so it's probably not particularly useful with large files.

  Attributes set by init():
    data  -- the full SD text
    suppl -- the wrapped Chem.SDMolSupplier
    _pos  -- list of character offsets into data, one per record start
    _idx  -- count of next() calls since the last reset
  """
  suppl=None
  data=None
  sanitize=True
  removeHs=True

  def __init__(self,fileN=None,data=None,sanitize=True,removeHs=True):
    """ Builds the supplier from a file name or from SD text.

    Arguments:
      fileN    -- optional name of a file to read; when given, its
                  contents replace whatever was passed as data
      data     -- optional SD text
      sanitize -- forwarded to the wrapped supplier's SetData()
      removeHs -- forwarded to the wrapped supplier's SetData()

    If neither fileN nor data provides any text the instance is left
    uninitialized (suppl and data stay None) and init() can be called
    later.
    """
    if fileN:
      # use a context manager so the file handle is closed promptly
      with open(fileN,'r') as inF:
        data = inF.read()
    self.sanitize=sanitize
    self.removeHs=removeHs
    if data:
      # the record scan in init() looks for '$$$$\n', so normalize
      # DOS line endings first
      data = data.replace('\r\n','\n')
      self.init(data)

  @staticmethod
  def _findRecordStarts(data,recogTxt):
    """ Returns the list of offsets in data at which records start.

    Offset 0 is always the first candidate; every further candidate
    is the position just past an occurrence of recogTxt. The search
    for each terminator begins one character after the current record
    start. The final candidate is then discarded: when data ends with
    recogTxt it points past the last record. As a consequence any
    text after the last terminator is not indexed as a record.
    """
    starts = [0]
    p = 0
    while True:
      p = data.find(recogTxt,p+1)
      if p < 0:
        break
      p += len(recogTxt)
      starts.append(p)
    starts.pop(-1)
    return starts

  def init(self,data,recogTxt='$$$$\n'):
    """ (Re)initializes the supplier from SD text.

    Arguments:
      data     -- the SD text; must be non-empty
      recogTxt -- the text that terminates each record

    Raises ValueError if data is empty.
    """
    if not data:
      raise ValueError('no data')
    # FIX: it'd be nice to not be caching data locally like this, but it's the easiest
    #  way to handle pickle support.
    self.data=data
    self.suppl = Chem.SDMolSupplier()
    self.suppl.SetData(data,sanitize=self.sanitize,removeHs=self.removeHs)

    self._pos = self._findRecordStarts(data,recogTxt)
    self.suppl._SetStreamIndices(self._pos)
    self._idx=0

  def GetItemText(self,idx):
    """ Returns the raw text of record idx.

    Negative indices count from the end. The last record extends to
    the end of the stored data.

    Raises IndexError if idx is out of range.
    """
    nItems = len(self._pos)
    if idx < 0:
      idx += nItems
    if not 0 <= idx < nItems:
      raise IndexError('index out of range')
    startOfItem = self._pos[idx]
    if idx+1 < nItems:
      return self.data[startOfItem:self._pos[idx+1]]
    # last record: slice to the end of the data (an end index of -1
    # would drop the final character)
    return self.data[startOfItem:]

  def reset(self):
    """ Rewinds the wrapped supplier and the iteration counter. """
    self.suppl.reset()
    self._idx=0

  # ----------------------------------------------------------------
  # support random access and an iterator interface:
  def __iter__(self):
    """ Restarts iteration from the first record and returns self. """
    self.reset()
    return self

  def next(self):
    """ Returns the next item from the wrapped supplier. """
    self._idx+=1
    # the builtin works with both the .next() and .__next__() protocols
    return next(self.suppl)
  # Python 3 iterator protocol
  __next__ = next

  def __len__(self):
    """ Returns the length reported by the wrapped supplier. """
    return len(self.suppl)

  def __getitem__(self,idx):
    """ Returns item idx from the wrapped supplier. """
    return self.suppl[idx]