Package rdkit :: Package DataStructs :: Module VectCollection
[hide private]
[frames | no frames]

Source Code for Module rdkit.DataStructs.VectCollection

  1  # $Id: VectCollection.py 997 2009-02-25 06:12:43Z glandrum $ 
  2  # 
  3  #  Copyright (C) 2005-2006 greg landrum and Rational Discovery LLC 
  4  # 
  5  #   @@ All Rights Reserved  @@ 
  6  # 
  7  from rdkit import DataStructs 
  8  import copy,struct,cPickle 
  9   
class VectCollection(object):
    """A keyed collection of bit vectors that is queried as their bitwise OR.

    Child vectors are stored under caller-supplied keys.  Bit queries
    (``vc[i]``, ``GetBit``, ``GetOnBits``, ``len``) are answered from a
    lazily rebuilt vector that is the OR of all current children.

    >>> vc = VectCollection()
    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((1,3,5))
    >>> vc.AddVect(1,bv1)
    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((6,8))
    >>> vc.AddVect(2,bv1)
    >>> len(vc)
    10
    >>> vc.GetNumBits()
    10
    >>> vc[0]
    0
    >>> vc[1]
    1
    >>> vc[9]
    0
    >>> vc[6]
    1
    >>> vc.GetBit(6)
    1
    >>> list(vc.GetOnBits())
    [1, 3, 5, 6, 8]

    keys must be unique, so adding a duplicate replaces the
    previous values:
    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((7,9))
    >>> vc.AddVect(1,bv1)
    >>> len(vc)
    10
    >>> vc[1]
    0
    >>> vc[9]
    1
    >>> vc[6]
    1

    we can also query the children:
    >>> vc.NumChildren()
    2
    >>> cs = vc.GetChildren()
    >>> id,fp = cs[0]
    >>> id
    1
    >>> list(fp.GetOnBits())
    [7, 9]
    >>> id,fp = cs[1]
    >>> id
    2
    >>> list(fp.GetOnBits())
    [6, 8]

    attach/detach operations:
    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((5,6))
    >>> vc.AddVect(3,bv1)
    >>> vc.NumChildren()
    3
    >>> list(vc.GetOnBits())
    [5, 6, 7, 8, 9]
    >>> vc.DetachVectsNotMatchingBit(6)
    >>> vc.NumChildren()
    2
    >>> list(vc.GetOnBits())
    [5, 6, 8]


    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((7,9))
    >>> vc.AddVect(1,bv1)
    >>> vc.NumChildren()
    3
    >>> list(vc.GetOnBits())
    [5, 6, 7, 8, 9]
    >>> vc.DetachVectsMatchingBit(6)
    >>> vc.NumChildren()
    1
    >>> list(vc.GetOnBits())
    [7, 9]


    to copy VectCollections, use the copy module:
    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((5,6))
    >>> vc.AddVect(3,bv1)
    >>> list(vc.GetOnBits())
    [5, 6, 7, 9]
    >>> vc2 = copy.copy(vc)
    >>> vc.DetachVectsNotMatchingBit(6)
    >>> list(vc.GetOnBits())
    [5, 6]
    >>> list(vc2.GetOnBits())
    [5, 6, 7, 9]

    The Uniquify() method can be used to remove duplicate vectors:
    >>> vc = VectCollection()
    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((7,9))
    >>> vc.AddVect(1,bv1)
    >>> vc.AddVect(2,bv1)
    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((2,3,5))
    >>> vc.AddVect(3,bv1)
    >>> vc.NumChildren()
    3
    >>> vc.Uniquify()
    >>> vc.NumChildren()
    2

    """

    def __init__(self):
        """Create an empty collection with no cached OR vector."""
        self.__vects = {}         # key -> child bit vector
        self.__orVect = None      # cached OR of all children
        self.__numBits = -1       # bit count of the cached OR vector
        self.__needReset = True   # True when the cache is stale

    def GetOrVect(self):
        """Return the OR of all children, rebuilding the cache if stale.

        Returns None when the collection has no children.
        """
        if self.__needReset:
            self.Reset()
        return self.__orVect
    orVect = property(GetOrVect)

    def AddVect(self, id, vect):
        """Store `vect` under key `id`, replacing any vector already there."""
        self.__vects[id] = vect
        self.__needReset = True

    def Reset(self):
        """Rebuild the cached OR vector and bit count if they are stale.

        With no children the cached vector is cleared, the stored bit
        count is left as it was, and the cache stays marked stale.
        """
        if not self.__needReset:
            return
        self.__orVect = None
        if not self.__vects:
            return
        children = iter(self.__vects.values())
        # Copy the first child so the in-place OR below never modifies a
        # vector owned by the caller.
        self.__orVect = copy.copy(next(children))
        self.__numBits = self.__orVect.GetNumBits()
        for vect in children:
            self.__orVect |= vect
        self.__needReset = False

    def NumChildren(self):
        """Return the number of child vectors."""
        return len(self.__vects)

    def GetChildren(self):
        """Return the children as a tuple of (key, vector) pairs."""
        return tuple(self.__vects.items())

    def GetBit(self, id):
        """Return bit `id` of the OR vector (same as ``self[id]``)."""
        return self[id]

    def GetNumBits(self):
        """Return the bit count of the OR vector (same as ``len(self)``)."""
        return len(self)

    def GetOnBits(self):
        """Return the on bits of the OR vector, as given by its GetOnBits()."""
        if self.__needReset:
            self.Reset()
        return self.__orVect.GetOnBits()

    def DetachVectsNotMatchingBit(self, bit):
        """Remove every child that does not have `bit` set."""
        # Snapshot the items so the dict can be modified while looping.
        for key, vect in list(self.__vects.items()):
            if not vect.GetBit(bit):
                del self.__vects[key]
        self.__needReset = True

    def DetachVectsMatchingBit(self, bit):
        """Remove every child that has `bit` set."""
        # Snapshot the items so the dict can be modified while looping.
        for key, vect in list(self.__vects.items()):
            if vect.GetBit(bit):
                del self.__vects[key]
        self.__needReset = True

    def Uniquify(self, verbose=False):
        """Drop children whose on-bit lists duplicate an earlier child's.

        Of several children with identical on bits, the one met first
        when iterating the internal dict is kept.  If `verbose` is true,
        the child counts before and after are printed.
        """
        seen = set()
        kept = {}
        for key, vect in self.__vects.items():
            onBits = tuple(vect.GetOnBits())
            if onBits in seen:
                continue
            seen.add(onBits)
            kept[key] = vect
        if verbose:
            # One pre-formatted string prints the same on Python 2 and 3.
            print('uniquify: %d -> %d' % (len(self.__vects), len(kept)))
        self.__vects = kept
        self.__needReset = True

    def __len__(self):
        """Return the stored bit count of the OR vector.

        The stored count is -1 until a non-empty collection has been
        reset at least once.
        """
        if self.__needReset:
            self.Reset()
        return self.__numBits

    def __getitem__(self, id):
        """Return bit `id` of the OR vector, rebuilding the cache if stale."""
        if self.__needReset:
            self.Reset()
        return self.__orVect.GetBit(id)

    #
    # set up our support for pickling:
    #
    def __getstate__(self):
        """Serialize the children to one byte string.

        Layout, all in native 'I' (unsigned int) packing: the child
        count, then for each child its key, the length of its
        ToBinary() data, and that data.  Keys must therefore be
        packable with struct format 'I'.
        """
        chunks = [struct.pack('I', len(self.__vects))]
        for key, vect in self.__vects.items():
            data = vect.ToBinary()
            size = len(data)
            chunks.append(struct.pack('I', key))
            chunks.append(struct.pack('I%ds' % size, size, data))
        return b''.join(chunks)

    def __setstate__(self, pkl):
        """Rebuild the collection from a string made by __getstate__().

        Each child is recreated by passing its binary data to
        DataStructs.ExplicitBitVect.
        """
        self.__vects = {}
        self.__orVect = None
        self.__numBits = -1
        self.__needReset = True
        szI = struct.calcsize('I')
        offset = 0
        nToRead = struct.unpack_from('I', pkl, offset)[0]
        offset += szI
        for _ in range(nToRead):
            key = struct.unpack_from('I', pkl, offset)[0]
            offset += szI
            size = struct.unpack_from('I', pkl, offset)[0]
            offset += szI
            # unpack_from raises struct.error if the data is truncated.
            data = struct.unpack_from('%ds' % size, pkl, offset)[0]
            offset += size
            self.AddVect(key, DataStructs.ExplicitBitVect(data))
255 256 257 258 #------------------------------------ 259 # 260 # doctest boilerplate 261 #
def _test():
    """Run the doctests of the module currently executing as ``__main__``.

    Returns whatever ``doctest.testmod`` returns (failure count first,
    then number of tests attempted).
    """
    import sys
    import doctest
    main_module = sys.modules["__main__"]
    return doctest.testmod(main_module)


if __name__ == '__main__':
    # Run the doctests and use the failure count as the exit status.
    import sys
    num_failed, _num_tried = _test()
    sys.exit(num_failed)