Package DataStructs :: Module SparseIntVect
[hide private]
[frames] | no frames]

Source Code for Module DataStructs.SparseIntVect

  1  # $Id: SparseIntVect.py 363 2007-10-04 05:37:14Z glandrum $ 
  2  # 
  3  #  Copyright (C) 2007 Greg Landrum 
  4  #   All Rights Reserved 
  5  # 
  6  import copy 
  7   
class pySparseIntVect(object):
    """Sparse vector of integers stored as a dict mapping index -> value.

    this class is pretty much obsolete (it's in C++ now)

    Indices without an entry in ``container`` read as 0.  Two vectors can
    only be combined with each other when they have the same ``size``.
    Iteration yields ``(index, value)`` pairs in increasing index order.
    """
    # Class-level defaults.  __init__ rebinds both on every instance, so the
    # dict below is not shared between instances created the normal way.
    size = 0
    container = {}

    def __init__(self, size):
        """Create an empty vector with `size` addressable positions."""
        self.size = size
        self.container = {}

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    def _checkOther(self, other):
        """Raise TypeError / ValueError unless `other` can be combined with self."""
        if not isinstance(other, pySparseIntVect):
            raise TypeError('expected a pySparseIntVect, got %s'
                            % type(other).__name__)
        if self.size != other.size:
            raise ValueError('size mismatch: %s != %s'
                             % (self.size, other.size))

    def _normIndex(self, which):
        """Return `which` as a non-negative index, raising IndexError if out of range.

        Negative indices count from the end, as for built-in sequences, so
        the accepted range is ``-size <= which < size``.
        """
        if which < -self.size or which >= self.size:
            raise IndexError(which)
        if which < 0:
            which += self.size
        return which

    def _copy(self):
        """Return a new pySparseIntVect with the same size and contents."""
        res = pySparseIntVect(self.size)
        # values are plain ints (see __setitem__), so a shallow copy suffices
        res.container = dict(self.container)
        return res

    # ------------------------------------------------------------------
    # population / reduction
    # ------------------------------------------------------------------
    def UpdateFromSequence(self, seq):
        """Increment the count at every index that appears in `seq`.

        >>> c1=pySparseIntVect(10)
        >>> c1.UpdateFromSequence((0,1,1,5))
        >>> [x for x in c1]
        [(0, 1), (1, 2), (5, 1)]
        >>> c1.UpdateFromSequence((0,3))
        >>> [x for x in c1]
        [(0, 2), (1, 2), (3, 1), (5, 1)]

        """
        for v in seq:
            self[v] += 1

    def InitFromSequence(self, seq):
        """Discard the current contents, then count the indices in `seq`.

        >>> c1=pySparseIntVect(10)
        >>> c1.InitFromSequence((0,1,1,5))
        >>> [x for x in c1]
        [(0, 1), (1, 2), (5, 1)]

        """
        self.container = {}
        self.UpdateFromSequence(seq)

    def Sum(self, useAbs=False):
        """Return the sum of the stored values (of their absolute values if `useAbs`).

        >>> c1=pySparseIntVect(10)
        >>> c1[0] = 3
        >>> c1[2] = 2
        >>> c1[4] = 5
        >>> c1.Sum()
        10

        >>> c1[2] = -2
        >>> c1.Sum()
        6
        >>> c1.Sum(useAbs=True)
        10
        """
        if useAbs:
            return sum(abs(v) for v in self.container.values())
        return sum(self.container.values())

    # ------------------------------------------------------------------
    # comparison
    # ------------------------------------------------------------------
    def __eq__(self, other):
        """Two vectors are equal when their sizes and stored entries match.

        Raises TypeError (rather than returning NotImplemented) when `other`
        is not a pySparseIntVect; that behaviour is kept for existing callers.

        >>> c1=pySparseIntVect(10)
        >>> c1[0] = 3
        >>> c1[2] = 2
        >>> c1[4] = 5
        >>> c2=pySparseIntVect(10)
        >>> c2[0] = 3
        >>> c2[2] = 2
        >>> c1 == c2
        False
        >>> c1 == c1
        True
        """
        if not isinstance(other, pySparseIntVect):
            raise TypeError('expected a pySparseIntVect, got %s'
                            % type(other).__name__)
        if self.size != other.size:
            return False
        return self.container == other.container

    def __ne__(self, other):
        """Inverse of __eq__ (Python 2 does not derive != from ==)."""
        return not self.__eq__(other)

    # ------------------------------------------------------------------
    # in-place operators
    # ------------------------------------------------------------------
    def __iand__(self, other):
        """Keep only indices with a non-zero entry in `other`, taking the smaller value.

        >>> c1=pySparseIntVect(10)
        >>> c1[0] = 3
        >>> c1[2] = 2
        >>> c1[4] = 5
        >>> c2=pySparseIntVect(10)
        >>> c2[0] = 2
        >>> c2[2] = -2
        >>> c2[5] = 6
        >>> c1 &= c2
        >>> [x for x in c1]
        [(0, 2), (2, -2)]

        """
        self._checkOther(other)
        newC = {}
        for idx, v in self.container.items():
            ov = other.container.get(idx, 0)
            if ov:
                newC[idx] = min(v, ov)
        self.container = newC
        return self

    def __ior__(self, other):
        """Take, index by index, the larger of the two values.

        An index present only in self is compared against the implicit 0 of
        `other`; an index present only in `other` is copied unchanged.

        >>> c1=pySparseIntVect(10)
        >>> c1[0] = 3
        >>> c1[2] = 2
        >>> c1[4] = 5
        >>> c2=pySparseIntVect(10)
        >>> c2[0] = 2
        >>> c2[2] = -2
        >>> c2[5] = 6
        >>> c1 |= c2
        >>> [x for x in c1]
        [(0, 3), (2, 2), (4, 5), (5, 6)]

        """
        self._checkOther(other)
        newC = {}
        for idx, v in self.container.items():
            newC[idx] = max(v, other.container.get(idx, 0))
        for idx, v in other.container.items():
            if idx not in newC:
                newC[idx] = v
        self.container = newC
        return self

    def __iadd__(self, other):
        """Add `other` element-wise; entries that become 0 are dropped.

        >>> c1=pySparseIntVect(10)
        >>> c1[0] = 3
        >>> c1[2] = 2
        >>> c1[4] = 5
        >>> c2=pySparseIntVect(10)
        >>> c2[0] = 2
        >>> c2[2] = -2
        >>> c2[5] = 6
        >>> c1 += c2
        >>> [x for x in c1]
        [(0, 5), (4, 5), (5, 6)]

        """
        self._checkOther(other)
        mine = self.container
        # snapshot of the keys: entries are deleted from `mine` inside the loop
        seen = set(mine)
        for idx in seen:
            v = mine[idx] + other[idx]
            if v:
                mine[idx] = v
            else:
                del mine[idx]
        for idx, v in other:
            if idx not in seen:
                mine[idx] = v
        return self

    def __isub__(self, other):
        """Subtract `other` element-wise; entries that become 0 are dropped.

        >>> c1=pySparseIntVect(10)
        >>> c1[0] = 3
        >>> c1[2] = 2
        >>> c1[4] = 5
        >>> c2=pySparseIntVect(10)
        >>> c2[0] = 2
        >>> c2[2] = 2
        >>> c2[5] = 6
        >>> c1 -= c2
        >>> [x for x in c1]
        [(0, 1), (4, 5), (5, -6)]

        """
        self._checkOther(other)
        mine = self.container
        # snapshot of the keys: entries are deleted from `mine` inside the loop
        seen = set(mine)
        for idx in seen:
            v = mine[idx] - other[idx]
            if v:
                mine[idx] = v
            else:
                del mine[idx]
        for idx, v in other:
            if idx not in seen:
                mine[idx] = -v
        return self

    def __imul__(self, other):
        """Multiply by `other` element-wise; entries that become 0 are dropped.

        >>> c1=pySparseIntVect(10)
        >>> c1[0] = 3
        >>> c1[4] = 5
        >>> c2=pySparseIntVect(10)
        >>> c2[0] = 2
        >>> c2[5] = 6
        >>> c1 *= c2
        >>> [x for x in c1]
        [(0, 6)]

        """
        self._checkOther(other)
        mine = self.container
        # list(): entries are deleted from `mine` inside the loop
        for idx in list(mine):
            v = mine[idx] * other[idx]
            if v:
                mine[idx] = v
            else:
                del mine[idx]
        return self

    # ------------------------------------------------------------------
    # binary operators: copy self, then apply the in-place operator
    # ------------------------------------------------------------------
    def __add__(self, other):
        """Return a new vector holding self + other.

        >>> c1=pySparseIntVect(10)
        >>> c1[0] = 3
        >>> c1[4] = 5
        >>> c2=pySparseIntVect(10)
        >>> c2[0] = 2
        >>> c2[5] = 6
        >>> c3 = c2+c1
        >>> [x for x in c3]
        [(0, 5), (4, 5), (5, 6)]

        """
        res = self._copy()
        res += other
        return res

    def __sub__(self, other):
        """Return a new vector holding self - other.

        >>> c1=pySparseIntVect(10)
        >>> c1[0] = 3
        >>> c1[2] = 2
        >>> c1[4] = 5
        >>> c2=pySparseIntVect(10)
        >>> c2[0] = 2
        >>> c2[2] = 2
        >>> c2[5] = 6
        >>> c3 = c1-c2
        >>> [x for x in c3]
        [(0, 1), (4, 5), (5, -6)]
        >>> [x for x in c1]
        [(0, 3), (2, 2), (4, 5)]

        """
        res = self._copy()
        res -= other
        return res

    def __mul__(self, other):
        """Return a new vector holding the element-wise product self * other.

        >>> c1=pySparseIntVect(10)
        >>> c1[0] = 3
        >>> c1[4] = 5
        >>> c2=pySparseIntVect(10)
        >>> c2[0] = 2
        >>> c2[5] = 6
        >>> c3 = c1*c2
        >>> [x for x in c3]
        [(0, 6)]
        >>> [x for x in c1]
        [(0, 3), (4, 5)]

        """
        res = self._copy()
        res *= other
        return res

    def __and__(self, other):
        """Return a new vector holding self & other (see __iand__).

        >>> c1=pySparseIntVect(10)
        >>> c1[0] = 3
        >>> c1[2] = 2
        >>> c1[4] = 5
        >>> c2=pySparseIntVect(10)
        >>> c2[0] = 2
        >>> c2[2] = -2
        >>> c2[5] = 6
        >>> c3 = c1 & c2
        >>> [x for x in c3]
        [(0, 2), (2, -2)]
        >>> [x for x in c1]
        [(0, 3), (2, 2), (4, 5)]

        """
        res = self._copy()
        res &= other
        return res

    def __or__(self, other):
        """Return a new vector holding self | other (see __ior__).

        >>> c1=pySparseIntVect(10)
        >>> c1[0] = 3
        >>> c1[2] = 2
        >>> c1[4] = 5
        >>> c2=pySparseIntVect(10)
        >>> c2[0] = 2
        >>> c2[2] = -2
        >>> c2[5] = 6
        >>> c3 = c1 | c2
        >>> [x for x in c3]
        [(0, 3), (2, 2), (4, 5), (5, 6)]
        >>> [x for x in c1]
        [(0, 3), (2, 2), (4, 5)]

        """
        res = self._copy()
        res |= other
        return res

    # ------------------------------------------------------------------
    # container protocol
    # ------------------------------------------------------------------
    def __len__(self):
        """Return the number of addressable positions (not the number of entries)."""
        return self.size

    def __getitem__(self, which):
        """Return the value at index `which` (0 when nothing is stored there).

        Raises IndexError when `which` is outside ``-size <= which < size``.

        >>> c1=pySparseIntVect(10)
        >>> c1[0] = 3
        >>> c1[4] = 5
        >>> c1[0]
        3
        >>> c1[1]
        0
        >>> c1[-6]
        5

        """
        return self.container.get(self._normIndex(which), 0)

    def __setitem__(self, which, val):
        """Store ``int(val)`` at index `which`.

        Raises IndexError when `which` is outside ``-size <= which < size``.
        """
        which = self._normIndex(which)
        self.container[which] = int(val)

    def __iter__(self):
        """Iterate over the stored ``(index, value)`` pairs in index order.

        >>> c=pySparseIntVect(10)
        >>> c[0] = 3
        >>> c[4] = 5
        >>> c[7] = -1
        >>> for idx,v in c:
        ...   print('%d %d'%(idx,v))
        0 3
        4 5
        7 -1

        """
        # sorted() makes the order deterministic and hands back a snapshot,
        # so callers may modify the vector while looping over it
        return iter(sorted(self.container.items()))
376 377 378 import DataStructs 379 DiceSimilarity=DataStructs.DiceSimilarity
def pyDiceSimilarity(v1,v2,bounds=None,useAbs=False):
  """ Implements the DICE similarity metric.

  The result is 2*total(v1&v2) / (total(v1)+total(v2)), where the totals
  come from each vector's GetTotalVal(useAbs=useAbs).  When the combined
  total is zero the similarity is 0.0.

  If `bounds` is truthy and min(len(v1),len(v2)) divided by the combined
  total is below it, the intersection is skipped and 0.0 is returned.
  NOTE(review): this pre-filter uses len() rather than the totals -- confirm
  that this is intended.

   >>> v1 = DataStructs.IntSparseIntVect(10)
   >>> v2 = DataStructs.IntSparseIntVect(10)
   >>> v1.UpdateFromSequence((1,2,3))
   >>> v2.UpdateFromSequence((1,2,3))
   >>> DiceSimilarity(v1,v2)
   1.0

   >>> v2 = DataStructs.IntSparseIntVect(10)
   >>> v2.UpdateFromSequence((5,6))
   >>> DiceSimilarity(v1,v2)
   0.0

   >>> v1 = DataStructs.IntSparseIntVect(10)
   >>> v2 = DataStructs.IntSparseIntVect(10)
   >>> v1.UpdateFromSequence((1,2,3,4))
   >>> v2.UpdateFromSequence((1,3,5,7))
   >>> DiceSimilarity(v1,v2)
   0.5

   >>> v1 = DataStructs.IntSparseIntVect(10)
   >>> v2 = DataStructs.IntSparseIntVect(10)
   >>> v1.UpdateFromSequence((1,2,3,4,5,6))
   >>> v2.UpdateFromSequence((1,3))
   >>> DiceSimilarity(v1,v2)
   0.5

  """
  total = 1.0*(v1.GetTotalVal(useAbs=useAbs)+v2.GetTotalVal(useAbs=useAbs))
  if not total:
    # nothing set in either vector
    return 0.0

  if bounds and (min(len(v1),len(v2))/total) < bounds:
    # pruned by the bounds pre-filter: no intersection is computed
    overlap = 0.0
  else:
    overlap = (v1&v2).GetTotalVal(useAbs=useAbs)
  return 2.*overlap/total
423 424 #------------------------------------ 425 # 426 # doctest boilerplate 427 #
428 -def _test():
429 import doctest,sys 430 return doctest.testmod(sys.modules["__main__"])
if __name__ == '__main__':
  import sys
  # the exit status is the number of failed doctests (0 means success)
  failed = _test()[0]
  sys.exit(failed)