Package rdkit :: Package Chem :: Package Pharm2D :: Module SigFactory
[hide private]
[frames | no frames]

Source Code for Module rdkit.Chem.Pharm2D.SigFactory

  1  # $Id: SigFactory.py 1023 2009-03-20 05:15:16Z glandrum $ 
  2  # 
  3  # Copyright (C) 2003-2008 greg Landrum and Rational Discovery LLC 
  4  # 
  5  #   @@ All Rights Reserved  @@ 
  6  # 
  7  """ contains factory class for producing signatures 
  8   
  9   
 10  """ 
 11  from rdkit.DataStructs import SparseBitVect,IntSparseIntVect,LongSparseIntVect 
 12  from rdkit.Chem.Pharm2D import Utils 
 13  import copy 
 14  import numpy 
 15   
 16  _verbose = False 
 17   
 18   
class SigFactory(object):
  """ Factory for 2D pharmacophore signatures.

  SigFactory's are used by creating one, setting the relevant
  parameters, then calling the GetSignature() method each time a
  signature is required.

  SetBins() (or an explicit call to Init() once bins are set) must be
  made before the size/index methods are used: Init() is what creates
  the _starts, _scaffolds, _nFeats and _sigSize attributes that
  GetSignature(), GetSigSize(), GetBitIdx() and GetBitInfo() read.

  """

  def __init__(self, featFactory, useCounts=False, minPointCount=2, maxPointCount=3,
               shortestPathsOnly=True, includeBondOrder=False, skipFeats=None,
               trianglePruneBins=True):
    """ stores the signature parameters; no sizes are computed here

    **Arguments**

      - featFactory: object providing GetFeatureFamilies() and
        GetFeaturesForMol(mol,includeOnly=family)

      - useCounts: if true, Init() selects an integer (count) vector
        class instead of a bit vector class for signatures

      - minPointCount, maxPointCount: smallest and largest number of
        points per pharmacophore covered by the signature

      - shortestPathsOnly, includeBondOrder: stored on the instance;
        this class does not consult them itself

      - skipFeats: (optional) feature family names to leave out; the
        sequence is stored by reference, not copied

      - trianglePruneBins: forwarded by Init() to
        Utils.GetPossibleScaffolds() as useTriangleInequality

    """
    self.featFactory = featFactory
    self.useCounts = useCounts
    self.minPointCount = minPointCount
    self.maxPointCount = maxPointCount
    self.shortestPathsOnly = shortestPathsOnly
    self.includeBondOrder = includeBondOrder
    self.trianglePruneBins = trianglePruneBins
    if skipFeats is None:
      self.skipFeats = []
    else:
      self.skipFeats = skipFeats
    self._bins = None
    self.sigKlass = None

  def SetBins(self, bins):
    """ bins should be a list of 2-tuples

    A shallow copy of the list is stored and Init() is then called, so
    the signature layout is recomputed immediately.

    """
    self._bins = copy.copy(bins)
    self.Init()

  def GetBins(self):
    """ returns the stored bin list (None until SetBins() is called) """
    return self._bins

  def GetNumBins(self):
    """ returns the number of distance bins; requires SetBins() first """
    return len(self._bins)

  def GetSignature(self):
    """ returns a new, empty signature of the class and size chosen by Init() """
    return self.sigKlass(self._sigSize)

  def _GetBitSummaryData(self, bitIdx):
    """ Internal use only

    Decodes a bit via GetBitInfo() and returns the 5-tuple
    (nPts, combo, scaffold, labels, dMat), where labels are the feature
    family names for combo and dMat is a symmetric nPts x nPts integer
    matrix filled with the scaffold's bin indices.

    """
    nPts, combo, scaffold = self.GetBitInfo(bitIdx)
    fams = self.GetFeatFamilies()
    labels = [fams[x] for x in combo]
    # builtin int is what the (since removed) numpy.int alias referred to
    dMat = numpy.zeros((nPts, nPts), int)
    dVect = Utils.nPointDistDict[nPts]
    for idx, (i, j) in enumerate(dVect):
      dMat[i, j] = scaffold[idx]
      dMat[j, i] = scaffold[idx]

    return nPts, combo, scaffold, labels, dMat

  def GetBitDescriptionAsText(self, bitIdx, includeBins=0, fullPage=1):
    """  returns text with a description of the bit

    **Arguments**

      - bitIdx: an integer bit index

      - includeBins: (optional) if nonzero, information about the bins will be
        included as well

      - fullPage: (optional) if nonzero, html headers and footers will
        be included (so as to make the output a complete page)

    **Returns**

      a string with the HTML

    **Note**

      no formatting is implemented in this version: the bit is decoded
      (so an invalid bitIdx still raises IndexError) but None is
      returned and includeBins/fullPage are ignored.

    """
    self._GetBitSummaryData(bitIdx)
    return None

  def GetBitDescription(self, bitIdx):
    """  returns a text description of the bit

    **Arguments**

      - bitIdx: an integer bit index

    **Returns**

      a string: the space-separated feature family names, followed by
      the rows of the bin-index matrix, each delimited by '|'

    """
    nPts, combo, scaffold, labels, dMat = self._GetBitSummaryData(bitIdx)
    rows = "".join("|" + " ".join(str(x) for x in row) for row in dMat)
    return " ".join(labels) + " " + rows + "|"

  def _findBinIdx(self, dists, bins, scaffolds):
    """ OBSOLETE: this has been rewritten in C++
    Internal use only
     Returns the index of a bin defined by a set of distances.

    **Arguments**

      - dists: a sequence of distances (not binned)

      - bins: a sorted sequence of distance bins (2-tuples)

      - scaffolds: a list of possible scaffolds (bin combinations)

    **Returns**

      an integer bin index, or None if any distance lies outside
      every bin

    **Raises**

      ValueError if the bin combination is not among the scaffolds

    **Note**

      the value returned here is not an index in the overall
      signature.  It is, rather, an offset of a scaffold in the
      possible combinations of distance bins for a given
      proto-pharmacophore.

    """
    whichBins = [0] * len(dists)

    # This would be a ton easier if we had contiguous bins
    # (i.e. if we could maintain the bins as a list of bounds)
    # because then we could use Python's bisect module.
    # Since we can't do that, we've got to do our own binary
    # search here.
    for i, dist in enumerate(dists):
      where = -1

      # do a simple binary search; each bin is half-open: [begBin,endBin)
      startP, endP = 0, len(bins)
      while startP < endP:
        midP = (startP + endP) // 2
        begBin, endBin = bins[midP]
        if dist < begBin:
          endP = midP
        elif dist >= endBin:
          startP = midP + 1
        else:
          where = midP
          break
      if where < 0:
        return None
      whichBins[i] = where
    res = scaffolds.index(tuple(whichBins))
    if _verbose:
      print('----- _fBI  -----------')
      print(' scaffolds: %s' % (scaffolds, ))
      print(' bins: %s' % (whichBins, ))
      print(' res: %s' % (res, ))
    return res

  def GetFeatFamilies(self):
    """ returns the sorted list of feature family names not in skipFeats """
    return sorted(fam for fam in self.featFactory.GetFeatureFamilies()
                  if fam not in self.skipFeats)

  def GetMolFeats(self, mol):
    """ returns, for each family from GetFeatFamilies() (in that order),
    the list of atom-id collections of the features found in mol

    """
    matches = []
    for fam in self.GetFeatFamilies():
      feats = self.featFactory.GetFeaturesForMol(mol, includeOnly=fam)
      matches.append([feat.GetAtomIds() for feat in feats])
    return matches

  def GetBitIdx(self, featIndices, dists, sortIndices=True):
    """ returns the index for a pharmacophore described using a set of
      feature indices and distances

    **Arguments**

      - featIndices: a sequence of feature indices

      - dists: a sequence of distance between the features, only the
        unique distances should be included, and they should be in the
        order defined in Utils.

      - sortIndices : sort the indices

    **Returns**

      the integer bit index

    **Raises**

      - NotImplementedError for more than 3 points

      - IndexError for a point count outside
        [minPointCount,maxPointCount], a feature index outside
        [0,nFeats), or distances that do not map onto a known scaffold

    """
    nPoints = len(featIndices)
    if nPoints > 3:
      raise NotImplementedError('>3 points not supported')
    if nPoints < self.minPointCount:
      raise IndexError('bad number of points')
    if nPoints > self.maxPointCount:
      raise IndexError('bad number of points')

    # this is the start of the nPoint-point pharmacophores
    startIdx = self._starts[nPoints]

    #
    # now we need to map the pattern indices to an offset from startIdx
    #
    if sortIndices:
      featIndices = sorted(featIndices)

    # min() rather than [0] so that unsorted input is validated too
    if nPoints == 0 or min(featIndices) < 0:
      raise IndexError('bad feature index')
    if max(featIndices) >= self._nFeats:
      raise IndexError('bad feature index')

    if nPoints == 3:
      featIndices, dists = Utils.OrderTriangle(featIndices, dists)

    scaffolds = self._scaffolds[len(dists)]
    offset = Utils.CountUpTo(self._nFeats, nPoints, featIndices)
    if _verbose:
      print('offset for feature %s: %d' % (str(featIndices), offset))
    offset *= len(scaffolds)

    if _verbose:
      print('>>>>>>>>>>>>>>>>>>>>>>>')
      print('\tScaffolds: %s %s' % (repr(scaffolds), type(scaffolds)))
      print('\tDists: %s %s' % (repr(dists), type(dists)))
      print('\tbins: %s %s' % (repr(self._bins), type(self._bins)))
    try:
      binIdx = self._findBinIdx(dists, self._bins, scaffolds)
    except ValueError:
      # the bin combination is not one of the allowed scaffolds
      binIdx = None
    if binIdx is None:
      # also reached when a distance falls outside every bin; without
      # this check the addition below would fail with a TypeError
      fams = self.GetFeatFamilies()
      fams = [fams[x] for x in featIndices]
      raise IndexError('distance bin not found: feats: %s; dists=%s; bins=%s; scaffolds: %s'%(fams,dists,self._bins,self._scaffolds))

    return startIdx + offset + binIdx

  def GetBitInfo(self, idx):
    """ returns information about the given bit

     **Arguments**

       - idx: the bit index to be considered

     **Returns**

       a 3-tuple:

         1) the number of points in the pharmacophore

         2) the proto-pharmacophore (tuple of pattern indices)

         3) the scaffold (tuple of distance indices)

     **Raises**

       IndexError if idx is negative or not smaller than the
       signature size

    """
    # negative values would otherwise be decoded through Python's
    # negative list indexing and silently describe the wrong bit
    if idx < 0 or idx >= self._sigSize:
      raise IndexError('bad index (%d) queried. %d is the max'%(idx,self._sigSize))
    # first figure out how many points are in the p'cophore
    nPts = self.minPointCount
    while nPts < self.maxPointCount and self._starts[nPts + 1] <= idx:
      nPts += 1

    # how far are we in from the start point?
    offsetFromStart = idx - self._starts[nPts]
    if _verbose:
      print('\t %d Points, %d offset'%(nPts,offsetFromStart))

    # lookup the number of scaffolds
    nDists = len(Utils.nPointDistDict[nPts])
    scaffolds = self._scaffolds[nDists]

    nScaffolds = len(scaffolds)

    # figure out to which proto-pharmacophore we belong
    # (floor division: the result is used as a list index)
    protoIdx = offsetFromStart // nScaffolds
    indexCombos = Utils.GetIndexCombinations(self._nFeats, nPts)
    combo = tuple(indexCombos[protoIdx])
    if _verbose:
      print('\t combo: %s'%(str(combo)))

    # and which scaffold:
    scaffoldIdx = offsetFromStart % nScaffolds
    scaffold = scaffolds[scaffoldIdx]
    if _verbose:
      print('\t scaffold: %s'%(str(scaffold)))
    return nPts, combo, scaffold

  def Init(self):
    """ Initializes internal parameters.  This **must** be called after
      making any changes to the signature parameters

    Computes, for every point count in [minPointCount,maxPointCount],
    the starting bit (_starts) and the possible scaffolds (_scaffolds,
    indexed by number of distances), then the total signature size
    (_sigSize) and the signature class (sigKlass).

    """
    accum = 0
    self._scaffolds = [0] * (len(Utils.nPointDistDict[self.maxPointCount + 1]))
    self._starts = {}
    # same filter as GetFeatFamilies(): families in skipFeats do not count
    self._nFeats = len(self.GetFeatFamilies())
    for i in range(self.minPointCount, self.maxPointCount + 1):
      self._starts[i] = accum
      nDistsHere = len(Utils.nPointDistDict[i])
      scaffoldsHere = Utils.GetPossibleScaffolds(i, self._bins,
                                                 useTriangleInequality=self.trianglePruneBins)
      nBitsHere = len(scaffoldsHere)
      self._scaffolds[nDistsHere] = scaffoldsHere
      pointsHere = Utils.NumCombinations(self._nFeats, i) * nBitsHere
      accum += pointsHere
    self._sigSize = accum
    if not self.useCounts:
      self.sigKlass = SparseBitVect
    elif self._sigSize < 2**31:
      self.sigKlass = IntSparseIntVect
    else:
      self.sigKlass = LongSparseIntVect

  def GetSigSize(self):
    """ returns the total number of bits computed by Init() """
    return self._sigSize
# Optional accelerator: when the cUtils extension module can be imported,
# its FindBinIdx replaces the pure-Python SigFactory._findBinIdx defined
# on the class.  If the import fails, the Python implementation is kept.
try:
  from rdkit.Chem.Pharmacophores import cUtils
except ImportError:
  # extension not available: keep the pure-Python _findBinIdx
  pass
else:
  # NOTE(review): presumably FindBinIdx accepts the same
  # (dists, bins, scaffolds) arguments and reports failures the same way
  # as the method it replaces -- confirm against the extension.
  SigFactory._findBinIdx = cUtils.FindBinIdx