Package rdkit :: Package Chem :: Package Pharm2D :: Module LazyGenerator
[hide private]
[frames] | no frames]

Source Code for Module rdkit.Chem.Pharm2D.LazyGenerator

  1  # $Id: LazyGenerator.py 1022 2009-03-19 04:46:11Z glandrum $ 
  2  # 
  3  # Copyright (C) 2003-2006 greg Landrum and Rational Discovery LLC 
  4  # 
  5  #   @@ All Rights Reserved  @@ 
  6  # 
  7  raise NotImplementedError,'not finished yet' 
  8  """ lazy generator of 2D pharmacophore signature data 
  9   
 10  """ 
 11  import rdkit.Chem 
 12  from rdkit.Chem.Pharm2D import SigFactory,Matcher,Utils 
 13   
class Generator(object):
    """ lazily evaluates individual bits of a 2D pharmacophore signature

    Bits are computed one at a time, on request, instead of generating the
    whole signature up front.

    Important attributes:

     - mol: the molecule whose signature is being worked with

     - sigFactory : the SigFactory object with signature parameters
              NOTE: no preprocessing is carried out for _sigFactory_.
                    It *must* be pre-initialized.

     - sig: the signature object obtained from sigFactory.GetSignature();
            its GetSize() defines the valid bit range

     - dMat: the distance matrix used for matching

     - bits: dict mapping already-queried bit indices to 0/1, or None when
             caching is disabled

     - pattMatches: per-feature-family lists of matching atom ids, in the
             same order as the (non-skipped) feature families

    """

    def __init__(self, sigFactory, mol, dMat=None, bitCache=True):
        """ constructor

        **Arguments**

          - sigFactory: a signature factory, see class docs

          - mol: a molecule, see class docs

          - dMat: (optional) a distance matrix for the molecule.  If this
            is not provided, one will be calculated

          - bitCache: (optional) if nonzero, a local cache of which bits
            have been queried will be maintained.  Otherwise things must
            be recalculated each time a bit is queried.

        **Raises**

          - ValueError: if sigFactory is not a SigFactory.SigFactory

        """
        if not isinstance(sigFactory, SigFactory.SigFactory):
            raise ValueError('bad factory')

        self.sigFactory = sigFactory
        self.mol = mol

        # NOTE(review): the original never assigned self.sig even though
        # GetBit() and __len__() read it.  The module's own test script
        # obtains a signature via factory.GetSignature(), so the same call
        # is used here -- confirm this is the intended signature object.
        self.sig = sigFactory.GetSignature()

        if dMat is None:
            useBO = sigFactory.includeBondOrder
            # "import rdkit.Chem" binds the name rdkit, not Chem, so the
            # function has to be reached through the package.
            dMat = rdkit.Chem.GetDistanceMatrix(mol, useBO)
        self.dMat = dMat

        self.bits = {} if bitCache else None

        featFactory = sigFactory.featFactory
        skipFeats = sigFactory.skipFeats
        featFamilies = [fam for fam in featFactory.GetFeatureFamilies()
                        if fam not in skipFeats]

        # collect the atom ids of every feature, grouped by family
        featMatches = dict((fam, []) for fam in featFamilies)
        for feat in featFactory.GetFeaturesForMol(mol):
            fam = feat.GetFamily()
            if fam not in skipFeats:
                featMatches[fam].append(feat.GetAtomIds())

        # NOTE(review): the original referenced an undefined name
        # (pattMatches) here and overwrote the dict above with the results
        # of an argument-less GetMolFeature() call.  The matches are stored
        # as a list ordered like featFamilies, which is presumably what the
        # matcher's matchingAtoms argument expects -- verify against
        # Matcher.GetAtomsMatchingBit.
        self.pattMatches = [featMatches[fam] for fam in featFamilies]

    def GetBit(self, idx):
        """ returns 1 if the bit is set, 0 otherwise

        Results are cached in self.bits when caching is enabled.

        **Raises**

          - IndexError: if idx is negative or >= the signature size

        """
        if idx < 0 or idx >= self.sig.GetSize():
            raise IndexError('Index %d invalid' % (idx))
        if self.bits is not None and idx in self.bits:
            return self.bits[idx]

        tmp = Matcher.GetAtomsMatchingBit(self.sig, idx, self.mol,
                                          dMat=self.dMat, justOne=1,
                                          matchingAtoms=self.pattMatches)
        # any non-empty match result means the bit is set
        res = 1 if tmp else 0

        if self.bits is not None:
            self.bits[idx] = res
        return res

    def __len__(self):
        """ allows class to support len()

        """
        return self.sig.GetSize()

    def __getitem__(self, itm):
        """ allows class to support random access.
        Calls self.GetBit()

        """
        return self.GetBit(itm)
if __name__ == '__main__':
    # Timing comparison: query a fixed random set of bits through the lazy
    # Generator, then query the same bits on fully generated fingerprints.
    import time
    from rdkit import RDConfig, Chem
    from rdkit.Chem.Pharm2D import Gobbi_Pharm2D, Generate
    import random

    factory = Gobbi_Pharm2D.factory
    nToDo = 100
    # the context manager closes the file (the original leaked the handle)
    with open(RDConfig.RDDataDir + "/NCI/first_5K.smi", 'r') as inF:
        inD = inF.readlines()[:nToDo]

    mols = []
    for line in inD:
        # str.strip() returns a new string; the original discarded it, so
        # the SMILES was never actually stripped
        smi = line.split('\t')[0].strip()
        mols.append(Chem.MolFromSmiles(smi))

    sig = factory.GetSignature()

    nBits = 300
    random.seed(23)  # fixed seed so both timings query the same bits
    bits = [random.randint(0, sig.GetSize() - 1) for x in range(nBits)]

    print('Using the Lazy Generator')
    t1 = time.time()
    for i, mol in enumerate(mols):
        if not i % 10:
            print('done mol %d of %d' % (i, len(mols)))
        gen = Generator(factory, mol)
        for bit in bits:
            v = gen[bit]
    t2 = time.time()
    print('\tthat took %4.2f seconds' % (t2 - t1))

    print('Generating and checking signatures')
    t1 = time.time()
    for i, mol in enumerate(mols):
        if not i % 10:
            print('done mol %d of %d' % (i, len(mols)))
        sig = Generate.Gen2DFingerprint(mol, factory)
        for bit in bits:
            v = sig[bit]
    t2 = time.time()
    print('\tthat took %4.2f seconds' % (t2 - t1))