Package Chem :: Package AtomPairs :: Module Utils
[hide private]
[frames] | no frames]

Source Code for Module Chem.AtomPairs.Utils

  1  # $Id: Utils.py 372 2007-10-07 15:40:32Z glandrum $ 
  2  # 
  3  #  Copyright (C) 2004-2006 Rational Discovery LLC 
  4  # 
  5  #   @@ All Rights Reserved  @@ 
  6  # 
  7  import Chem 
  8  from Chem import rdMolDescriptors 
  9  import math 
 10   
11 -def ExplainAtomCode(code,branchSubtract=0):
12 """ 13 14 **Arguments**: 15 16 - the code to be considered 17 18 - branchSubtract: (optional) the constant that was subtracted off 19 the number of neighbors before integrating it into the code. 20 This is used by the topological torsions code. 21 22 23 >>> m = Chem.MolFromSmiles('C=CC(=O)O') 24 >>> code = GetAtomCode(m.GetAtomWithIdx(0)) 25 >>> ExplainAtomCode(code) 26 ('C', 1, 1) 27 >>> code = GetAtomCode(m.GetAtomWithIdx(1)) 28 >>> ExplainAtomCode(code) 29 ('C', 2, 1) 30 >>> code = GetAtomCode(m.GetAtomWithIdx(2)) 31 >>> ExplainAtomCode(code) 32 ('C', 3, 1) 33 >>> code = GetAtomCode(m.GetAtomWithIdx(3)) 34 >>> ExplainAtomCode(code) 35 ('O', 1, 1) 36 >>> code = GetAtomCode(m.GetAtomWithIdx(4)) 37 >>> ExplainAtomCode(code) 38 ('O', 1, 0) 39 40 """ 41 typeMask = (1<<rdMolDescriptors.AtomPairsParameters.numTypeBits)-1 42 branchMask = (1<<rdMolDescriptors.AtomPairsParameters.numBranchBits)-1 43 piMask = (1<<rdMolDescriptors.AtomPairsParameters.numPiBits)-1 44 45 nBranch = int(code&branchMask) 46 #print code, 47 code = code>>rdMolDescriptors.AtomPairsParameters.numBranchBits 48 nPi = int(code&piMask) 49 #print code, 50 code = code>>rdMolDescriptors.AtomPairsParameters.numPiBits 51 #print code, 52 typeIdx=int(code&typeMask) 53 if typeIdx<len(rdMolDescriptors.AtomPairsParameters.atomTypes): 54 atomNum = rdMolDescriptors.AtomPairsParameters.atomTypes[typeIdx] 55 atomSymbol=Chem.GetPeriodicTable().GetElementSymbol(atomNum) 56 else: 57 atomSymbol='X' 58 return (atomSymbol,nBranch,nPi)
59 60 GetAtomCode=rdMolDescriptors.GetAtomPairAtomCode 61
62 -def NumPiElectrons(atom):
63 """ Returns the number of electrons an atom is using for pi bonding 64 65 >>> m = Chem.MolFromSmiles('C=C') 66 >>> NumPiElectrons(m.GetAtomWithIdx(0)) 67 1 68 69 >>> m = Chem.MolFromSmiles('C#CC') 70 >>> NumPiElectrons(m.GetAtomWithIdx(0)) 71 2 72 >>> NumPiElectrons(m.GetAtomWithIdx(1)) 73 2 74 75 >>> m = Chem.MolFromSmiles('O=C=CC') 76 >>> NumPiElectrons(m.GetAtomWithIdx(0)) 77 1 78 >>> NumPiElectrons(m.GetAtomWithIdx(1)) 79 2 80 >>> NumPiElectrons(m.GetAtomWithIdx(2)) 81 1 82 >>> NumPiElectrons(m.GetAtomWithIdx(3)) 83 0 84 85 FIX: this behaves oddly in these cases: 86 >>> m = Chem.MolFromSmiles('S(=O)(=O)') 87 >>> NumPiElectrons(m.GetAtomWithIdx(0)) 88 2 89 90 >>> m = Chem.MolFromSmiles('S(=O)(=O)(O)O') 91 >>> NumPiElectrons(m.GetAtomWithIdx(0)) 92 0 93 94 In the second case, the S atom is tagged as sp3 hybridized. 95 96 """ 97 98 res = 0 99 if atom.GetIsAromatic(): 100 res = 1 101 elif atom.GetHybridization() != Chem.HybridizationType.SP3: 102 # the number of pi electrons is just the number of 103 # unsaturations (valence - degree): 104 res = atom.GetExplicitValence() - atom.GetDegree() 105 return res
106 107
108 -def BitsInCommon(v1,v2):
109 """ Returns the number of bits in common between two vectors 110 111 **Arguments**: 112 113 - two vectors (sequences of bit ids) 114 115 **Returns**: an integer 116 117 **Notes** 118 119 - the vectors must be sorted 120 121 - duplicate bit IDs are counted more than once 122 123 >>> BitsInCommon( (1,2,3,4,10), (2,4,6) ) 124 2 125 126 Here's how duplicates are handled: 127 >>> BitsInCommon( (1,2,2,3,4), (2,2,4,5,6) ) 128 3 129 130 """ 131 res = 0 132 v2Pos = 0 133 nV2 = len(v2) 134 for val in v1: 135 while v2Pos<nV2 and v2[v2Pos]<val: 136 v2Pos+=1 137 if v2Pos >= nV2: 138 break 139 if v2[v2Pos]==val: 140 res += 1 141 v2Pos += 1 142 return res
143 144
145 -def DiceSimilarity(v1,v2,bounds=None):
146 """ Implements the DICE similarity metric. 147 This is the recommended metric in both the Topological torsions 148 and Atom pairs papers. 149 150 **Arguments**: 151 152 - two vectors (sequences of bit ids) 153 154 **Returns**: a float. 155 156 **Notes** 157 158 - the vectors must be sorted 159 160 161 >>> DiceSimilarity( (1,2,3), (1,2,3) ) 162 1.0 163 >>> DiceSimilarity( (1,2,3), (5,6) ) 164 0.0 165 >>> DiceSimilarity( (1,2,3,4), (1,3,5,7) ) 166 0.5 167 >>> DiceSimilarity( (1,2,3,4,5,6), (1,3) ) 168 0.5 169 170 Note that duplicate bit IDs count multiple times: 171 >>> DiceSimilarity( (1,1,3,4,5,6), (1,1) ) 172 0.5 173 174 but only if they are duplicated in both vectors: 175 >>> DiceSimilarity( (1,1,3,4,5,6), (1,) )==2./7 176 True 177 178 """ 179 denom = 1.0*(len(v1)+len(v2)) 180 if not denom: 181 res = 0.0 182 else: 183 if bounds and (min(len(v1),len(v2))/denom) < bounds: 184 numer = 0.0 185 else: 186 numer = 2.0*BitsInCommon(v1,v2) 187 res = numer/denom 188 189 return res
190 191
192 -def Dot(v1,v2):
193 """ Returns the Dot product between two vectors: 194 195 **Arguments**: 196 197 - two vectors (sequences of bit ids) 198 199 **Returns**: an integer 200 201 **Notes** 202 203 - the vectors must be sorted 204 205 - duplicate bit IDs are counted more than once 206 207 >>> Dot( (1,2,3,4,10), (2,4,6) ) 208 2 209 210 Here's how duplicates are handled: 211 >>> Dot( (1,2,2,3,4), (2,2,4,5,6) ) 212 5 213 >>> Dot( (1,2,2,3,4), (2,4,5,6) ) 214 2 215 >>> Dot( (1,2,2,3,4), (5,6) ) 216 0 217 >>> Dot( (), (5,6) ) 218 0 219 220 """ 221 res = 0 222 nV1 = len(v1) 223 nV2 = len(v2) 224 i = 0 225 j = 0 226 while i<nV1: 227 v1Val = v1[i] 228 v1Count = 1 229 i+=1 230 while i<nV1 and v1[i]==v1Val: 231 v1Count += 1 232 i+=1 233 while j<nV2 and v2[j]<v1Val: 234 j+=1 235 if j < nV2 and v2[j]==v1Val: 236 v2Val = v2[j] 237 v2Count = 1 238 j+=1 239 while j<nV2 and v2[j]==v1Val: 240 v2Count+=1 241 j+=1 242 commonCount=min(v1Count,v2Count) 243 res += commonCount*commonCount 244 elif j>=nV2: 245 break 246 return res
247
248 -def CosineSimilarity(v1,v2):
249 """ Implements the Cosine similarity metric. 250 This is the recommended metric in the LaSSI paper 251 252 **Arguments**: 253 254 - two vectors (sequences of bit ids) 255 256 **Returns**: a float. 257 258 **Notes** 259 260 - the vectors must be sorted 261 262 >>> print '%.3f'%CosineSimilarity( (1,2,3,4,10), (2,4,6) ) 263 0.516 264 >>> print '%.3f'%CosineSimilarity( (1,2,2,3,4), (2,2,4,5,6) ) 265 0.714 266 >>> print '%.3f'%CosineSimilarity( (1,2,2,3,4), (1,2,2,3,4) ) 267 1.000 268 >>> print '%.3f'%CosineSimilarity( (1,2,2,3,4), (5,6,7) ) 269 0.000 270 >>> print '%.3f'%CosineSimilarity( (1,2,2,3,4), () ) 271 0.000 272 273 """ 274 d1 = Dot(v1,v1) 275 d2 = Dot(v2,v2) 276 denom = math.sqrt(d1*d2) 277 if not denom: 278 res = 0.0 279 else: 280 numer = Dot(v1,v2) 281 res = numer/denom 282 return res
283 284 285 286 287 #------------------------------------ 288 # 289 # doctest boilerplate 290 #
291 -def _test():
292 import doctest,sys 293 return doctest.testmod(sys.modules["__main__"])
294 295 296 if __name__ == '__main__': 297 import sys 298 failed,tried = _test() 299 sys.exit(failed) 300