Package rdkit :: Package Chem :: Package Features :: Module FeatDirUtils
[hide private]
[frames] | no frames]

Source Code for Module rdkit.Chem.Features.FeatDirUtils

  1  # $Id: FeatDirUtils.py 997 2009-02-25 06:12:43Z glandrum $ 
  2  # 
  3  # Copyright (C) 2004-2006 Rational Discovery LLC 
  4  # 
  5  #   @@ All Rights Reserved  @@ 
  6  # 
  7  from Numeric import * 
  8  import math 
  9   
 10  # BIG NOTE: we are going assume atom IDs starting from 0 instead of 1 
 11  # for all the functions in this file. This is so that they 
 12  # are reasonably indepedent of the combicode. However when using 
 13  # with combicode the caller needs to make sure the atom IDs from combicode 
 14  # are corrected before feeding them in here. 
 15   
16 -def cross(v1,v2):
17 res = array([ v1[1]*v2[2] - v1[2]*v2[1], 18 -v1[0]*v2[2] + v1[2]*v2[0], 19 v1[0]*v2[1] - v1[1]*v2[0]],Float) 20 return res
21
22 -def findNeighbors(atomId, adjMat):
23 """ 24 Find the IDs of the neighboring atom IDs 25 26 ARGUMENTS: 27 atomId - atom of interest 28 adjMat - adjacency matrix for the compound 29 """ 30 nbrs = [] 31 for i,nid in enumerate(adjMat[atomId]): 32 if nid >= 1 : 33 nbrs.append(i) 34 return nbrs
35
36 -def _findAvgVec(coords, center, nbrs) :
37 # find the average of the normalized vectors going from the center atoms to the 38 # neighbors 39 # the average vector is also normalized 40 cpt = array(coords[3*center:3*center+3]) 41 avgVec = 0 42 for nid in nbrs: 43 pt = array(coords[3*nid:3*nid+3]) 44 pt -= cpt 45 pt /= (sqrt(innerproduct(pt, pt))) 46 if (avgVec == 0) : 47 avgVec = pt 48 else : 49 avgVec += pt 50 51 avgVec /= (sqrt(innerproduct(avgVec, avgVec))) 52 return avgVec
53
54 -def GetAromaticFeatVects(coords, featAtoms, featLoc, scale=1.5):
55 """ 56 Compute the direction vector for an aromatic feature 57 58 ARGUMENTS: 59 coords - an list of atom coordinates the format is [x1, y1, z1, x2, y2, z3 ....] 60 featAtoms - list of atom IDs that make up the feature 61 featLoc - location of the aromatic feature specified as a numeric array 62 scale - the size of the direction vector 63 """ 64 dirType = 'linear' 65 head = featLoc 66 ats = [coords[3*(x):3*(x)+3] for x in featAtoms] 67 68 p0 = array(ats[0]) 69 p1 = array(ats[1]) 70 v1 = p0-head 71 v2 = p1-head 72 norm1 = cross(v1,v2) 73 norm1 = norm1/sqrt(innerproduct(norm1,norm1)) 74 norm1 *= scale 75 norm2 = -norm1 76 norm1 += head 77 norm2 += head 78 return ( (head,norm1),(head,norm2) ), dirType
79
80 -def ArbAxisRotation(theta,ax,pt):
81 theta = math.pi*theta/180 82 c = math.cos(theta) 83 s = math.sin(theta) 84 t = 1-c 85 X = ax[0] 86 Y = ax[1] 87 Z = ax[2] 88 mat = [ [t*X*X+c, t*X*Y+s*Z, t*X*Z-s*Y], 89 [t*X*Y-s*Z,t*Y*Y+c,t*Y*Z+s*X], 90 [t*X*Z+s*Y,t*Y*Z-s*X,t*Z*Z+c] ] 91 mat = array(mat) 92 return matrixmultiply(mat,pt)
93
94 -def GetAcceptor2FeatVects(coords, adjMat, atomNames, featAtoms, scale=1.5):
95 """ 96 Get the direction vectors for Acceptor of type 2 97 98 This is the acceptor with two adjacent heavy atoms. We will special case a few things here. 99 If the acceptor atom is an oxygen we will assume a sp3 hybridization 100 the acceptor directions (two of them) 101 reflect that configurations. Otherwise the direction vector in plane with the neighboring 102 heavy atoms 103 104 ARGUMENTS: 105 coords - an list of atom coordinates the format is [x1, y1, z1, x2, y2, z3 ....] 106 adjMat - adjacency martix of the compound 107 featAtoms - list of atoms that are part of the feature 108 scale - length of the direction vector 109 """ 110 assert len(featAtoms) == 1 111 aid = featAtoms[0] 112 cpt = array(coords[3*aid:3*aid+3]) 113 nbrs = findNeighbors(aid, adjMat) 114 assert len(nbrs) == 2 115 116 bvec = _findAvgVec(coords, aid, nbrs) 117 bvec *= (-1.0*scale) 118 #bvec += cpt 119 120 if (atomNames[aid] == 'O') : 121 # assume sp3 122 # we will create two vectors by rotating bvec by half the tetrahedral angle in either directions 123 v1 = array(coords[3*nbrs[0]:3*nbrs[0]+3]) 124 v1 -= cpt 125 v2 = array(coords[3*nbrs[1]:3*nbrs[1]+3]) 126 v2 -= cpt 127 rotAxis = v1 - v2 128 rotAxis /= sqrt(innerproduct(rotAxis, rotAxis)) 129 bv1 = ArbAxisRotation(54.5, rotAxis, bvec) 130 bv1 += cpt 131 bv2 = ArbAxisRotation(-54.5, rotAxis, bvec) 132 bv2 += cpt 133 return ((cpt, bv1), (cpt, bv2),), 'linear' 134 else : 135 bvec += cpt 136 return ((cpt, bvec),), 'linear'
137
138 -def GetDonor3FeatVects(coords, adjMat, featAtoms, scale=1.5):
139 """ 140 Get the direction vectors for Donor of type 3 141 142 This is a donor with three heavy atoms as neighbors. We will assume 143 a tetrahedral arrangement of these neighbors. So the direction we are seeking 144 is the last fourth arm of the sp3 arrangment 145 146 ARGUMENTS: 147 coords - an list of atom coordinates the format is [x1, y1, z1, x2, y2, z3 ....] 148 adjMat - adjacency martix of the compound 149 featAtoms - list of atoms that are part of the feature 150 scale - length of the direction vector 151 """ 152 assert len(featAtoms) == 1 153 aid = featAtoms[0] 154 cpt = array(coords[3*aid:3*aid+3]) 155 nbrs = findNeighbors(aid, adjMat) 156 157 bvec = _findAvgVec(coords, aid, nbrs) 158 bvec *= (-1.0*scale) 159 bvec += cpt 160 return ((cpt, bvec),), 'linear'
161
162 -def _findHydAtoms(nbrs, atomNames):
163 hAtoms = [] 164 for nid in nbrs: 165 if atomNames[nid] == 'H': 166 hAtoms.append(nid) 167 return hAtoms
168
169 -def _checkPlanarity(coords, center, nbrs, tol=1.0e-3):
170 assert len(nbrs) == 3 171 cpt = array(coords[3*center:3*center+3]) 172 v1 = array(coords[3*nbrs[0]:3*nbrs[0]+3]) 173 v1 -= cpt 174 v2 = array(coords[3*nbrs[1]:3*nbrs[1]+3]) 175 v2 -= cpt 176 v3 = array(coords[3*nbrs[2]:3*nbrs[2]+3]) 177 v3 -= cpt 178 normal = cross(v1,v2) 179 dotP = abs(innerproduct(v3, normal)) 180 181 if (dotP <= tol) : 182 return 1 183 else : 184 return 0
185
186 -def GetDonor2FeatVects(coords, adjMat, atomNames, featAtoms, scale=1.5) :
187 """ 188 Get the direction vectors for Donor of type 2 189 190 This is a donor with two heavy atoms as neighbors. The atom may are may not have 191 hydrogen on it. Here are the situations with the neighbors that will be considered here 192 1. two heavy atoms and two hydrogens: we will assume a sp3 arrangement here 193 2. two heavy atoms and one hydrogen: this can either be sp2 or sp3 194 3. two heavy atoms and no hydrogens 195 196 ARGUMENTS: 197 coords - an list of atom coordinates the format is [x1, y1, z1, x2, y2, z3 ....] 198 adjMat - adjacency martix of the compound 199 atomNames - element names of the atoms in the compound 200 featAtoms - list of atoms that are part of the feature 201 scale - length of the direction vector 202 """ 203 204 assert len(featAtoms) == 1 205 aid = featAtoms[0] 206 cpt = array(coords[3*aid:3*aid+3]) 207 # find the two atoms that are neighbors of this atoms 208 nbrs = findNeighbors(aid, adjMat) 209 assert len(nbrs) >= 2 210 211 hydrogens = _findHydAtoms(nbrs, atomNames) 212 213 if len(nbrs) == 2: 214 # there should be no hydrogens in this case 215 assert len(hydrogens) == 0 216 # in this case the direction is the opposite of the average vector of the two neighbors 217 bvec = _findAvgVec(coords, aid, nbrs) 218 bvec *= (-1.0*scale) 219 bvec += cpt 220 return ((cpt, bvec),), 'linear' 221 222 elif len(nbrs) == 3: 223 assert len(hydrogens) == 1 224 # this is a little more tricky we have to check if the hydrogen is in the plane of the 225 # two heavy atoms (i.e. sp2 arrangement) or out of plane (sp3 arrangement) 226 227 # one of the directions will be from hydrogen atom to the heavy atom 228 hid = hydrogens[0] 229 bvec = array(coords[3*hid:3*hid+3]) 230 bvec -= cpt 231 bvec /= (sqrt(innerproduct(bvec, bvec))) 232 bvec *= scale 233 bvec += cpt 234 if _checkPlanarity(coords, aid, nbrs): 235 # only the hydrogen atom direction needs to be used 236 return ((cpt, bvec),), 'linear' 237 else : 238 # we have a non-planar configuration - we will assume sp3 and compute a second direction vector 239 ovec = _findAvgVec(coords, aid, nbrs) 240 ovec *= (-1.0*scale) 241 ovec += cpt 242 return ((cpt, bvec), (cpt, ovec),), 'linear' 243 244 elif len(nbrs) >= 4 : 245 # in this case we should have two or more hydrogens we will simple use there directions 246 res = [] 247 for hid in hydrogenss: 248 bvec = array(coords[3*(hid):3*hid+3]) 249 bvec -= cpt 250 bvec /= (sqrt(innerproduct(bvec, bvec))) 251 bvec *= scale 252 bvec += cpt 253 res.append((cpt, bvec)) 254 return tuple(res), 'linear'
255
256 -def GetDonor1FeatVects(coords, adjMat, atomNames, featAtoms, scale=1.5) :
257 """ 258 Get the direction vectors for Donor of type 1 259 260 This is a donor with one heavy atom. It is not clear where we should we should be putting the 261 direction vector for this. It should probably be a cone. In this case we will just use the 262 direction vector from the donor atom to the heavy atom 263 264 ARGUMENTS: 265 coords - an list of atom coordinates the format is [x1, y1, z1, x2, y2, z3 ....] 266 adjMat - adjacency martix of the compound 267 atomNames - element names of the atoms in the compound 268 featAtoms - list of atoms that are part of the feature 269 scale - length of the direction vector 270 """ 271 assert len(featAtoms) == 1 272 aid = featAtoms[0] 273 nbrs = findNeighbors(aid, adjMat) 274 275 # find the neighboring heavy atom 276 hnbr = -1 277 for nid in nbrs: 278 if atomNames[nid] != 'H': 279 hnbr = nid 280 break 281 282 cpt = array(coords[3*aid:3*aid+3]) 283 v1 = array(coords[3*hnbr:3*hnbr+3]) 284 v1 -= cpt 285 v1 /= (sqrt(innerproduct(v1, v1))) 286 v1 *= (-1.0*scale) 287 v1 += cpt 288 return ((cpt, v1),), 'cone'
289
290 -def GetAcceptor1FeatVects(coords, adjMat, atomNames, featAtoms, scale=1.5) :
291 """ 292 Get the direction vectors for Acceptor of type 1 293 294 This is a acceptor with one heavy atom neighbor. There are two possibilities we will 295 consider here 296 1. The bond to the heavy atom is a single bond e.g. CO 297 In this case we don't know the exact direction and we just use the inversion of this bond direction 298 and mark this direction as a 'cone' 299 2. The bond to the heavy atom is a double bond e.g. C=O 300 In this case the we have two possible direction except in some special cases e.g. SO2 301 where again we will use bond direction 302 303 ARGUMENTS: 304 coords - an list of atom coordinates the format is [x1, y1, z1, x2, y2, z3 ....] 305 adjMat - adjacency martix of the compound 306 atomNames - element names of the atoms in the compound 307 featAtoms - list of atoms that are part of the feature 308 scale - length of the direction vector 309 """ 310 assert len(featAtoms) == 1 311 aid = featAtoms[0] 312 nbrs = findNeighbors(aid, adjMat) 313 cpt = array(coords[3*aid:3*aid+3]) 314 # find the adjacent heavy atom 315 heavyAt = -1 316 for nbr in nbrs: 317 if atomNames[nbr] != 'H': 318 heavyAt = nbr 319 break 320 321 singleBnd = 1 322 if adjMat[aid][heavyAt] >= 1.5: 323 singleBnd = 0 324 325 # special scale - if the heavy atom is a sulfer (we sould proabably check phosphorous as well 326 sulfer = 0 327 if atomNames[heavyAt] == 'S': 328 sulfer = 1 329 330 if singleBnd or sulfer: 331 v1 = array(coords[3*heavyAt:3*heavyAt+3]) 332 v1 -= cpt 333 v1 /= (sqrt(innerproduct(v1, v1))) 334 v1 *= (-1.0*scale) 335 v1 += cpt 336 return ((cpt, v1),), 'cone' 337 338 else : 339 # ok in this case we will assume that 340 # heavy atom is sp2 hybridized and the diretion vectors (two of them) 341 # are in the same plane, we will find this plane by looking for one 342 # of the neighbors of the heavy atom 343 hvNbrs = findNeighbors(heavyAt, adjMat) 344 hvNbr = -1 345 for nbr in hvNbrs: 346 if (nbr != aid) : 347 hvNbr = nbr 348 break 349 350 pt1 = array(coords[3*hvNbr:3*hvNbr+3]) 351 v1 = array(coords[3*heavyAt:3*heavyAt+3]) 352 pt1 -= v1 353 v1 -= cpt 354 rotAxis = cross(v1, pt1) 355 rotAxis /= sqrt(innerproduct(rotAxis, rotAxis)) 356 bv1 = ArbAxisRotation(120, rotAxis, v1) 357 bv1 /= sqrt(innerproduct(bv1, bv1)) 358 bv1 *= scale 359 bv1 += cpt 360 bv2 = ArbAxisRotation(-120, rotAxis, v1) 361 bv2 /= sqrt(innerproduct(bv2, bv2)) 362 bv2 *= scale 363 bv2 += cpt 364 return ((cpt, bv1), (cpt, bv2),), 'linear'
365