Package rdkit :: Package Chem :: Module BRICS
[hide private]
[frames] | no frames]

Source Code for Module rdkit.Chem.BRICS

  1  # $Id: BRICS.py 1005 2009-03-04 16:28:31Z glandrum $ 
  2  # 
  3  #  Copyright (c) 2009, Novartis Institutes for BioMedical Research Inc. 
  4  #  All rights reserved. 
  5  #  
  6  # Redistribution and use in source and binary forms, with or without 
  7  # modification, are permitted provided that the following conditions are 
  8  # met:  
  9  # 
 10  #     * Redistributions of source code must retain the above copyright  
 11  #       notice, this list of conditions and the following disclaimer. 
 12  #     * Redistributions in binary form must reproduce the above 
 13  #       copyright notice, this list of conditions and the following  
 14  #       disclaimer in the documentation and/or other materials provided  
 15  #       with the distribution. 
 16  #     * Neither the name of Novartis Institutes for BioMedical Research Inc.  
 17  #       nor the names of its contributors may be used to endorse or promote  
 18  #       products derived from this software without specific prior written permission. 
 19  # 
 20  # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 21  # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 22  # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
 23  # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
 24  # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
 25  # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
 26  # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 27  # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
 28  # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 29  # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
 30  # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 31  # 
 32  # Created by Greg Landrum, Nov 2008 
 33   
 34  """ Implementation of the BRICS algorithm from Degen et al. ChemMedChem *3* 1503-7 (2008) 
 35   
 36  """ 
 37  from rdkit import Chem 
 38  from rdkit.Chem import rdChemReactions as Reactions 
 39  import sys,re 
 40   
 41  # These are the definitions that will be applied to fragment molecules: 
 42  environs = { 
 43    'L1':('[#6:1,#7,#8,#0][C:2](=[O:3])','[*:1][C:2](=[O:3])-[1*]'), 
 44    'L2':('[N:21;!$(N=*)]-;!@[#6:22,#0]','[2*]-[N:21][*:22]'), 
 45    'L3':('[O:31]-;!@[#6:32]','[3*]-[O:31][*:32]'), 
 46    'L4':('[#6:41]-;!@[C:42;!$(C=*)]','[*:41]-[C:42]-[4*]'), 
 47    'L5':('[N:51;!$(N*!-*);!$(N=*);!$(N-[!C])]-[C:52]','[5*]-[N:51][*:52]'), 
 48    'L6':('[C:61](=[O:62])-;!@[#6:63,#7,#8]','[6*]-[C:61](=[O:62])[*:63]'),  
 49    'L7a':('[#6:71]-;!@[C:72]','[#6:71]-;!@[C:72]=[7*]'), 
 50    'L7b':('[C:74]-;!@[#6:73]','[#6:73]-;!@[C:74]=[7*]'), 
 51    'L8':('[C:81]-;!@[C:82]-;!@[#6:83]','[8*]-[C:81]-;!@[C:82]-;!@[*:83]'), 
 52    'L9':('[n:91;$(n(:[c,n,o,s]):[c,n,o,s])]','[9*]-[*:91]'), 
 53    'L10':('[N:101;R;$(N(@C(=O))@[C,N,O,S])]','[10*]-[N:101]'), 
 54    'L11':('[S:112](-;!@[#6:111])','[*:111][S:112]-[11*]'), 
 55    'L12':('[#6:121][S:122](=[O:123])(=[O:124])','[*:121][S:122](=[O:123])(=[O:124])-[12*]'), 
 56    'L13':('[C:131;$(C(-;@[C,N,O,S])-;@[N,O,S])]','[*:131]-[13*]'), 
 57    'L14':('[c:141;$(c(:[c,n,o,s]):[n,o,s])]','[*:141]-[14*]'), 
 58    'L14b':('[c:142;$(c(:[c,n,o,s]):[n,o,s])]','[*:142]-[14*]'), 
 59    'L15':('[C:151;$(C(-;@C)-;@C)]','[*:151]-[15*]'), 
 60    'L16':('[c:161;$(c(:c):c)]','[*:161]-[16*]'), 
 61    'L16b':('[c:162;$(c(:c):c)]','[*:162]-[16*]'), 
 62   
 63    } 
 64  reactionDefs = ( 
 65    # L1 
 66    [ 
 67    "{L1}-;!@{L3}>>{L1p}.{L3p}", 
 68    "{L1}-;!@{L2}>>{L1p}.{L2p}", 
 69    "{L1}-;!@{L10}>>{L1p}.{L10p}", 
 70    ], 
 71   
 72    # L2  
 73    [ 
 74    "{L12}-;!@{L2}>>{L12p}.{L2p}", 
 75    "{L14}-;!@{L2}>>{L14p}.{L2p}", 
 76    "{L16}-;!@{L2}>>{L16p}.{L2p}", 
 77    ], 
 78     
 79    # L3  
 80    [ 
 81    "{L4}-;!@{L3}>>{L4p}.{L3p}", 
 82    "{L13}-;!@{L3}>>{L13p}.{L3p}", 
 83    "{L14}-;!@{L3}>>{L14p}.{L3p}", 
 84    "{L15}-;!@{L3}>>{L15p}.{L3p}", 
 85    "{L16}-;!@{L3}>>{L16p}.{L3p}", 
 86    ], 
 87     
 88    # L4 
 89    [ 
 90    "{L4}-;!@{L5}>>{L4p}.{L5p}", 
 91    "{L4}-;!@{L3}>>{L4p}.{L3p}", 
 92    "{L4}-;!@{L11}>>{L4p}.{L11p}", 
 93    ], 
 94   
 95    # L5 
 96    [ 
 97    "{L13}-;!@{L5}>>{L13p}.{L5p}", 
 98    "{L15}-;!@{L5}>>{L15p}.{L5p}", 
 99    ], 
100     
101    # L6 
102    [ 
103    "{L13}-;!@{L6}>>{L13p}.{L6p}", 
104    "{L14}-;!@{L6}>>{L14p}.{L6p}", 
105    "{L15}-;!@{L6}>>{L15p}.{L6p}", 
106    "{L16}-;!@{L6}>>{L16p}.{L6p}", 
107    ], 
108     
109    # L7 
110    [ 
111    "{L7a}=;!@{L7b}>>{L7ap}.{L7bp}", 
112    ], 
113   
114    # L8 
115    [ 
116    "{L9}-;!@{L8}>>{L9p}.{L8p}", 
117    "{L10}-;!@{L8}>>{L10p}.{L8p}", 
118    "{L13}-;!@{L8}>>{L13p}.{L8p}", 
119    "{L14}-;!@{L8}>>{L14p}.{L8p}", 
120    "{L15}-;!@{L8}>>{L15p}.{L8p}", 
121    "{L16}-;!@{L8}>>{L16p}.{L8p}", 
122    ], 
123     
124    # L9 
125    [ 
126    "{L9}-;!@{L15}>>{L9p}.{L15p}", 
127    "{L9}-;!@{L16}>>{L9p}.{L16p}", 
128    ], 
129     
130    # L10 
131    [ 
132    "{L10}-;!@{L13}>>{L10p}.{L13p}", 
133    "{L10}-;!@{L14}>>{L10p}.{L14p}", 
134    "{L10}-;!@{L15}>>{L10p}.{L15p}", 
135    "{L10}-;!@{L16}>>{L10p}.{L16p}", 
136    ], 
137     
138    # L11 
139    [ 
140    "{L11}-;!@{L13}>>{L11p}.{L13p}", 
141    "{L11}-;!@{L14}>>{L11p}.{L14p}", 
142    "{L11}-;!@{L15}>>{L11p}.{L15p}", 
143    "{L11}-;!@{L16}>>{L11p}.{L16p}", 
144    ], 
145     
146    # L12 
147    # none left 
148   
149    # L13 
150    [ 
151    "{L13}-;!@{L14}>>{L13p}.{L14p}", 
152    "{L13}-;!@{L15}>>{L13p}.{L15p}", 
153    "{L13}-;!@{L16}>>{L13p}.{L16p}", 
154    ], 
155     
156    # L14 
157    [ 
158    "{L14}-;!@{L14b}>>{L14p}.{L14bp}", # not in original paper 
159    "{L14}-;!@{L15}>>{L14p}.{L15p}", 
160    "{L14}-;!@{L16}>>{L14p}.{L16p}", 
161    ], 
162     
163    # L15 
164    [ 
165    "{L15}-;!@{L16}>>{L15p}.{L16p}", 
166    ], 
167   
168    # L16 
169    [ 
170    "{L16}-;!@{L16b}>>{L16p}.{L16bp}",  # not in original paper 
171    ], 
172    ) 
173   
174  smartsGps=reactionDefs 
175  for k,v in environs.iteritems(): 
176    for gp in smartsGps: 
177      for j,sma in enumerate(gp): 
178        sma = sma.replace('{%sp}'%k,v[1]).replace('{%s}'%k,v[0]) 
179        gp[j] =sma 
180  for gp in smartsGps: 
181    for defn in gp: 
182      try: 
183        Reactions.ReactionFromSmarts(defn) 
184      except: 
185        print defn 
186        raise 
187   
188  reactions = tuple([[Reactions.ReactionFromSmarts(y) for y in x] for x in smartsGps]) 
189  reverseReactions = [] 
190  for i,rxnSet in enumerate(smartsGps): 
191    for j,sma in enumerate(rxnSet): 
192      rs,ps = sma.split('>>') 
193      sma = '%s>>%s'%(ps,rs) 
194      rxn = Reactions.ReactionFromSmarts(sma) 
195      labels = re.findall(r'\[([0-9]+?)\*\]',ps) 
196      rxn._matchers=[Chem.MolFromSmiles('[%s*]'%x) for x in labels] 
197      reverseReactions.append(rxn) 
198 -def BRICSDecompose(mol,allNodes=None,minFragmentSize=2,onlyUseReactions=None, 199 silent=True,keepNonLeafNodes=False):
200 """ returns the BRICS decomposition for a molecule """ 201 mSmi = Chem.MolToSmiles(mol,1) 202 203 if allNodes is None: 204 allNodes=set() 205 206 if mSmi in allNodes: 207 return set() 208 209 activePool={mSmi:mol} 210 allNodes.add(mSmi) 211 for gpIdx,reactionGp in enumerate(reactions): 212 newPool = {} 213 while activePool: 214 matched=False 215 nSmi = activePool.keys()[0] 216 mol = activePool.pop(nSmi) 217 for rxnIdx,reaction in enumerate(reactionGp): 218 if onlyUseReactions and (gpIdx,rxnIdx) not in onlyUseReactions: 219 continue 220 if not silent: 221 print '--------' 222 print reactionDefs[gpIdx][rxnIdx] 223 ps = reaction.RunReactants((mol,)) 224 if ps: 225 if not silent: print nSmi,'->',len(ps),'products' 226 for prodSeq in ps: 227 seqOk=True 228 # we want to disqualify small fragments, so sort the product sequence by size 229 prodSeq = [(prod.GetNumAtoms(onlyHeavy=True),prod) for prod in prodSeq] 230 prodSeq.sort() 231 for nats,prod in prodSeq: 232 pSmi = Chem.MolToSmiles(prod,1) 233 if minFragmentSize>0: 234 nDummies = pSmi.count('*') 235 if nats-nDummies<minFragmentSize: 236 seqOk=False 237 break 238 prod.pSmi = pSmi 239 if seqOk: 240 matched=True 241 for nats,prod in prodSeq: 242 pSmi = prod.pSmi 243 #print '\t',nats,pSmi 244 if pSmi not in allNodes: 245 activePool[pSmi] = prod 246 allNodes.add(pSmi) 247 if not matched: 248 newPool[nSmi]=mol 249 activePool = newPool 250 return set(activePool.keys())
251 252 253 import random 254 dummyPattern=Chem.MolFromSmiles('[*]')
255 -def BRICSBuild(fragments,onlyCompleteMols=True,seeds=None,uniquify=True, 256 scrambleReagents=True,maxDepth=3):
257 seen = set() 258 if not seeds: 259 seeds = list(fragments) 260 if scrambleReagents: 261 seeds = list(seeds) 262 random.shuffle(seeds) 263 if scrambleReagents: 264 tempReactions = list(reverseReactions) 265 random.shuffle(tempReactions) 266 else: 267 tempReactions=reverseReactions 268 for seed in seeds: 269 seedIsR1=False 270 seedIsR2=False 271 nextSteps=[] 272 for rxn in tempReactions: 273 if seed.HasSubstructMatch(rxn._matchers[0]): 274 seedIsR1=True 275 if seed.HasSubstructMatch(rxn._matchers[1]): 276 seedIsR2=True 277 for fragment in fragments: 278 ps = None 279 if fragment.HasSubstructMatch(rxn._matchers[0]): 280 if seedIsR2: 281 ps = rxn.RunReactants((fragment,seed)) 282 if fragment.HasSubstructMatch(rxn._matchers[1]): 283 if seedIsR1: 284 ps = rxn.RunReactants((seed,fragment)) 285 if ps: 286 for p in ps: 287 if uniquify: 288 pSmi =Chem.MolToSmiles(p[0],True) 289 if pSmi in seen: 290 continue 291 else: 292 seen.add(pSmi) 293 if p[0].HasSubstructMatch(dummyPattern): 294 nextSteps.append(p[0]) 295 if not onlyCompleteMols: 296 yield p[0] 297 else: 298 yield p[0] 299 if nextSteps and maxDepth>1: 300 for p in BRICSBuild(fragments,onlyCompleteMols=onlyCompleteMols, 301 seeds=nextSteps,uniquify=uniquify, 302 maxDepth=maxDepth-1): 303 if uniquify: 304 pSmi =Chem.MolToSmiles(p,True) 305 if pSmi in seen: 306 continue 307 else: 308 seen.add(pSmi) 309 yield p
310 311 312 313 # ------- ------- ------- ------- ------- ------- ------- ------- 314 # Begin testing code 315 if __name__=='__main__': 316 import unittest
317 - class TestCase(unittest.TestCase):
318 - def test1(self):
319 m = Chem.MolFromSmiles('CC(=O)OC') 320 res = BRICSDecompose(m) 321 self.failUnless(res) 322 self.failUnless(len(res)==2) 323 324 m = Chem.MolFromSmiles('CC(=O)N1CCC1=O') 325 res = BRICSDecompose(m) 326 self.failUnless(res) 327 self.failUnless(len(res)==2) 328 329 m = Chem.MolFromSmiles('c1ccccc1N(C)C') 330 res = BRICSDecompose(m) 331 self.failUnless(res) 332 self.failUnless(len(res)==2) 333 334 m = Chem.MolFromSmiles('c1cccnc1N(C)C') 335 res = BRICSDecompose(m) 336 self.failUnless(res) 337 self.failUnless(len(res)==2) 338 339 m = Chem.MolFromSmiles('o1ccnc1N(C)C') 340 res = BRICSDecompose(m) 341 self.failUnless(res) 342 self.failUnless(len(res)==2) 343 344 m = Chem.MolFromSmiles('c1ccccc1OC') 345 res = BRICSDecompose(m) 346 self.failUnless(res) 347 self.failUnless(len(res)==2) 348 349 m = Chem.MolFromSmiles('o1ccnc1OC') 350 res = BRICSDecompose(m) 351 self.failUnless(res) 352 self.failUnless(len(res)==2) 353 354 m = Chem.MolFromSmiles('O1CCNC1OC') 355 res = BRICSDecompose(m) 356 self.failUnless(res) 357 self.failUnless(len(res)==2) 358 359 m = Chem.MolFromSmiles('CCCSCC') 360 res = BRICSDecompose(m) 361 self.failUnless(res) 362 self.failUnless(len(res)==4)
363
364 - def test2(self):
365 # example from the paper, nexavar: 366 m = Chem.MolFromSmiles('CNC(=O)C1=NC=CC(OC2=CC=C(NC(=O)NC3=CC(=C(Cl)C=C3)C(F)(F)F)C=C2)=C1') 367 res = BRICSDecompose(m) 368 self.failUnless(res) 369 self.failUnless(len(res)==7,res)
370
371 - def test3(self):
372 m = Chem.MolFromSmiles('FC(F)(F)C1=C(Cl)C=CC(NC(=O)NC2=CC=CC=C2)=C1') 373 res = BRICSDecompose(m) 374 self.failUnless(res) 375 self.failUnless(len(res)==3,res)
376 377
378 - def test4(self):
379 allNodes = set() 380 m = Chem.MolFromSmiles('c1ccccc1OCCC') 381 res = BRICSDecompose(m,allNodes=allNodes) 382 self.failUnless(res) 383 leaves=res 384 self.failUnless(len(allNodes)==5,allNodes) 385 res = BRICSDecompose(m,allNodes=allNodes) 386 self.failIf(res) 387 self.failUnless(len(allNodes)==5,allNodes) 388 389 m = Chem.MolFromSmiles('c1ccccc1OCCCC') 390 res = BRICSDecompose(m,allNodes=allNodes) 391 self.failUnless(res) 392 leaves.update(res) 393 self.failUnless(len(allNodes)==8,allNodes) 394 self.failUnless(len(leaves)==6,leaves) 395 396 397 m = Chem.MolFromSmiles('c1cc(C(=O)NCC)ccc1OCCC') 398 res = BRICSDecompose(m,allNodes=allNodes) 399 self.failUnless(res) 400 leaves.update(res) 401 self.failUnless(len(allNodes)==13,allNodes) 402 self.failUnless(len(leaves)==9,leaves)
403
404 - def test5(self):
405 allNodes = set() 406 frags = [ 407 '[14*]c1ncncn1', 408 '[16*]c1ccccc1', 409 '[14*]c1ncccc1', 410 ] 411 frags = [Chem.MolFromSmiles(x) for x in frags] 412 res = BRICSBuild(frags) 413 self.failUnless(res) 414 res= list(res) 415 self.failUnless(len(res)==6) 416 smis = [Chem.MolToSmiles(x,True) for x in res] 417 self.failUnless('c1ccc(-c2ccccc2)cc1' in smis) 418 self.failUnless('c1ccc(-c2ncccc2)cc1' in smis)
419
420 - def test6(self):
421 allNodes = set() 422 frags = [ 423 '[16*]c1ccccc1', 424 '[3*]OC', 425 '[9*]n1cccc1', 426 ] 427 frags = [Chem.MolFromSmiles(x) for x in frags] 428 res = BRICSBuild(frags) 429 self.failUnless(res) 430 res= list(res) 431 self.failUnless(len(res)==3) 432 smis = [Chem.MolToSmiles(x,True) for x in res] 433 self.failUnless('c1ccc(-c2ccccc2)cc1' in smis) 434 self.failUnless('COc1ccccc1' in smis) 435 self.failUnless('c1ccc(-n2cccc2)cc1' in smis)
436
437 - def test7(self):
438 allNodes = set() 439 frags = [ 440 '[16*]c1ccccc1', 441 '[3*]OC', 442 '[3*]OCC(=O)[6*]', 443 ] 444 frags = [Chem.MolFromSmiles(x) for x in frags] 445 res = BRICSBuild(frags) 446 self.failUnless(res) 447 res= list(res) 448 smis = [Chem.MolToSmiles(x,True) for x in res] 449 self.failUnless(len(res)==3) 450 self.failUnless('c1ccc(-c2ccccc2)cc1' in smis) 451 self.failUnless('COc1ccccc1' in smis) 452 self.failUnless('O=C(COc1ccccc1)c1ccccc1' in smis)
453
454 - def test8(self):
455 random.seed(23) 456 base = Chem.MolFromSmiles("n1cncnc1OCC(C1CC1)OC1CNC1") 457 catalog = BRICSDecompose(base) 458 self.failUnless(len(catalog)==8) 459 catalog = [Chem.MolFromSmiles(x) for x in catalog] 460 ms = list(BRICSBuild(catalog)) 461 self.failUnless(len(ms)==19)
462 463 464 unittest.main() 465