1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34 """ Implementation of the BRICS algorithm from Degen et al. ChemMedChem *3* 1503-7 (2008)
35
36 """
37 from rdkit import Chem
38 from rdkit.Chem import rdChemReactions as Reactions
39 import sys,re
40
41
42 environs = {
43 'L1':('[#6:1,#7,#8,#0][C:2](=[O:3])','[*:1][C:2](=[O:3])-[1*]'),
44 'L2':('[N:21;!$(N=*)]-;!@[#6:22,#0]','[2*]-[N:21][*:22]'),
45 'L3':('[O:31]-;!@[#6:32]','[3*]-[O:31][*:32]'),
46 'L4':('[#6:41]-;!@[C:42;!$(C=*)]','[*:41]-[C:42]-[4*]'),
47 'L5':('[N:51;!$(N*!-*);!$(N=*);!$(N-[!C])]-[C:52]','[5*]-[N:51][*:52]'),
48 'L6':('[C:61](=[O:62])-;!@[#6:63,#7,#8]','[6*]-[C:61](=[O:62])[*:63]'),
49 'L7a':('[#6:71]-;!@[C:72]','[#6:71]-;!@[C:72]=[7*]'),
50 'L7b':('[C:74]-;!@[#6:73]','[#6:73]-;!@[C:74]=[7*]'),
51 'L8':('[C:81]-;!@[C:82]-;!@[#6:83]','[8*]-[C:81]-;!@[C:82]-;!@[*:83]'),
52 'L9':('[n:91;$(n(:[c,n,o,s]):[c,n,o,s])]','[9*]-[*:91]'),
53 'L10':('[N:101;R;$(N(@C(=O))@[C,N,O,S])]','[10*]-[N:101]'),
54 'L11':('[S:112](-;!@[#6:111])','[*:111][S:112]-[11*]'),
55 'L12':('[#6:121][S:122](=[O:123])(=[O:124])','[*:121][S:122](=[O:123])(=[O:124])-[12*]'),
56 'L13':('[C:131;$(C(-;@[C,N,O,S])-;@[N,O,S])]','[*:131]-[13*]'),
57 'L14':('[c:141;$(c(:[c,n,o,s]):[n,o,s])]','[*:141]-[14*]'),
58 'L14b':('[c:142;$(c(:[c,n,o,s]):[n,o,s])]','[*:142]-[14*]'),
59 'L15':('[C:151;$(C(-;@C)-;@C)]','[*:151]-[15*]'),
60 'L16':('[c:161;$(c(:c):c)]','[*:161]-[16*]'),
61 'L16b':('[c:162;$(c(:c):c)]','[*:162]-[16*]'),
62
63 }
64 reactionDefs = (
65
66 [
67 "{L1}-;!@{L3}>>{L1p}.{L3p}",
68 "{L1}-;!@{L2}>>{L1p}.{L2p}",
69 "{L1}-;!@{L10}>>{L1p}.{L10p}",
70 ],
71
72
73 [
74 "{L12}-;!@{L2}>>{L12p}.{L2p}",
75 "{L14}-;!@{L2}>>{L14p}.{L2p}",
76 "{L16}-;!@{L2}>>{L16p}.{L2p}",
77 ],
78
79
80 [
81 "{L4}-;!@{L3}>>{L4p}.{L3p}",
82 "{L13}-;!@{L3}>>{L13p}.{L3p}",
83 "{L14}-;!@{L3}>>{L14p}.{L3p}",
84 "{L15}-;!@{L3}>>{L15p}.{L3p}",
85 "{L16}-;!@{L3}>>{L16p}.{L3p}",
86 ],
87
88
89 [
90 "{L4}-;!@{L5}>>{L4p}.{L5p}",
91 "{L4}-;!@{L3}>>{L4p}.{L3p}",
92 "{L4}-;!@{L11}>>{L4p}.{L11p}",
93 ],
94
95
96 [
97 "{L13}-;!@{L5}>>{L13p}.{L5p}",
98 "{L15}-;!@{L5}>>{L15p}.{L5p}",
99 ],
100
101
102 [
103 "{L13}-;!@{L6}>>{L13p}.{L6p}",
104 "{L14}-;!@{L6}>>{L14p}.{L6p}",
105 "{L15}-;!@{L6}>>{L15p}.{L6p}",
106 "{L16}-;!@{L6}>>{L16p}.{L6p}",
107 ],
108
109
110 [
111 "{L7a}=;!@{L7b}>>{L7ap}.{L7bp}",
112 ],
113
114
115 [
116 "{L9}-;!@{L8}>>{L9p}.{L8p}",
117 "{L10}-;!@{L8}>>{L10p}.{L8p}",
118 "{L13}-;!@{L8}>>{L13p}.{L8p}",
119 "{L14}-;!@{L8}>>{L14p}.{L8p}",
120 "{L15}-;!@{L8}>>{L15p}.{L8p}",
121 "{L16}-;!@{L8}>>{L16p}.{L8p}",
122 ],
123
124
125 [
126 "{L9}-;!@{L15}>>{L9p}.{L15p}",
127 "{L9}-;!@{L16}>>{L9p}.{L16p}",
128 ],
129
130
131 [
132 "{L10}-;!@{L13}>>{L10p}.{L13p}",
133 "{L10}-;!@{L14}>>{L10p}.{L14p}",
134 "{L10}-;!@{L15}>>{L10p}.{L15p}",
135 "{L10}-;!@{L16}>>{L10p}.{L16p}",
136 ],
137
138
139 [
140 "{L11}-;!@{L13}>>{L11p}.{L13p}",
141 "{L11}-;!@{L14}>>{L11p}.{L14p}",
142 "{L11}-;!@{L15}>>{L11p}.{L15p}",
143 "{L11}-;!@{L16}>>{L11p}.{L16p}",
144 ],
145
146
147
148
149
150 [
151 "{L13}-;!@{L14}>>{L13p}.{L14p}",
152 "{L13}-;!@{L15}>>{L13p}.{L15p}",
153 "{L13}-;!@{L16}>>{L13p}.{L16p}",
154 ],
155
156
157 [
158 "{L14}-;!@{L14b}>>{L14p}.{L14bp}",
159 "{L14}-;!@{L15}>>{L14p}.{L15p}",
160 "{L14}-;!@{L16}>>{L14p}.{L16p}",
161 ],
162
163
164 [
165 "{L15}-;!@{L16}>>{L15p}.{L16p}",
166 ],
167
168
169 [
170 "{L16}-;!@{L16b}>>{L16p}.{L16bp}",
171 ],
172 )
173
174 smartsGps=reactionDefs
175 for k,v in environs.iteritems():
176 for gp in smartsGps:
177 for j,sma in enumerate(gp):
178 sma = sma.replace('{%sp}'%k,v[1]).replace('{%s}'%k,v[0])
179 gp[j] =sma
180 for gp in smartsGps:
181 for defn in gp:
182 try:
183 Reactions.ReactionFromSmarts(defn)
184 except:
185 print defn
186 raise
187
188 reactions = tuple([[Reactions.ReactionFromSmarts(y) for y in x] for x in smartsGps])
189 reverseReactions = []
190 for i,rxnSet in enumerate(smartsGps):
191 for j,sma in enumerate(rxnSet):
192 rs,ps = sma.split('>>')
193 sma = '%s>>%s'%(ps,rs)
194 rxn = Reactions.ReactionFromSmarts(sma)
195 labels = re.findall(r'\[([0-9]+?)\*\]',ps)
196 rxn._matchers=[Chem.MolFromSmiles('[%s*]'%x) for x in labels]
197 reverseReactions.append(rxn)
198 -def BRICSDecompose(mol,allNodes=None,minFragmentSize=2,onlyUseReactions=None,
199 silent=True,keepNonLeafNodes=False):
200 """ returns the BRICS decomposition for a molecule """
201 mSmi = Chem.MolToSmiles(mol,1)
202
203 if allNodes is None:
204 allNodes=set()
205
206 if mSmi in allNodes:
207 return set()
208
209 activePool={mSmi:mol}
210 allNodes.add(mSmi)
211 for gpIdx,reactionGp in enumerate(reactions):
212 newPool = {}
213 while activePool:
214 matched=False
215 nSmi = activePool.keys()[0]
216 mol = activePool.pop(nSmi)
217 for rxnIdx,reaction in enumerate(reactionGp):
218 if onlyUseReactions and (gpIdx,rxnIdx) not in onlyUseReactions:
219 continue
220 if not silent:
221 print '--------'
222 print reactionDefs[gpIdx][rxnIdx]
223 ps = reaction.RunReactants((mol,))
224 if ps:
225 if not silent: print nSmi,'->',len(ps),'products'
226 for prodSeq in ps:
227 seqOk=True
228
229 prodSeq = [(prod.GetNumAtoms(onlyHeavy=True),prod) for prod in prodSeq]
230 prodSeq.sort()
231 for nats,prod in prodSeq:
232 pSmi = Chem.MolToSmiles(prod,1)
233 if minFragmentSize>0:
234 nDummies = pSmi.count('*')
235 if nats-nDummies<minFragmentSize:
236 seqOk=False
237 break
238 prod.pSmi = pSmi
239 if seqOk:
240 matched=True
241 for nats,prod in prodSeq:
242 pSmi = prod.pSmi
243
244 if pSmi not in allNodes:
245 activePool[pSmi] = prod
246 allNodes.add(pSmi)
247 if not matched:
248 newPool[nSmi]=mol
249 activePool = newPool
250 return set(activePool.keys())
251
252
253 import random
254 dummyPattern=Chem.MolFromSmiles('[*]')
255 -def BRICSBuild(fragments,onlyCompleteMols=True,seeds=None,uniquify=True,
256 scrambleReagents=True,maxDepth=3):
257 seen = set()
258 if not seeds:
259 seeds = list(fragments)
260 if scrambleReagents:
261 seeds = list(seeds)
262 random.shuffle(seeds)
263 if scrambleReagents:
264 tempReactions = list(reverseReactions)
265 random.shuffle(tempReactions)
266 else:
267 tempReactions=reverseReactions
268 for seed in seeds:
269 seedIsR1=False
270 seedIsR2=False
271 nextSteps=[]
272 for rxn in tempReactions:
273 if seed.HasSubstructMatch(rxn._matchers[0]):
274 seedIsR1=True
275 if seed.HasSubstructMatch(rxn._matchers[1]):
276 seedIsR2=True
277 for fragment in fragments:
278 ps = None
279 if fragment.HasSubstructMatch(rxn._matchers[0]):
280 if seedIsR2:
281 ps = rxn.RunReactants((fragment,seed))
282 if fragment.HasSubstructMatch(rxn._matchers[1]):
283 if seedIsR1:
284 ps = rxn.RunReactants((seed,fragment))
285 if ps:
286 for p in ps:
287 if uniquify:
288 pSmi =Chem.MolToSmiles(p[0],True)
289 if pSmi in seen:
290 continue
291 else:
292 seen.add(pSmi)
293 if p[0].HasSubstructMatch(dummyPattern):
294 nextSteps.append(p[0])
295 if not onlyCompleteMols:
296 yield p[0]
297 else:
298 yield p[0]
299 if nextSteps and maxDepth>1:
300 for p in BRICSBuild(fragments,onlyCompleteMols=onlyCompleteMols,
301 seeds=nextSteps,uniquify=uniquify,
302 maxDepth=maxDepth-1):
303 if uniquify:
304 pSmi =Chem.MolToSmiles(p,True)
305 if pSmi in seen:
306 continue
307 else:
308 seen.add(pSmi)
309 yield p
310
311
312
313
314
315 if __name__=='__main__':
316 import unittest
363
365
366 m = Chem.MolFromSmiles('CNC(=O)C1=NC=CC(OC2=CC=C(NC(=O)NC3=CC(=C(Cl)C=C3)C(F)(F)F)C=C2)=C1')
367 res = BRICSDecompose(m)
368 self.failUnless(res)
369 self.failUnless(len(res)==7,res)
370
372 m = Chem.MolFromSmiles('FC(F)(F)C1=C(Cl)C=CC(NC(=O)NC2=CC=CC=C2)=C1')
373 res = BRICSDecompose(m)
374 self.failUnless(res)
375 self.failUnless(len(res)==3,res)
376
377
379 allNodes = set()
380 m = Chem.MolFromSmiles('c1ccccc1OCCC')
381 res = BRICSDecompose(m,allNodes=allNodes)
382 self.failUnless(res)
383 leaves=res
384 self.failUnless(len(allNodes)==5,allNodes)
385 res = BRICSDecompose(m,allNodes=allNodes)
386 self.failIf(res)
387 self.failUnless(len(allNodes)==5,allNodes)
388
389 m = Chem.MolFromSmiles('c1ccccc1OCCCC')
390 res = BRICSDecompose(m,allNodes=allNodes)
391 self.failUnless(res)
392 leaves.update(res)
393 self.failUnless(len(allNodes)==8,allNodes)
394 self.failUnless(len(leaves)==6,leaves)
395
396
397 m = Chem.MolFromSmiles('c1cc(C(=O)NCC)ccc1OCCC')
398 res = BRICSDecompose(m,allNodes=allNodes)
399 self.failUnless(res)
400 leaves.update(res)
401 self.failUnless(len(allNodes)==13,allNodes)
402 self.failUnless(len(leaves)==9,leaves)
403
405 allNodes = set()
406 frags = [
407 '[14*]c1ncncn1',
408 '[16*]c1ccccc1',
409 '[14*]c1ncccc1',
410 ]
411 frags = [Chem.MolFromSmiles(x) for x in frags]
412 res = BRICSBuild(frags)
413 self.failUnless(res)
414 res= list(res)
415 self.failUnless(len(res)==6)
416 smis = [Chem.MolToSmiles(x,True) for x in res]
417 self.failUnless('c1ccc(-c2ccccc2)cc1' in smis)
418 self.failUnless('c1ccc(-c2ncccc2)cc1' in smis)
419
421 allNodes = set()
422 frags = [
423 '[16*]c1ccccc1',
424 '[3*]OC',
425 '[9*]n1cccc1',
426 ]
427 frags = [Chem.MolFromSmiles(x) for x in frags]
428 res = BRICSBuild(frags)
429 self.failUnless(res)
430 res= list(res)
431 self.failUnless(len(res)==3)
432 smis = [Chem.MolToSmiles(x,True) for x in res]
433 self.failUnless('c1ccc(-c2ccccc2)cc1' in smis)
434 self.failUnless('COc1ccccc1' in smis)
435 self.failUnless('c1ccc(-n2cccc2)cc1' in smis)
436
438 allNodes = set()
439 frags = [
440 '[16*]c1ccccc1',
441 '[3*]OC',
442 '[3*]OCC(=O)[6*]',
443 ]
444 frags = [Chem.MolFromSmiles(x) for x in frags]
445 res = BRICSBuild(frags)
446 self.failUnless(res)
447 res= list(res)
448 smis = [Chem.MolToSmiles(x,True) for x in res]
449 self.failUnless(len(res)==3)
450 self.failUnless('c1ccc(-c2ccccc2)cc1' in smis)
451 self.failUnless('COc1ccccc1' in smis)
452 self.failUnless('O=C(COc1ccccc1)c1ccccc1' in smis)
453
462
463
464 unittest.main()
465