1
2
3
4
5
6 import RDLogger as logging
7 logger = logging.logger()
8 logger.setLevel(logging.INFO)
9 import Chem
10 from Chem import Crippen
11 from Chem import AllChem
12 from Chem.ChemUtils.AlignDepict import AlignDepict
13 import sys
14 _version="0.7.1"
15 _greet="This is TemplateExpand version %s"%_version
16
17 _usage="""
18 Usage: TemplateExpand [options] template <sidechains>
19
20 Unless otherwise indicated, the template and sidechains are assumed to be
21 Smiles
22
23 Each sidechain entry should be:
24 [-r] SMARTS filename
25 The SMARTS pattern is used to recognize the attachment point,
26 if the -r argument is not provided, then atoms matching the pattern
27 will be removed from the sidechains.
28 or
29 -n filename
30 where the attachment atom is the first atom in each molecule
31 The filename provides the list of potential sidechains.
32
33 options:
34 -o filename.sdf: provides the name of the output file, otherwise
35 stdout is used
36
37 --sdf : expect the sidechains to be in SD files
38
39 --moltemplate: the template(s) are in a mol/SD file, new depiction(s)
40 will not be generated unless the --redraw argument is also
41 provided
42
43 --smilesFileTemplate: extract the template(s) from a SMILES file instead of
44 expecting SMILES on the command line.
45
46 --redraw: generate a new depiction for the molecular template(s)
47
48 --useall:
49 or
50 --useallmatches: generate a product for each possible match of the attachment
51 pattern to each sidechain. If this is not provided, the first
52 match (not canonically defined) will be used.
53
54 --force: by default, the program prompts the user if the library is
55 going to contain more than 1000 compounds. This argument
56 disables the prompt.
57
58 --templateSmarts="smarts": provides a space-delimited list containing the SMARTS
59 patterns to be used to recognize attachment points in
60 the template
61
62 --autoNames: when set this toggle causes the resulting compounds to be named
63 based on there sequence id in the file, e.g.
64 "TemplateEnum: Mol_1", "TemplateEnum: Mol_2", etc.
65 otherwise the names of the template and building blocks (from
66 the input files) will be combined to form a name for each
67 product molecule.
68
69 """
71 import sys
72 print >>sys.stderr,_usage
73 sys.exit(-1)
74
75 nDumped=0
76 -def _exploder(mol,depth,sidechains,core,chainIndices,autoNames=True,templateName='',
77 resetCounter=True):
78 global nDumped
79 if resetCounter:
80 nDumped=0
81 ourChains = sidechains[depth]
82 patt = '[%d*]'%(depth+1)
83 patt = Chem.MolFromSmiles(patt)
84 for i,(chainIdx,chain) in enumerate(ourChains):
85 tchain = chainIndices[:]
86 tchain.append((i,chainIdx))
87 rs = Chem.ReplaceSubstructs(mol,patt,chain,replaceAll=True)
88 if rs:
89 r = rs[0]
90 if depth<len(sidechains)-1:
91 for entry in _exploder(r,depth+1,sidechains,core,tchain,
92 autoNames=autoNames,templateName=templateName,
93 resetCounter=0):
94 yield entry
95 else:
96 try:
97 Chem.SanitizeMol(r)
98 except ValueError:
99 import traceback
100 traceback.print_exc()
101 continue
102 if r.HasSubstructMatch(core):
103 try:
104 AlignDepict(r,core)
105 except:
106 import traceback
107 traceback.print_exc()
108 print >>sys.stderr,Chem.MolToSmiles(r)
109 else:
110 print >>sys.stderr,'>>> no match'
111 AllChem.Compute2DCoords(r)
112 Chem.Kekulize(r)
113 if autoNames:
114 tName = "TemplateEnum: Mol_%d"%(nDumped+1)
115 else:
116 tName = templateName
117 for bbI,bb in enumerate(tchain):
118 bbMol = sidechains[bbI][bb[0]][1]
119 if bbMol.HasProp('_Name'):
120 bbNm = bbMol.GetProp('_Name')
121 else:
122 bbNm = str(bb[1])
123 tName += '_' + bbNm
124
125 r.SetProp("_Name",tName)
126 r.SetProp('seq_num',str(nDumped+1))
127 r.SetProp('reagent_indices','_'.join([str(x[1]) for x in tchain]))
128 for bbI,bb in enumerate(tchain):
129 bbMol = sidechains[bbI][bb[0]][1]
130 if bbMol.HasProp('_Name'):
131 bbNm = bbMol.GetProp('_Name')
132 else:
133 bbNm = str(bb[1])
134 r.SetProp('building_block_%d'%(bbI+1),bbNm)
135 for propN in bbMol.GetPropNames():
136 r.SetProp('building_block_%d_%s'%(bbI+1,propN),bbMol.GetProp(propN))
137 nDumped += 1
138 if not nDumped%100:
139 logger.info('Done %d molecules'%nDumped)
140 yield r
141
142 -def Explode(template,sidechains,outF,autoNames=True):
143 chainIndices=[]
144 core = Chem.DeleteSubstructs(template,Chem.MolFromSmiles('[*]'))
145 try:
146 templateName = template.GetProp('_Name')
147 except KeyError:
148 templateName="template"
149 for mol in _exploder(template,0,sidechains,core,chainIndices,autoNames=autoNames,templateName=templateName):
150 outF.write(Chem.MolToMolBlock(mol))
151 for pN in mol.GetPropNames():
152 print >>outF,'> <%s>\n%s\n'%(pN,mol.GetProp(pN))
153 print >>outF,'$$$$'
154
175
177 if sma:
178 try:
179 patt = Chem.MolFromSmarts(sma)
180 except:
181 logger.error('could not construct pattern from smarts: %s'%sma,
182 exc_info=True)
183 return None
184 else:
185 patt = None
186
187 if replace:
188 replacement = Chem.MolFromSmiles('[*]')
189
190 res = []
191 for idx,mol in enumerate(suppl):
192 if not mol:
193 continue
194 if patt:
195 if not mol.HasSubstructMatch(patt):
196 logger.warning('The substructure pattern did not match sidechain %d. This may result in errors.'%(idx+1))
197 if replace:
198 tmp = list(Chem.ReplaceSubstructs(mol,patt,replacement))
199 if not useAll: tmp = [tmp[0]]
200 for i,entry in enumerate(tmp):
201 entry = MoveDummyNeighborsToBeginning(entry)
202 if not entry:
203 continue
204 entry = entry[0]
205
206 for propN in mol.GetPropNames():
207 entry.SetProp(propN,mol.GetProp(propN));
208
209
210 tmp[i] = (idx+1,entry)
211 else:
212
213
214 matches = mol.GetSubstructMatches(patt)
215 if matches:
216 tmp = []
217 for match in matches:
218 smi = Chem.MolToSmiles(mol,True,rootedAtAtom=match[0])
219 entry = Chem.MolFromSmiles(smi)
220 for propN in mol.GetPropNames():
221 entry.SetProp(propN,mol.GetProp(propN));
222
223
224
225 tmp.append((idx+1,entry))
226 else:
227 tmp = None
228 else:
229 tmp = [(idx+1,mol)]
230 if tmp:
231 res.extend(tmp)
232 return res
233
234 if __name__=='__main__':
235 import getopt,sys
236 print >>sys.stderr,_greet
237
238 try:
239 args,extras = getopt.getopt(sys.argv[1:],'o:h',[
240 'sdf',
241 'moltemplate',
242 'smilesFileTemplate',
243 'templateSmarts=',
244 'redraw',
245 'force',
246 'useall',
247 'useallmatches',
248 'autoNames',
249 ])
250 except:
251 import traceback
252 traceback.print_exc()
253 Usage()
254
255 if len(extras)<3:
256 Usage()
257
258 tooLong=1000
259 sdLigands=False
260 molTemplate=False
261 redrawTemplate=False
262 outF=None
263 forceIt=False
264 useAll=False
265 templateSmarts=[]
266 smilesFileTemplate=False
267 autoNames=False
268 for arg,val in args:
269 if arg=='-o':
270 outF=val
271 elif arg=='--sdf':
272 sdLigands=True
273 elif arg=='--moltemplate':
274 molTemplate=True
275 elif arg=='--smilesFileTemplate':
276 smilesFileTemplate=True
277 elif arg=='--templateSmarts':
278 templateSmarts = val
279 elif arg=='--redraw':
280 redrawTemplate=True
281 elif arg=='--force':
282 forceIt=True
283 elif arg=='--autoNames':
284 autoNames=True
285 elif arg in ('--useall','--useallmatches'):
286 useAll=True
287 elif arg=='-h':
288 Usage()
289 sys.exit(0)
290
291
292 if templateSmarts:
293 splitL = templateSmarts.split(' ')
294 templateSmarts = []
295 for i,sma in enumerate(splitL):
296 patt = Chem.MolFromSmarts(sma)
297 if not patt:
298 raise ValueError,'could not convert smarts "%s" to a query'%sma
299 if i>=4:
300 i+=1
301 replace = Chem.MolFromSmiles('[%d*]'%(i+1))
302 templateSmarts.append((patt,replace))
303
304 if molTemplate:
305 try:
306 s = Chem.SDMolSupplier(extras[0])
307 templates = [x for x in s]
308 except:
309 logger.error('Could not construct templates from input file: %s'%extras[0],
310 exc_info=True)
311 sys.exit(1)
312 if redrawTemplate:
313 for template in templates:
314 AllChem.Compute2DCoords(template)
315 else:
316 if not smilesFileTemplate:
317 try:
318 templates = [Chem.MolFromSmiles(extras[0])]
319 except:
320 logger.error('Could not construct template from smiles: %s'%extras[0],
321 exc_info=True)
322 sys.exit(1)
323 else:
324 try:
325 s = Chem.SmilesMolSupplier(extras[0],titleLine=False)
326 templates = [x for x in s]
327 except:
328 logger.error('Could not construct templates from input file: %s'%extras[0],
329 exc_info=True)
330 sys.exit(1)
331 for template in templates:
332 AllChem.Compute2DCoords(template)
333 if templateSmarts:
334 finalTs = []
335 for i,template in enumerate(templates):
336 for j,(patt,replace) in enumerate(templateSmarts):
337 if not template.HasSubstructMatch(patt):
338 logger.error('template %d did not match sidechain pattern %d, skipping it'%(i+1,j+1))
339 template =None
340 break
341 template = Chem.ReplaceSubstructs(template,patt,replace)[0]
342 if template:
343 Chem.SanitizeMol(template)
344 finalTs.append(template)
345 templates = finalTs
346
347 sidechains = []
348 pos = 1
349 while pos<len(extras):
350 if extras[pos]=='-r':
351 replaceIt=False
352 pos += 1
353 else:
354 replaceIt=True
355 if extras[pos]=='-n':
356 sma = None
357 else:
358 sma = extras[pos]
359 pos += 1
360 try:
361 dat = extras[pos]
362 except IndexError:
363 logger.error('missing a sidechain filename')
364 sys.exit(-1)
365 pos += 1
366 if sdLigands:
367 try:
368 suppl = Chem.SDMolSupplier(dat)
369 except:
370 logger.error('could not construct supplier from SD file: %s'%dat,
371 exc_info=True)
372 suppl = []
373 else:
374 tmpF = file(dat,'r')
375 inL = tmpF.readline()
376 if len(inL.split(' '))<2:
377 nmCol=-1
378 else:
379 nmCol=1
380 try:
381 suppl = Chem.SmilesMolSupplier(dat,nameColumn=nmCol)
382 except:
383 logger.error('could not construct supplier from smiles file: %s'%dat,
384 exc_info=True)
385 suppl = []
386 suppl = [x for x in suppl]
387 chains = ConstructSidechains(suppl,sma=sma,replace=replaceIt,useAll=useAll)
388 if chains:
389 sidechains.append(chains)
390 count = 1
391 for chain in sidechains:
392 count *= len(chain)
393 count *= len(templates)
394 if not sidechains or not count:
395 print >>sys.stderr,"No molecules to be generated."
396 sys.exit(0)
397
398 if not forceIt and count>tooLong:
399 print >>sys.stderr,"This will generate %d molecules."%count
400 print >>sys.stderr,"Continue anyway? [no] ",
401 sys.stderr.flush()
402 ans = sys.stdin.readline()
403 if ans not in ('y','yes','Y','YES'):
404 sys.exit(0)
405
406 if outF and outF!="-":
407 try:
408 outF = file(outF,'w+')
409 except IOError:
410 logger.error('could not open file %s for writing'%(outF),
411 exc_info=True)
412 else:
413 outF = sys.stdout
414
415 for template in templates:
416 Explode(template,sidechains,outF,autoNames=autoNames)
417