1
2
3
4
5
6
7 from rdkit import RDConfig
8 from rdkit import Chem
9 import sys,csv
10
def Convert(suppl,outFile,keyCol='',stopAfter=-1,includeChirality=0,smilesFrom=''):
  """Write the molecules from a supplier to a file-like object as CSV.

  The header row is: the key column (if requested), then 'SMILES', then the
  property names found on the first usable molecule. One row is written per
  usable molecule; a property that a molecule does not carry is written as
  an empty cell.

  Arguments:
    - suppl: a re-iterable collection of molecules (it is traversed once to
      build the header and once to write the rows). Falsy entries, e.g.
      records the supplier could not parse, are skipped.
    - outFile: file-like object handed to csv.writer.
    - keyCol: optional property name to emit as the first column.
    - stopAfter: stop once this many molecules have been written; the
      default of -1 never matches the counter, so everything is written.
    - includeChirality: passed through as the second positional argument
      of Chem.MolToSmiles.
    - smilesFrom: optional property name holding a SMILES string. When a
      molecule carries it, the SMILES column is generated from that string
      instead of from the molecule's own structure.

  If the supplier holds no usable molecule, only the header is written.
  """
  w = csv.writer(outFile)

  # Take the column names from the first molecule that is actually usable.
  # Indexing suppl[0] directly fails on an empty supplier and on a leading
  # record that could not be read.
  propNames = []
  for mol in suppl:
    if mol:
      propNames = list(mol.GetPropNames())
      break
  if keyCol and keyCol in propNames:
    # the key gets its own leading column, so do not repeat it
    propNames.remove(keyCol)

  header = []
  if keyCol:
    header.append(keyCol)
  header.append('SMILES')
  header.extend(propNames)
  w.writerow(header)

  nDone = 0
  for mol in suppl:
    if not mol:
      continue

    smi = None
    if smilesFrom and mol.HasProp(smilesFrom):
      # Round-trip the stored SMILES through a molecule so the output is
      # generated the same way as for structure-derived SMILES.
      tMol = Chem.MolFromSmiles(mol.GetProp(smilesFrom))
      if tMol is not None:
        smi = Chem.MolToSmiles(tMol,includeChirality)
    if smi is None:
      # no SMILES property, or it could not be parsed: fall back to the
      # molecule's own structure rather than crashing on a None molecule
      smi = Chem.MolToSmiles(mol,includeChirality)

    row = []
    if keyCol:
      # treat a missing key like any other missing property
      if mol.HasProp(keyCol):
        row.append(str(mol.GetProp(keyCol)))
      else:
        row.append('')
    row.append(smi)
    for prop in propNames:
      if mol.HasProp(prop):
        row.append(str(mol.GetProp(prop)))
      else:
        row.append('')
    w.writerow(row)

    nDone += 1
    if nDone == stopAfter:
      break
48
49
50
51
52 import unittest
59 import os
60 from cStringIO import StringIO
61 fName = os.path.join(RDConfig.RDDataDir,'NCI','first_200.props.sdf')
62 suppl = Chem.SDMolSupplier(fName)
63 io = StringIO()
64 try:
65 Convert(suppl,io)
66 except:
67 import traceback
68 traceback.print_exc()
69 self.fail('conversion failed')
70 txt = io.getvalue()
71 lines = txt.split('\n')
72 if not lines[-1]:
73 del lines[-1]
74 self.failUnless(len(lines)==201,'bad num lines: %d'%len(lines))
75 line0 = lines[0].split(',')
76 self.failUnless(len(line0)==16,'%d'%len(line0))
77 self.failUnless(line0[0]=='SMILES')
79 import os
80 from cStringIO import StringIO
81 fName = os.path.join(RDConfig.RDDataDir,'NCI','first_200.props.sdf')
82 suppl = Chem.SDMolSupplier(fName)
83 io = StringIO()
84 try:
85 Convert(suppl,io,keyCol='AMW',stopAfter=5)
86 except:
87 import traceback
88 traceback.print_exc()
89 self.fail('conversion failed')
90 txt = io.getvalue()
91 lines = txt.split('\n')
92 if not lines[-1]:
93 del lines[-1]
94 self.failUnless(len(lines)==6,'bad num lines: %d'%len(lines))
95 line0 = lines[0].split(',')
96 self.failUnless(len(line0)==16,'%d'%len(line0))
97 self.failUnless(line0[0]=='AMW')
98 self.failUnless(line0[1]=='SMILES')
99
100
101
102
103
104
105
106
107
109 message = """
110 Usage: SDFToCSV [-k keyCol] inFile.sdf [outFile.csv]
111
112 """
113 sys.stderr.write(message)
114 sys.exit(-1)
115
116
117
# Command-line entry point:
#   SDFToCSV [-k keyCol] [--chiral] [--smilesCol=name] inFile.sdf [outFile.csv]
# or, to run the unit tests:
#   SDFToCSV --test
if __name__=='__main__':
  import getopt

  try:
    args,extras = getopt.getopt(sys.argv[1:],'hk:',
                                ['test',
                                 'chiral',
                                 'smilesCol=',
                                 ])
  except getopt.GetoptError:
    # Only option-parsing errors are reported here; anything else
    # (KeyboardInterrupt, SystemExit, ...) propagates normally.
    import traceback
    traceback.print_exc()
    Usage()

  keyCol = ''
  testIt = 0
  useChirality = 0
  smilesCol = ''
  for arg,val in args:
    if arg=='-k':
      keyCol = val
    elif arg=='--chiral':
      useChirality = 1
    elif arg=='--smilesCol':
      smilesCol = val
    elif arg=='--test':
      testIt = 1
    elif arg=='-h':
      Usage()

  if testIt:
    # hide our own options from unittest's argument parser
    sys.argv = [sys.argv[0]]
    unittest.main()
  else:
    if len(extras)<1:
      Usage()

    inFilename = extras[0]
    if len(extras)>1:
      outFilename = extras[1]
      outF = open(outFilename,'w+')
    else:
      outF = sys.stdout

    try:
      suppl = Chem.SDMolSupplier(inFilename)
      Convert(suppl,outF,keyCol=keyCol,includeChirality=useChirality,smilesFrom=smilesCol)
    finally:
      # close the file we opened (never stdout), even if conversion fails
      if outF is not sys.stdout:
        outF.close()
165