Package rdkit :: Package Chem :: Package ChemUtils :: Module SDFToCSV
[hide private]
[frames] | [no frames]

Source Code for Module rdkit.Chem.ChemUtils.SDFToCSV

  1  # $Id: SDFToCSV.py 997 2009-02-25 06:12:43Z glandrum $ 
  2  # 
  3  # Copyright (C) 2004-2006 Rational Discovery LLC 
  4  # 
  5  #   @@ All Rights Reserved  @@ 
  6  # 
  7  from rdkit import RDConfig 
  8  from rdkit import Chem 
  9  import sys,csv 
 10   
def Convert(suppl,outFile,keyCol='',stopAfter=-1,includeChirality=0,smilesFrom=''):
    """Write the molecules provided by a supplier to outFile as CSV.

    The header row is [keyCol,] 'SMILES', followed by the property names of
    the first molecule that could be read (minus keyCol).  Each subsequent
    row holds the same columns for one molecule; properties a molecule does
    not carry are written as empty strings.

    Arguments:
      - suppl: an iterable of molecules; entries that evaluate false
        (records that could not be parsed) are skipped
      - outFile: a writable file-like object, handed to csv.writer
      - keyCol: (optional) name of a property to write as the first column
      - stopAfter: (optional) stop once this many rows have been written;
        the default of -1 never matches, so everything is converted
      - includeChirality: (optional) forwarded as the second positional
        argument of Chem.MolToSmiles
      - smilesFrom: (optional) name of a property holding a SMILES; when a
        molecule has it, the SMILES is re-parsed and re-generated from that
        value instead of from the molecule's own structure

    Returns None.
    """
    w = csv.writer(outFile)

    def headerRow(names):
        row = []
        if keyCol:
            row.append(keyCol)
        row.append('SMILES')
        row.extend(names)
        return row

    propNames = None
    nDone = 0
    for mol in suppl:
        if not mol:
            continue

        if propNames is None:
            # Take the column layout from the first *readable* molecule
            # rather than from suppl[0]: the first record may have failed to
            # parse, and iterating only also works for suppliers that do not
            # support indexing.
            propNames = list(mol.GetPropNames())
            if keyCol and keyCol in propNames:
                propNames.remove(keyCol)
            w.writerow(headerRow(propNames))

        smi = None
        if smilesFrom and mol.HasProp(smilesFrom):
            tMol = Chem.MolFromSmiles(mol.GetProp(smilesFrom))
            # MolFromSmiles hands back None for text it cannot parse; in
            # that case fall through to the molecule's own structure instead
            # of crashing in MolToSmiles.
            if tMol is not None:
                smi = Chem.MolToSmiles(tMol,includeChirality)
        if smi is None:
            smi = Chem.MolToSmiles(mol,includeChirality)

        outL = []
        if keyCol:
            # guard the key the same way as every other property
            if mol.HasProp(keyCol):
                outL.append(str(mol.GetProp(keyCol)))
            else:
                outL.append('')
        outL.append(smi)
        for prop in propNames:
            if mol.HasProp(prop):
                outL.append(str(mol.GetProp(prop)))
            else:
                outL.append('')
        w.writerow(outL)

        nDone += 1
        if nDone == stopAfter:
            break

    if propNames is None:
        # No readable molecule at all: still emit the fixed part of the
        # header so the output is a well-formed (empty) table.
        w.writerow(headerRow([]))
    return
48 49 50 #------------------- 51 # Testing: 52 import unittest
class TestCase(unittest.TestCase):
    """Regression tests for Convert() run against the NCI sample SD file
    (RDDataDir/NCI/first_200.props.sdf)."""

    def setUp(self):
        pass

    def tearDown(self):
        pass

    def _convertToLines(self, **kwargs):
        """Run Convert() on the sample file with the given keyword arguments
        and return the generated CSV text split into lines (a trailing empty
        line is dropped)."""
        import os
        try:
            # Python 2: csv writes byte strings, which cStringIO accepts
            from cStringIO import StringIO
        except ImportError:
            # Python 3: cStringIO no longer exists
            from io import StringIO
        fName = os.path.join(RDConfig.RDDataDir,'NCI','first_200.props.sdf')
        suppl = Chem.SDMolSupplier(fName)
        buf = StringIO()
        try:
            Convert(suppl,buf,**kwargs)
        except Exception:
            import traceback
            traceback.print_exc()
            self.fail('conversion failed')
        lines = buf.getvalue().split('\n')
        if not lines[-1]:
            del lines[-1]
        return lines

    def test1(self):
        """Default conversion: header plus one row per molecule."""
        lines = self._convertToLines()
        self.assertEqual(len(lines),201,'bad num lines: %d'%len(lines))
        line0 = lines[0].split(',')
        self.assertEqual(len(line0),16,'%d'%len(line0))
        self.assertEqual(line0[0],'SMILES')

    def test2(self):
        """keyCol moves that property to the first column; stopAfter limits
        the number of rows written."""
        lines = self._convertToLines(keyCol='AMW',stopAfter=5)
        self.assertEqual(len(lines),6,'bad num lines: %d'%len(lines))
        line0 = lines[0].split(',')
        self.assertEqual(len(line0),16,'%d'%len(line0))
        self.assertEqual(line0[0],'AMW')
        self.assertEqual(line0[1],'SMILES')
99 100 101 102 103 104 105 106 #------------------- 107 # CLI stuff:
def Usage():
    """Write the command-line help to stderr and exit with status -1.

    The text lists every option accepted by the getopt call in the
    __main__ section of this script.
    """
    message = """
Usage: SDFToCSV [options] inFile.sdf [outFile.csv]

Options:
  -h                  print this message and exit
  -k keyCol           write the property keyCol as the first column
  --chiral            include chirality information in the output SMILES
  --smilesCol=prop    build the SMILES from the property prop (when a
                      molecule has it) instead of from its structure
  --test              run the module's unit tests instead of converting

If outFile.csv is omitted the CSV is written to stdout.

"""
    sys.stderr.write(message)
    sys.exit(-1)
if __name__=='__main__':
    # Command-line driver: parse the options, then either convert an SD file
    # to CSV or (with --test) run the unit tests defined in this module.
    import getopt

    try:
        args,extras = getopt.getopt(sys.argv[1:],'hk:',
                                    ['test',
                                     'chiral',
                                     'smilesCol=',
                                     ])
    except getopt.GetoptError:
        # unknown option or missing option argument
        import traceback
        traceback.print_exc()
        Usage()

    keyCol = ''
    testIt = 0
    useChirality = 0
    smilesCol = ''
    for arg,val in args:
        if arg=='-k':
            keyCol = val
        elif arg=='--chiral':
            useChirality = 1
        elif arg=='--smilesCol':
            smilesCol = val
        elif arg=='--test':
            testIt = 1
        elif arg=='-h':
            Usage()

    # an input file is mandatory unless we are only running the tests
    if not testIt and len(extras)<1:
        Usage()

    if not testIt:
        inFilename = extras[0]
        if len(extras)>1:
            outFilename = extras[1]
            outF = open(outFilename,'w+')
        else:
            outF = sys.stdout

        try:
            suppl = Chem.SDMolSupplier(inFilename)
            Convert(suppl,outF,keyCol=keyCol,includeChirality=useChirality,smilesFrom=smilesCol)
        finally:
            # close (and thereby flush) the file we opened, even when the
            # conversion fails; stdout is left alone
            if outF is not sys.stdout:
                outF.close()
    else:
        # strip our own options so that unittest.main() does not try to
        # interpret them
        sys.argv = [sys.argv[0]]
        unittest.main()