1
2
3
4
5
6
7 """ a set of functions for interacting with databases
8
9 When possible, it's probably preferable to use a _DbConnection.DbConnect_ object
10
11 """
12 import RDConfig
13 try:
14 from Excel import ExcelWrapper
15 except:
16 haveExcel = 0
17 else:
18 haveExcel = 1
19
20 from Dbase.DbResultSet import DbResultSet,RandomAccessDbResultSet
22 return map(lambda x,y=fromL:y[x],what)
23
24 from Dbase import DbModule
25 import sys,types,string
26 from Dbase import DbInfo
27
28 -def GetColumns(dBase,table,fieldString,user='sysdba',password='masterkey',
29 join=''):
30 """ gets a set of data from a table
31
32 **Arguments**
33
34 - dBase: database name
35
36 - table: table name
37
38 - fieldString: a string with the names of the fields to be extracted,
39 this should be a comma delimited list
40
41 - user and password:
42
43 - join: a join clause (omit the verb 'join')
44
45
46 **Returns**
47
48 - a list of the data
49
50 """
51 cn = DbModule.connect(dBase,user,password)
52 c = cn.cursor()
53 cmd = 'select %s from %s'%(fieldString,table)
54 if join:
55 if join.strip().find('join') != 0:
56 join = 'join %s'%(join)
57 cmd +=' ' + join
58 c.execute(cmd)
59 return c.fetchall()
60
61 -def GetData(dBase,table,fieldString='*',whereString='',user='sysdba',password='masterkey',
62 removeDups=-1,join='',forceList=0,transform=None,randomAccess=1,extras=None):
63 """ a more flexible method to get a set of data from a table
64
65 **Arguments**
66
67 - fields: a string with the names of the fields to be extracted,
68 this should be a comma delimited list
69
70 - where: the SQL where clause to be used with the DB query
71
72 - removeDups indicates the column which should be used to screen
73 out duplicates. Only the first appearance of a duplicate will
74 be left in the dataset.
75
76 **Returns**
77
78 - a list of the data
79
80
81 **Notes**
82
83 - EFF: this isn't particularly efficient
84
85 """
86 cn = DbModule.connect(dBase,user,password)
87 c = cn.cursor()
88 cmd = 'select %s from %s'%(fieldString,table)
89 if join:
90 if join.strip().find('join') != 0:
91 join = 'join %s'%(join)
92 cmd += ' ' + join
93 if whereString:
94 if whereString.strip().find('where')!=0:
95 whereString = 'where %s'%(whereString)
96 cmd += ' ' + whereString
97
98 if forceList:
99 try:
100 if not extras:
101 c.execute(cmd)
102 else:
103 c.execute(cmd,extras)
104 except:
105 sys.stderr.write('the command "%s" generated errors:\n'%(cmd))
106 import traceback
107 traceback.print_exc()
108 return None
109 if transform is not None:
110 raise ValueError,'forceList and transform arguments are not compatible'
111 if not randomAccess:
112 raise ValueError,'when forceList is set, randomAccess must also be used'
113 data = c.fetchall()
114 if removeDups>0:
115 seen = []
116 for entry in data[:]:
117 if entry[removeDups] in seen:
118 data.remove(entry)
119 else:
120 seen.append(entry[removeDups])
121 else:
122 if randomAccess:
123 klass = RandomAccessDbResultSet
124 else:
125 klass = DbResultSet
126
127 data = klass(c,cn,cmd,removeDups=removeDups,transform=transform,extras=extras)
128
129 return data
130
131
132 -def DatabaseToExcel(dBase,table,fields='*',join='',where='',wrapper=None,
133 user='sysdba',password='masterkey',lowMemory=False):
134 """ Pulls the contents of a database and puts them in an Excel worksheet
135
136 **Arguments**
137 - dBase: the name of the DB file to be used
138
139 - table: the name of the table to query
140
141 - fields: the fields to select with the SQL query
142
143 - join: the join clause of the SQL query
144 (e.g. 'join foo on foo.bar=base.bar')
145
146 - where: the where clause of the SQL query
147 (e.g. 'where foo = 2' or 'where bar > 17.6')
148
149 - wrapper: an _Excel.ExcelWrapper.ExcelWrapper_ to be used
150 in interacting with Excel
151
152 - user: the username for DB access
153
154 - password: the password to be used for DB access
155
156 """
157 if not haveExcel:
158 return
159 if wrapper is None:
160 wrapper = ExcelWrapper.ExcelWrapper()
161 wrapper.Visible = 1
162 if len(where) and where.strip().find('where') != 0:
163 where = 'where %s'%(where)
164 if len(join) and join.strip().find('join') != 0:
165 join = 'join %s'%(join)
166
167 sqlCommand = 'select %s from %s %s %s'%(fields,table,join,where)
168 cn = DbModule.connect(dBase,user,password)
169 c = cn.cursor()
170 try:
171 c.execute(sqlCommand)
172 except:
173 print 'problems executing SQL statement %s'%(repr(sqlCommand))
174 import sys,traceback
175 traceback.print_exc()
176 return
177 headers = []
178 colsToTake = []
179 strCols = []
180
181
182 for i in range(len(c.description)):
183 item = c.description[i]
184 if item[1] not in DbInfo.sqlBinTypes:
185 colsToTake.append(i)
186 headers.append(item[0])
187 if item[1] in DbInfo.sqlTextTypes:
188 strCols.append(len(colsToTake)-1)
189 wrapper.Workbooks.Add()
190
191 r = wrapper.GetRange(1,1,1,len(headers))
192
193 r.Value = headers
194
195 r.Font.Bold = 1
196
197
198 results = c.fetchall()
199 row = 2
200 fullData = []
201 for res in results:
202 vs = [res[x] for x in colsToTake]
203 for col in strCols:
204 vs[col] = "'%s"%vs[col]
205 if not lowMemory:
206 fullData.append(vs)
207 else:
208 wrapper[row,1:len(headers)]=vs
209 row+=1
210 if not lowMemory:
211 wrapper[row:row+len(fullData),1:len(headers)] = fullData
212
213
214
215 -def DatabaseToText(dBase,table,fields='*',join='',where='',wrapper=None,
216 user='sysdba',password='masterkey',delim=','):
217 """ Pulls the contents of a database and makes a deliminted text file from them
218
219 **Arguments**
220 - dBase: the name of the DB file to be used
221
222 - table: the name of the table to query
223
224 - fields: the fields to select with the SQL query
225
226 - join: the join clause of the SQL query
227 (e.g. 'join foo on foo.bar=base.bar')
228
229 - where: the where clause of the SQL query
230 (e.g. 'where foo = 2' or 'where bar > 17.6')
231
232 - wrapper: an _Excel.ExcelWrapper.ExcelWrapper_ to be used
233 in interacting with Excel
234
235 - user: the username for DB access
236
237 - password: the password to be used for DB access
238
239 **Returns**
240
241 - the CSV data (as text)
242
243 """
244 if len(where) and where.strip().find('where')==-1:
245 where = 'where %s'%(where)
246 if len(join) and join.strip().find('join') == -1:
247 join = 'join %s'%(join)
248 sqlCommand = 'select %s from %s %s %s'%(fields,table,join,where)
249 cn = DbModule.connect(dBase,user,password)
250 c = cn.cursor()
251 c.execute(sqlCommand)
252 headers = []
253 colsToTake = []
254
255
256 for i in range(len(c.description)):
257 item = c.description[i]
258 if item[1] not in DbInfo.sqlBinTypes:
259 colsToTake.append(i)
260 headers.append(item[0])
261
262 lines = []
263 lines.append(delim.join(headers))
264
265
266 results = c.fetchall()
267 for res in results:
268 d = _take(res,colsToTake)
269 lines.append(delim.join(map(str,d)))
270
271 return '\n'.join(lines)
272
273
275 """
276
277 finds the types of the columns in _data_
278
279 if nullMarker is not None, elements of the data table which are
280 equal to nullMarker will not count towards setting the type of
281 their columns.
282
283 """
284 priorities={types.FloatType:3,types.IntType:2,types.StringType:1,-1:-1}
285 res = [None]*nCols
286 for col in xrange(nCols):
287 typeHere = [-1,1]
288 for row in xrange(nRows):
289 d = data[row][col]
290 if d is not None:
291 locType = type(d)
292 if locType != types.FloatType and locType != types.IntType:
293 locType = types.StringType
294 try:
295 d = str(d)
296 except UnicodeError,msg:
297 print 'cannot convert text from row %d col %d to a string'%(row+2,col)
298 print '\t>%s'%(repr(d))
299 raise UnicodeError,msg
300 else:
301 typeHere[1] = max(typeHere[1],len(str(d)))
302 if locType == types.StringType:
303 if nullMarker is None or d != nullMarker:
304 l = max(len(d),typeHere[1])
305 typeHere = [types.StringType,l]
306 else:
307 try:
308 fD = float(int(d))
309 except OverflowError:
310 locType = types.FloatType
311 else:
312 if fD == d:
313 locType = types.IntType
314 if typeHere[0]!=types.StringType and \
315 priorities[locType] > priorities[typeHere[0]]:
316 typeHere[0] = locType
317 res[col] = typeHere
318 return res
319
321 """This is kind of crude hack to automagically determine the types
322 of columns in the active Excel sheet
323
324 **Arguments**
325
326 - wrapper: the _ExcelWrapper_ to be used in interacting with Excel
327
328 - nullMarker: (optional) if this is not None, elements of the
329 data table which are equal to nullMarker will not count towards
330 setting the type of their columns.
331
332 **Returns**
333
334 - a list of the types of each column
335
336 **Note**
337
338 - we make the assumption that there are only three possible types: int,
339 float and string.
340
341 """
342 nCols = wrapper.FindLastCol(1,1)
343 nRows = wrapper.FindLastRow(1,1)
344 dList = wrapper[2:nRows,1:nCols]
345 res = TypeFinder(dList,nRows-1,nCols,nullMarker=nullMarker)
346 return res
347
348
350 """ *For Internal Use*
351
352 removes illegal characters from column headings
353 and truncates those which are too long.
354
355 """
356 for i in xrange(len(colHeadings)):
357
358 colHeadings[i] = string.strip(colHeadings[i])
359 colHeadings[i] = string.replace(colHeadings[i],' ','_')
360 colHeadings[i] = string.replace(colHeadings[i],'-','_')
361
362 if len(colHeadings[i]) > maxColLabelLen:
363
364 newHead = string.replace(colHeadings[i],'_','')
365 newHead = newHead[:maxColLabelLen]
366 print '\tHeading %s too long, changed to %s'%(colHeadings[i],newHead)
367 colHeadings[i] = newHead
368 return colHeadings
369
371 """ returns a list of SQL type strings
372 """
373 typeStrs=[]
374 for i in xrange(len(colTypes)):
375 type = colTypes[i]
376 if type[0] == types.FloatType:
377 typeStrs.append('%s double precision'%colHeadings[i])
378 elif type[0] == types.IntType:
379 typeStrs.append('%s integer'%colHeadings[i])
380 else:
381 typeStrs.append('%s varchar(%d)'%(colHeadings[i],type[1]))
382 if colHeadings[i] == keyCol:
383 typeStrs[-1] = '%s not null primary key'%(typeStrs[-1])
384 return typeStrs
385
387 try:
388 conn.cursor().executemany(sqlStr,block)
389 except:
390 res = 0
391 conn.commit()
392 for row in block:
393 try:
394 conn.cursor().execute(sqlStr,tuple(row))
395 res += 1
396 except:
397 if not silent:
398 import traceback
399 traceback.print_exc()
400 print 'insert failed:',sqlStr
401 print '\t',repr(row)
402 else:
403 conn.commit()
404 else:
405 res = len(block)
406 return res
407
408 -def _AddDataToDb(dBase,table,user,password,colDefs,colTypes,data,
409 nullMarker=None,blockSize=100):
410 """ *For Internal Use*
411
412 (drops and) creates a table and then inserts the values
413
414 """
415 cn = DbModule.connect(dBase,user,password)
416 c = cn.cursor()
417 try:
418 c.execute('drop table %s'%(table))
419 except:
420 print 'cannot drop table %s'%(table)
421 try:
422 sqlStr = 'create table %s (%s)'%(table,colDefs)
423 c.execute(sqlStr)
424 except:
425 print 'create table failed: ', sqlStr
426 print 'here is the exception:'
427 import traceback
428 traceback.print_exc()
429 return
430 cn.commit()
431 c = None
432
433 block = []
434 entryTxt = [DbModule.placeHolder]*len(data[0])
435 dStr = ','.join(entryTxt)
436 sqlStr = 'insert into %s values (%s)'%(table,dStr)
437 nDone = 0
438 for row in data:
439 entries = [None]*len(row)
440 for col in xrange(len(row)):
441 if row[col] is not None and \
442 (nullMarker is None or row[col] != nullMarker):
443 if colTypes[col][0] == types.FloatType:
444 entries[col] = float(row[col])
445 elif colTypes[col][0] == types.IntType:
446 entries[col] = int(row[col])
447 else:
448 entries[col] = str(row[col])
449 else:
450 entries[col] = None
451 block.append(tuple(entries))
452 if len(block)>=blockSize:
453 nDone += _insertBlock(cn,sqlStr,block)
454 if not hasattr(cn,'autocommit') or not cn.autocommit:
455 cn.commit()
456 block = []
457 if len(block):
458 nDone += _insertBlock(cn,sqlStr,block)
459 if not hasattr(cn,'autocommit') or not cn.autocommit:
460 cn.commit()
461
462
463
464 -def ExcelToDatabase(dBase,table,wrapper=None,user='sysdba',password='masterkey',
465 maxColLabelLen=31,keyCol=None,nullMarker=None,force=0):
466 """convert the active excel worksheet into a database.
467
468 this isn't as smooth or slick as the conversion the other way... sad.
469
470 **Arguments**
471
472 - dBase: the name of the DB to use
473
474 - table: the name of the table to create/overwrite
475
476 - wrapper: the _ExcelWrapper_ to use
477
478 - user: the user name to use in connecting to the DB
479
480 - password: the password to use in connecting to the DB
481
482 - maxColLabelLen: the maximum length a column label should be
483 allowed to have (truncation otherwise)
484
485 - keyCol: the column to be used as an index for the db
486
487 **Notes**
488
489 - if _table_ already exists, it is destroyed before we write
490 the new data
491
492 """
493 if not haveExcel:
494 return
495 table.replace('-','_')
496 table.replace(' ','_')
497 if not force:
498 tblNames = [x.strip() for x in DbInfo.GetTableNames(dBase)]
499 tmp = table.upper()
500 if tmp in tblNames:
501 resp = raw_input('Table %s already exists, overwrite it? '%(table))
502 if not resp or resp[0] not in ['Y','y']:
503 print 'cancelled'
504 return
505
506 if wrapper is None:
507 wrapper = ExcelWrapper.ExcelWrapper()
508 colHeadings = wrapper.GetHeadings()
509 _AdjustColHeadings(colHeadings,maxColLabelLen)
510 nCols = len(colHeadings)
511 nRows = wrapper.FindLastRow(1,1)
512 data = wrapper[2:nRows,1:nCols]
513
514 colTypes = TypeFinder(data,nRows-1,nCols,nullMarker=nullMarker)
515 typeStrs = GetTypeStrings(colHeadings,colTypes,keyCol=keyCol)
516 colDefs=','.join(typeStrs)
517
518 _AddDataToDb(dBase,table,user,password,colDefs,colTypes,data,nullMarker=nullMarker)
519
520 -def TextFileToDatabase(dBase,table,inF,delim=',',
521 user='sysdba',password='masterkey',
522 maxColLabelLen=31,keyCol=None,nullMarker=None):
523 """loads the contents of the text file into a database.
524
525 **Arguments**
526
527 - dBase: the name of the DB to use
528
529 - table: the name of the table to create/overwrite
530
531 - inF: the file like object from which the data should
532 be pulled (must support readline())
533
534 - delim: the delimiter used to separate fields
535
536 - user: the user name to use in connecting to the DB
537
538 - password: the password to use in connecting to the DB
539
540 - maxColLabelLen: the maximum length a column label should be
541 allowed to have (truncation otherwise)
542
543 - keyCol: the column to be used as an index for the db
544
545 **Notes**
546
547 - if _table_ already exists, it is destroyed before we write
548 the new data
549
550 - we assume that the first row of the file contains the column names
551
552 """
553 table.replace('-','_')
554 table.replace(' ','_')
555
556 colHeadings = inF.readline().split(delim)
557 _AdjustColHeadings(colHeadings,maxColLabelLen)
558 nCols = len(colHeadings)
559 data = []
560 inL = inF.readline()
561 while inL:
562 inL = inL.replace('\r','')
563 inL = inL.replace('\n','')
564 splitL = inL.split(delim)
565 if len(splitL)!=nCols:
566 print '>>>',repr(inL)
567 assert len(splitL)==nCols,'unequal length'
568 tmpVect = []
569 for entry in splitL:
570 try:
571 val = int(entry)
572 except:
573 try:
574 val = float(entry)
575 except:
576 val = entry
577 tmpVect.append(val)
578 data.append(tmpVect)
579 inL = inF.readline()
580 nRows = len(data)
581
582
583 colTypes = TypeFinder(data,nRows,nCols,nullMarker=nullMarker)
584 typeStrs = GetTypeStrings(colHeadings,colTypes,keyCol=keyCol)
585 colDefs=','.join(typeStrs)
586
587 _AddDataToDb(dBase,table,user,password,colDefs,colTypes,data,
588 nullMarker=nullMarker)
589
590
591 -def DatabaseToDatabase(fromDb,fromTbl,toDb,toTbl,
592 fields='*',join='',where='',
593 user='sysdba',password='masterkey',keyCol=None,nullMarker='None'):
594 """
595
596 FIX: at the moment this is a hack
597
598 """
599 import cStringIO
600 io = cStringIO.StringIO()
601 io.write(DatabaseToText(fromDb,fromTbl,fields=fields,join=join,where=where,
602 user=user,password=password))
603 io.seek(-1)
604 TextFileToDatabase(toDb,toTbl,io,user=user,password=password,keyCol=keyCol,
605 nullMarker=nullMarker)
606
607
608 if __name__=='__main__':
609 import cStringIO
610
611 io = cStringIO.StringIO()
612 io.write('foo,bar,baz\n')
613 io.write('1,2,3\n')
614 io.write('1.1,4,5\n')
615 io.write('4,foo,6\n')
616 io.seek(0)
617 import RDConfig,os
618 dirLoc = os.path.join(RDConfig.RDCodeDir,'Dbase','TEST.GDB')
619
620 TextFileToDatabase(dirLoc,'fromtext',io)
621