Advertisement
ChrisProsser

excel_to_csv

Sep 11th, 2013
171
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.20 KB | None | 0 0
  1. #!/usr/bin/python
  2.  
  3. import os, csv, sys, Tkinter, tkFileDialog as fd, xlrd
  4.  
  5. # stop tinker shell from opening as only needed for file dialog
  6. root = Tkinter.Tk()
  7. root.withdraw()
  8.  
  9. def format_date(dt):
  10.     yyyy, mm, dd = str(dt[0]), str(dt[1]), str(dt[2])
  11.     hh, mi, ss = str(dt[3]), str(dt[4]), str(dt[5])
  12.  
  13.     if len(mm) == 1:
  14.         mm = '0'+mm
  15.     if len(dd) == 1:
  16.         dd = '0'+dd
  17.  
  18.     if hh == '0' and mi == '0' and ss == '0':
  19.         datetime_str = dd+'/'+mm+'/'+yyyy
  20.     else:
  21.         if len(hh) == 1:
  22.             hh = '0'+hh
  23.         if len(mi) == 1:
  24.             mi = '0'+mi
  25.         if len(ss) == 1:
  26.             ss = '0'+ss
  27.         datetime_str = dd+'/'+mm+'/'+yyyy+' '+hh+':'+mi+':'+ss
  28.        
  29.     return datetime_str
  30.  
  31. def xl_to_csv(in_path, out_files):
  32.     # set up vars to read file
  33.     wb = xlrd.open_workbook(in_path)
  34.     base_name = out_files[0]
  35.     sheet_no = 0
  36.  
  37.     for sheet in wb.sheets():
  38.  
  39.         print '\nGetting data from sheet:', sheet.name, '...'
  40.  
  41.         # set up vars to write file
  42.         out_path = base_name + '_' + sheet.name + '.csv'
  43.         if len(out_files) <= sheet_no:
  44.             out_files.append(out_path)
  45.         else:
  46.             out_files[sheet_no] = out_path
  47.        
  48.         fileout = open(out_path, 'wb')
  49.         writer = csv.writer(fileout, dialect='excel')
  50.  
  51.         # iterate through rows and cols
  52.         row_cnt, col_cnt = sheet.nrows, sheet.ncols
  53.         for r in range(row_cnt):
  54.  
  55.             # make list from row data
  56.             row = []
  57.             for c in range(col_cnt):
  58.                
  59.                 # check data type and make conversions
  60.                 val = sheet.cell(r,c).value
  61.                 if sheet.cell(r,c).ctype == 2: # number data type
  62.                     if val == int(val):
  63.                         val = int(val) # convert to int if no decimal remainder
  64.                 elif sheet.cell(r,c).ctype == 3: # date fields
  65.                     dt = xlrd.xldate_as_tuple(val, 0) # convert excel date to obj
  66.                     val = format_date(dt)
  67.                 elif sheet.cell(r,c).ctype == 4: # boolean data types
  68.                     val = str(bool(val)) # convert 1 or 0 to bool then string
  69.                 else:
  70.                     try:
  71.                         val = str(val)
  72.                     except UnicodeEncodeError:
  73.                         print '....encounted UnicodeEncodeError on row:', \
  74.                               r+1, 'column:', c+1
  75.                         print '....replacing unknown charachter with ?'
  76.                         val = str(val.encode('ascii', 'replace'))
  77.  
  78.                 row.append(val)
  79.  
  80.             # write row to csv file
  81.             try:
  82.                 writer.writerow(row)
  83.             except:
  84.                 print '...row failed in write to file:', row
  85.                 exc_type, exc_value, exc_traceback = sys.exc_info()
  86.                 lines = traceback.format_exception(exc_type, exc_value, exc_traceback)
  87.                 for line in lines:
  88.                     print '!!', line
  89.  
  90.         print '...data written to:', out_path
  91.         sheet_no += 1
  92.        
  93.     return out_files
  94.  
  95. def main():
  96.     in_path, out_path = None, None
  97.     out_files = []
  98.     prog_name = sys.argv[0]
  99.    
  100.     # check if in_path and out_path are inlcuded as cmd line args...
  101.     if len(sys.argv) > 1:
  102.         in_path = sys.argv[1]
  103.         if not os.path.exists(in_path):
  104.             print 'Usage:', prog_name, '[<excel_file_path>] [<output_file_path>]'
  105.             print 'cannot find the file provided for excel_file_path:\n', in_path
  106.             sys.exit("Error - invalid excel_file_path arg")
  107.         if len(sys.argv) > 2:
  108.             out_path = sys.argv[2]
  109.             if not os.path.exists(os.path.dirname(out_path)):
  110.                 print 'Usage:', prog_name, '[<excel_file_path>] [<output_file_path>]'
  111.                 print 'cannot find the directory provided in output_file_path:\n', out_path
  112.                 sys.exit("Error - invalid output_file_path arg")
  113.     else:
  114.         # set current working directory to user's my documents folder
  115.         try:
  116.             os.chdir(os.path.join(os.getenv('userprofile'),'documents'))
  117.         except:
  118.             pass
  119.  
  120.     # ask user for path to Excel file...
  121.     while not in_path:
  122.         print "Please select the excel file to read data from ..."
  123.         try:
  124.             in_path = fd.askopenfilename()
  125.         except:
  126.             print 'Error selecting file.'
  127.         if not in_path:
  128.             cont = raw_input('Do you want to continue? (Y|N): ').upper()[0]
  129.             if cont == 'N':
  130.                 sys.exit("Error - unable to select input file")
  131.  
  132.     # get dir for output...
  133.     if not out_path:
  134.         out_path = os.path.dirname(in_path)
  135.    
  136.     # combine outpath directory with in_path filenmame (excl ext)...
  137.     f_name = os.path.splitext(os.path.basename(in_path))[0]
  138.     out_files.append(os.path.join(out_path,f_name))
  139.  
  140.     # call function to convert sheets to csv...
  141.     out_files = xl_to_csv(in_path, out_files)
  142.  
  143.     # open new files...
  144.     v_open = raw_input("\nOpen file(s) (Y/N): ").upper()
  145.     if v_open == 'Y':
  146.         for f in out_files:
  147.             os.startfile(f)
  148.     sys.exit()
  149.  
  150. if __name__ == '__main__':
  151.     main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement