import csv
import os
import re
import string
from os.path import basename

from formic import FileSet
from openpyxl import load_workbook


def uclean(s):  # Keep only printable ASCII chars for csv.writer - SLOW
    try:
        return ''.join(char for char in s if char in string.printable).strip()
    except TypeError:  # non-string cells (None, numbers, dates) become ''
        return ''


def fclean(s):  # Clean out non-filename-safe chars
    return ''.join([c for c in s if re.match(r'\w', c)])


xlsx_files = FileSet(directory='C:\\', include='**/*.xlsx')  # the whole computer's Excel files
for filename in xlsx_files:
    wb = load_workbook(filename, use_iterators=True, read_only=True)  # This is still using > 600 MBs
    for sheet in wb.worksheets:
        i = wb.worksheets.index(sheet)
        bf = os.path.splitext(basename(filename))[0]
        sn = fclean(str(wb.get_sheet_names()[i]))
        f = bf + '_' + sn + '.csv'  # one CSV per worksheet, named <workbook>_<sheet>.csv
        if not os.path.exists(f):
            with open(f, 'wb') as outf:
                out_writer = csv.writer(outf)
                for row in sheet.iter_rows():
                    out_writer.writerow([uclean(cell.value) for cell in row])