Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import csv
- from formic import FileSet
- from openpyxl import load_workbook
- import re
- from os.path import basename
- import os
- import string
_PRINTABLE = frozenset(string.printable)  # set lookup instead of O(n) 'in str' scan per char


def uclean(s):  # Clean out non-printable chars for csv.writer - SLOW
    """Return *s* with every character outside string.printable removed,
    then stripped of surrounding whitespace.

    Cell values are frequently not strings (None, numbers, dates);
    iterating those raises TypeError, which we map to ''.
    """
    try:
        return ''.join(char for char in s if char in _PRINTABLE).strip()
    except TypeError:  # narrowed from bare except so Ctrl-C/SystemExit still propagate
        return ''
def fclean(s):  # Clean out non-filename-safe chars
    """Reduce *s* to filename-safe characters.

    Keeps only word characters (letters, digits, underscore); spaces,
    punctuation and path separators are dropped.
    """
    # Bug fix: the original pattern was r'w' (the literal letter 'w'),
    # which discarded everything except 'w'. \W strips every non-word
    # character in a single C-level pass instead of one re.match per char.
    return re.sub(r'\W', '', s)
# Export every worksheet of every .xlsx under C:\ to its own CSV file.
# Bug fix: 'C:\' left the string unterminated (backslash escaped the quote).
xlsx_files = FileSet(directory='C:\\', include='**\*.xlsx')  # the whole computer's excel files
for filename in xlsx_files:
    # read_only/use_iterators stream rows instead of loading whole sheets.
    wb = load_workbook(filename, use_iterators=True, read_only=True)  # This is still using > 600 MBs
    bf = os.path.splitext(basename(filename))[0]  # hoisted: depends only on filename, not the sheet
    for sheet in wb.worksheets:
        # sheet.title replaces the O(n) wb.worksheets.index() + get_sheet_names()[i] lookup
        sn = fclean(str(sheet.title))
        f = bf + '_' + sn + '.csv'
        if not os.path.exists(f):  # never clobber an existing export
            with open(f, 'wb') as outf:  # binary mode: Python 2 csv.writer convention
                out_writer = csv.writer(outf)
                for row in sheet.iter_rows():
                    out_writer.writerow([uclean(cell.value) for cell in row])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement