import csv
import os
import re
import string
from os.path import basename

from formic import FileSet
from openpyxl import load_workbook


def uclean(s):  # Keep only printable ASCII chars for csv.writer - SLOW
    try:
        return ''.join(char for char in s if char in string.printable).strip()
    except TypeError:  # non-string cells (None, numbers, dates) become ''
        return ''


def fclean(s):  # Clean out non-filename-safe chars
    return ''.join([c for c in s if re.match(r'\w', c)])


xlsx_files = FileSet(directory='C:\\', include='**/*.xlsx')  # the whole computer's Excel files
for filename in xlsx_files:
    wb = load_workbook(filename, use_iterators=True, read_only=True)  # This is still using > 600 MBs
    for sheet in wb.worksheets:
        i = wb.worksheets.index(sheet)
        bf = os.path.splitext(basename(filename))[0]
        sn = fclean(str(wb.get_sheet_names()[i]))
        f = bf + '_' + sn + '.csv'  # one CSV per worksheet, named <workbook>_<sheet>.csv
        if not os.path.exists(f):
            with open(f, 'wb') as outf:
                out_writer = csv.writer(outf)
                for row in sheet.iter_rows():
                    out_writer.writerow([uclean(cell.value) for cell in row])