import xlrd import os import pandas as pd def get_daily_data(sheet_name): attributes = {} try: book = xlrd.open_workbook(sheet_name) sheet = book.sheet_by_name('Sheet1') # static variables try: attributes['net_sales'] = round(float(sheet.cell(9, 14).__str__()[7:])) attributes['breakfast_sales'] = round(float(sheet.cell(25, 13).__str__()[7:])) attributes['lunch_sales'] = round(float(sheet.cell(24, 13).__str__()[7:])) attributes['dinner_sales'] = round(float(sheet.cell(23, 13).__str__()[7:])) attributes['latenight_sales'] = round(float(sheet.cell(21, 13).__str__()[7:])) attributes['total_customers'] = sheet.cell(27, 19).__str__()[7:] attributes['breakfast_customers'] = sheet.cell(25, 23).__str__()[7:] attributes['lunch_customers'] = sheet.cell(24, 23).__str__()[7:] attributes['dinner_customers'] = sheet.cell(23, 23).__str__()[7:] attributes['latenight_customers'] = sheet.cell(21, 23).__str__()[7:] except ValueError: print(sheet_name + ' is missing at least one meal.') pass # dynamic variables for row in range(37, 77): cell = sheet.cell(row, 26).__str__() if cell != "empty:''": attributes[str(cell[5:].lower().replace("'", '')) + '_sold'] = int(float(sheet.cell(row, 32).__str__()[7:])) if cell == "text:'Snacks'": attributes[str(cell[5:].lower().replace("'", '')) + '_sold'] = int(float(sheet.cell(row, 32).__str__()[7:])) break return attributes except xlrd.biffh.XLRDError: print(sheet_name + ' failed to process correctly.') pass # store all data data_dict = {} for filename in os.listdir(r'/Users/CarterW.Ward/Desktop/DOTSWeather/venv/kkd'): # string repair to match master keys full_date = filename[:10] day = full_date[3:5] month = full_date[0:2] year = full_date[6:] corrected_date = year + '-' + month + '-' + day data_dict[corrected_date] = get_daily_data(r'/Users/CarterW.Ward/Desktop/DOTSWeather/venv/kkd/' + filename) max_len = 0 max_key = '' trash = [] for date in data_dict: # finds longest set of attributes if data_dict[date] is None: trash.append(date) else: cur_len = len(data_dict[date]) if cur_len > max_len: max_key = date max_len = cur_len for piece_of in trash: data_dict.pop(piece_of) print(data_dict) names = list(data_dict[max_key].keys()) df = pd.DataFrame(columns=names) # sets longest list of attributes to column names # add date and write to excel for day in data_dict.keys(): data_dict[day]['Date'] = day df = df.append(data_dict[day], ignore_index=True) df = df.set_index('Date') df.to_excel('store_data.xlsx')