Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import xlrd
- import os
- import pandas as pd
- def get_daily_data(sheet_name):
- attributes = {}
- try:
- book = xlrd.open_workbook(sheet_name)
- sheet = book.sheet_by_name('Sheet1')
- # static variables
- try:
- attributes['net_sales'] = round(float(sheet.cell(9, 14).__str__()[7:]))
- attributes['breakfast_sales'] = round(float(sheet.cell(25, 13).__str__()[7:]))
- attributes['lunch_sales'] = round(float(sheet.cell(24, 13).__str__()[7:]))
- attributes['dinner_sales'] = round(float(sheet.cell(23, 13).__str__()[7:]))
- attributes['latenight_sales'] = round(float(sheet.cell(21, 13).__str__()[7:]))
- attributes['total_customers'] = sheet.cell(27, 19).__str__()[7:]
- attributes['breakfast_customers'] = sheet.cell(25, 23).__str__()[7:]
- attributes['lunch_customers'] = sheet.cell(24, 23).__str__()[7:]
- attributes['dinner_customers'] = sheet.cell(23, 23).__str__()[7:]
- attributes['latenight_customers'] = sheet.cell(21, 23).__str__()[7:]
- except ValueError:
- print(sheet_name + ' is missing at least one meal.')
- pass
- # dynamic variables
- for row in range(37, 77):
- cell = sheet.cell(row, 26).__str__()
- if cell != "empty:''":
- attributes[str(cell[5:].lower().replace("'", '')) + '_sold'] = int(float(sheet.cell(row, 32).__str__()[7:]))
- if cell == "text:'Snacks'":
- attributes[str(cell[5:].lower().replace("'", '')) + '_sold'] = int(float(sheet.cell(row, 32).__str__()[7:]))
- break
- return attributes
- except xlrd.biffh.XLRDError:
- print(sheet_name + ' failed to process correctly.')
- pass
- # store all data
- data_dict = {}
- for filename in os.listdir(r'/Users/CarterW.Ward/Desktop/DOTSWeather/venv/kkd'):
- # string repair to match master keys
- full_date = filename[:10]
- day = full_date[3:5]
- month = full_date[0:2]
- year = full_date[6:]
- corrected_date = year + '-' + month + '-' + day
- data_dict[corrected_date] = get_daily_data(r'/Users/CarterW.Ward/Desktop/DOTSWeather/venv/kkd/' + filename)
- max_len = 0
- max_key = ''
- trash = []
- for date in data_dict: # finds longest set of attributes
- if data_dict[date] is None:
- trash.append(date)
- else:
- cur_len = len(data_dict[date])
- if cur_len > max_len:
- max_key = date
- max_len = cur_len
- for piece_of in trash:
- data_dict.pop(piece_of)
- print(data_dict)
- names = list(data_dict[max_key].keys())
- df = pd.DataFrame(columns=names) # sets longest list of attributes to column names
- # add date and write to excel
- for day in data_dict.keys():
- data_dict[day]['Date'] = day
- df = df.append(data_dict[day], ignore_index=True)
- df = df.set_index('Date')
- df.to_excel('store_data.xlsx')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement