Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from plotly import tools
- from plotly.offline import plot
- import plotly.graph_objs as go, numpy as np
- from datetime import timedelta, datetime
- from src.utils import test_exist
- from collections import Counter
- from scipy.spatial import distance
def validation(env, user_data, min_date, max_date=None):
    """Compare simulated shop visits with observed transactions, week by week.

    Observed transactions are bucketed into consecutive 7-day windows
    ``[min_date + 7k days, min_date + 7(k + 1) days)``.  Only windows whose
    right edge is on or before ``max_date`` are kept.  Transactions whose
    ``'shop'`` is not a key of ``env.avg_prices`` are ignored.

    Args:
        env: Simulation environment.  This function reads
            ``env.avg_prices`` (iterated for shop names),
            ``env.client_list`` and ``env.shop_list``.  Clients and shops
            expose ``name`` and ``transactions``; ``transactions`` maps a
            week key to a list of visits, and for clients the first element
            of every visit is an object with a ``name`` attribute.
        user_data: Mapping from client name to a list of observed
            transactions.  Each transaction is a mapping with ``'shop'``,
            ``'TRANS_DATE'`` (``%d.%m.%Y``) and ``'TRANS_TIME'``
            (``%H:%M:%S``).
        min_date: ``datetime`` at which the first weekly window starts.
        max_date: ``datetime`` bounding the last weekly window.  Defaults to
            ``datetime(2018, 11, 2, 23, 50, 25)``, the value that used to be
            hard-coded.

    Returns:
        A ``(client_error, shop_error)`` tuple.

        ``client_error`` maps each client name to the mean, over the
        client's simulated weeks, of the L1 distance between the observed
        and the simulated per-shop visit counts (``nan`` when the client has
        no simulated weeks).

        ``shop_error`` maps each shop name to the root-mean-square
        difference between observed and simulated weekly visit totals,
        divided by the mean observed weekly total.  When that mean is zero
        the value is whatever numpy division yields (``inf``/``nan``).

    Raises:
        KeyError: if a client name is missing from ``user_data``.
        ValueError: if a transaction date or time cannot be parsed.
    """
    # NOTE(review): the original guarded lookups with src.utils.test_exist;
    # this version assumes that helper is a plain key-presence check and
    # uses dict.get instead -- confirm against src.utils.
    # NOTE(review): simulated week keys are assumed to use the same 0-based
    # numbering as the observed weekly windows -- verify against the model.
    if max_date is None:
        max_date = datetime(2018, 11, 2, 23, 50, 25)

    one_week = timedelta(days=7)
    # Number of whole 7-day windows that fit between min_date and max_date.
    n_weeks = max(0, (max_date - min_date) // one_week)

    shops = list(env.avg_prices)
    user_names = [client.name for client in env.client_list]

    # Observed visit counts: per week and shop, and per user, week and shop.
    weekly_dist = {week: dict.fromkeys(shops, 0) for week in range(n_weeks)}
    weekly_user_dist = {
        name: {week: Counter() for week in range(n_weeks)}
        for name in user_names
    }

    for name in user_names:
        for trans in user_data[name]:
            shop = trans['shop']
            if shop not in env.avg_prices:
                continue
            stamp = datetime.strptime(
                trans['TRANS_DATE'] + ' ' + trans['TRANS_TIME'],
                '%d.%m.%Y %H:%M:%S',
            )
            # Each timestamp is parsed once and mapped straight to its
            # window index instead of being re-parsed for every window.
            week = (stamp - min_date) // one_week
            if 0 <= week < n_weeks:
                weekly_dist[week][shop] += 1
                weekly_user_dist[name][week][shop] += 1

    # Simulated number of visits per shop and week.
    tp_shops = {
        shop.name: {week: len(visits) for week, visits in shop.transactions.items()}
        for shop in env.shop_list
    }

    client_error = {}
    # Ordered union of every simulated week seen for any client.  A dict is
    # used as an insertion-ordered set.
    model_weeks = {}
    for client in env.client_list:
        observed_weeks = weekly_user_dist[client.name]
        weekly_l1 = []
        for week, visits in client.transactions.items():
            model_weeks.setdefault(week, None)
            model_counts = Counter(visit[0].name for visit in visits)
            # A simulated week with no observed window counts as zero visits.
            observed_counts = observed_weeks.get(week, {})
            data_vect = [observed_counts.get(shop, 0) for shop in shops]
            model_vect = [model_counts.get(shop, 0) for shop in shops]
            weekly_l1.append(distance.minkowski(data_vect, model_vect, 1))
        client_error[client.name] = np.mean(weekly_l1) if weekly_l1 else np.nan

    squared_errors = {}
    observed_totals = {}
    for week in model_weeks:
        observed_week = weekly_dist.get(week, {})
        for shop in shops:
            observed = observed_week.get(shop, 0)
            modelled = tp_shops.get(shop, {}).get(week, 0)
            squared_errors.setdefault(shop, []).append((observed - modelled) ** 2)
            observed_totals.setdefault(shop, []).append(observed)

    shop_error = {
        shop: np.mean(errors) ** 0.5 / np.mean(observed_totals[shop])
        for shop, errors in squared_errors.items()
    }
    return client_error, shop_error
def plot_cumulative(env, min_date, visits_data, data_start=None):
    """Plot simulated against observed visits and amounts for every shop.

    Three plotly figures are written with ``plotly.offline.plot``:

    * a scatter plot with one trace per shop, showing the mean daily number
      of visits (x) against the mean daily amount (y) for the observed data
      (circle) and the model (square), with standard deviations as error
      bars; it is written to plotly's default output file;
    * cumulative visits per shop -> ``plots/cumulative_visits.html``;
    * cumulative amounts per shop -> ``plots/cumulative_amounts.html``.

    Args:
        env: Simulation environment.  ``env.cumulative_shops`` maps an hour
            index to ``{shop: [visits, amount]}``; the shops plotted are
            those listed under hour ``0``.
        min_date: ``datetime`` corresponding to hour index ``0`` of the
            simulation.
        visits_data: Observed data, indexed as
            ``visits_data[hour][shop] -> [visits, amount]``.
        data_start: ``datetime`` used to compute the hour offset between
            ``visits_data`` and the simulation.  Defaults to
            ``datetime(2018, 9, 14, 0, 0)``, the value that used to be
            hard-coded.
    """
    from itertools import accumulate

    if data_start is None:
        data_start = datetime(2018, 9, 14, 0, 0)
    # Whole hours between data_start and min_date; added to every simulated
    # hour index when looking up visits_data.
    diff = int(abs((data_start - min_date).total_seconds() // 3600))

    shop_titles = {'prisma':'Призма','spar':'Спар','dixy':'Дикси','magnit':'Магнит','semishagoff':'Семишагофф','okey':'Окей','perekrestok':'Перекресток','auchan':'Ашан','lenta':'Лента','pyaterochka':'Пятерочка','real':'Реал','semya':'Семья'}

    def daily_totals(hourly):
        """Sum an hourly series in consecutive chunks of 24 values."""
        return [sum(hourly[i:i + 24]) for i in range(0, len(hourly), 24)]

    def summary_trace(name, m_v, m_a, t_v, t_a):
        """Build the mean/std scatter trace of one shop (data first, model second)."""
        t_v, t_a = daily_totals(t_v), daily_totals(t_a)
        m_v, m_a = daily_totals(m_v), daily_totals(m_a)
        return go.Scatter(
            x=[np.mean(t_v), np.mean(m_v)],
            y=[np.mean(t_a), np.mean(m_a)],
            mode='markers',
            marker=dict(symbol=['circle', 'square'], size=15),
            error_x=dict(type='data', array=[np.std(t_v), np.std(m_v)]),
            error_y=dict(type='data', array=[np.std(t_a), np.std(m_a)]),
            # Shops without a display title fall back to their raw name.
            name=shop_titles.get(name, name),
        )

    def time_axis_layout(y_title):
        """Return a fresh layout so that no two figures share one object."""
        return go.Layout(
            legend=dict(font=dict(size=20)),
            yaxis=dict(title=y_title, titlefont=dict(size=25)),
            xaxis=go.layout.XAxis(
                tickformat='%d %B (%a)<br> %H'
            )
        )

    hours = list(env.cumulative_shops.keys())
    timestamps = [min_date + timedelta(hours=h) for h in hours]

    data_visits = []
    data_amounts = []
    data_space = []
    for shop in env.cumulative_shops[0].keys():
        model_visits = [env.cumulative_shops[h][shop][0] for h in hours]
        model_amounts = [env.cumulative_shops[h][shop][1] for h in hours]
        real_visits = [visits_data[h + diff][shop][0] for h in hours]
        real_amounts = [visits_data[h + diff][shop][1] for h in hours]

        data_visits.append(go.Bar(x=timestamps, y=list(accumulate(model_visits)), opacity=0.8, name=shop + '_model'))
        data_amounts.append(go.Bar(x=timestamps, y=list(accumulate(model_amounts)), opacity=0.8, name=shop + '_model'))
        data_visits.append(go.Bar(x=timestamps, y=list(accumulate(real_visits)), opacity=0.8, name=shop + '_data'))
        data_amounts.append(go.Bar(x=timestamps, y=list(accumulate(real_amounts)), opacity=0.8, name=shop + '_data'))
        data_space.append(summary_trace(shop, model_visits, model_amounts, real_visits, real_amounts))

    fig = go.Figure(data=data_visits, layout=time_axis_layout('Number of visits'))
    fig1 = go.Figure(data=data_amounts, layout=time_axis_layout('Amount'))

    space_layout = go.Layout(
        xaxis=dict(
            title='Среднее число визитов (в день)',
            titlefont=dict(size=25),
            tickfont=dict(size=20),
        ),
        yaxis=dict(
            title='Среднедневный оборот',
            titlefont=dict(size=25),
            tickfont=dict(size=20),
        ),
    )
    fig_space = go.Figure(data=data_space, layout=space_layout)

    plot(fig_space)
    plot(fig, filename='plots/cumulative_visits.html')
    plot(fig1, filename='plots/cumulative_amounts.html')
def plot_user(user, first_week, env, test_set, days):
    """Plot one client's simulated visits next to the observed ones.

    Two plotly outputs are produced with ``plotly.offline.plot``: a bar
    chart of ``client.go_probs`` (written to plotly's default output file)
    and a scatter plot written to ``plots/<user>.html``.  The scatter plot
    has one trace per shop; simulated visits are squares, observed visits
    are circles, and the marker size is the visit amount divided by 10.

    Args:
        user: Client name; must be a key of ``test_set`` and the ``name`` of
            a client in ``env.client_list``.
        first_week: ``datetime`` from which the observed window is derived:
            only transactions dated after ``first_week + 7 days`` and up to
            and including ``first_week + 14 days`` are plotted.
        env: Simulation environment providing ``client_list``.
        test_set: Mapping from client name to observed transactions (dicts
            with ``'shop'``, ``'TRANS_DATE'``, ``'TRANS_TIME'`` and
            ``'AMOUNT_EQ'``).
        days: Day offset added to ``first_week`` to get the reference date
            from which the x coordinate of observed visits is measured.

    Raises:
        KeyError: if ``user`` is not in ``test_set``.
        IndexError: if no client in ``env.client_list`` is named ``user``.
    """
    observed = test_set[user]

    matching = [candidate for candidate in env.client_list if candidate.name == user]
    if not matching:
        raise IndexError('no client named {!r} in env.client_list'.format(user))
    client = matching[0]

    # shop -> parallel lists of x, y, marker symbol and marker size.  Dict
    # insertion order gives a deterministic trace order.
    per_shop = {}

    def add_point(shop, x, y, symbol, amount):
        """Append one marker to the trace data of ``shop``."""
        series = per_shop.setdefault(shop, ([], [], [], []))
        series[0].append(x)
        series[1].append(y)
        series[2].append(symbol)
        series[3].append(amount / 10)

    # Simulated visits.  NOTE(review): visit[1] and visit[2] look like an
    # absolute hour and an hour of day, so x would be a day index -- confirm.
    for visits in client.transactions.values():
        for visit in visits:
            add_point(visit[0].name, (visit[1] - visit[2]) / 24, visit[2], 'square', visit[-1])

    # Observed visits: parse each date once, keep chronological order.
    window_start = first_week + timedelta(days=7)
    window_end = first_week + timedelta(days=14)
    reference = first_week + timedelta(days=days)
    dated = [(datetime.strptime(trans['TRANS_DATE'], '%d.%m.%Y'), trans) for trans in observed]
    dated.sort(key=lambda pair: pair[0])
    for date, trans in dated:
        if not (window_start < date <= window_end):
            continue
        hour = int(datetime.strptime(trans['TRANS_TIME'], '%H:%M:%S').hour)
        add_point(trans['shop'], abs((reference - date).days), hour, 'circle', trans['AMOUNT_EQ'])

    to_plot = [
        go.Scatter(name=shop, mode='markers', x=xs, y=ys, marker=dict(symbol=symbols, size=sizes))
        for shop, (xs, ys, symbols, sizes) in per_shop.items()
    ]

    plot([go.Bar(x=list(client.go_probs.keys()), y=list(client.go_probs.values()))])
    plot(to_plot, filename='plots/' + user + '.html')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement