Advertisement
Guest User

Untitled

a guest
Sep 19th, 2019
148
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 10.03 KB | None | 0 0
from collections import Counter
from datetime import datetime, timedelta
from itertools import accumulate

import numpy as np
import plotly.graph_objs as go
from plotly import tools
from plotly.offline import plot
from scipy.spatial import distance

from src.utils import test_exist

  9. def validation(env,user_data, min_date):
  10.     #min_date = datetime(2018, 9, 13, 0, 0)
  11.     max_date = datetime(2018, 11, 2, 23, 50, 25)
  12.     last_date = max_date  # + timedelta(days=7)
  13.     cur_date_r = min_date + timedelta(days=7)
  14.     cur_date_l = min_date
  15.     weekly_dist = {}
  16.     week_iter = 0
  17.  
  18.     weekly_amounts = {}
  19.     user_set = [u.name for u in env.client_list]
  20.     weekly_user_dist = {k:{} for k in user_set}
  21.     while cur_date_r <= last_date:
  22.         weekly_dist[week_iter] = {shop: 0 for shop in env.avg_prices}
  23.         weekly_amounts[week_iter] = 0
  24.         for user in user_set:
  25.  
  26.             for trans in user_data[user]:
  27.                 if trans['shop'] in env.avg_prices:
  28.                     date = datetime.strptime(trans['TRANS_DATE'] + ' ' + trans['TRANS_TIME'], '%d.%m.%Y %H:%M:%S')
  29.                     if date >= cur_date_l and date < cur_date_r:
  30.                         weekly_amounts[week_iter] += trans['AMOUNT_EQ']
  31.                         weekly_dist.setdefault(week_iter, {})[trans['shop']] += 1
  32.                         weekly_user_dist.setdefault(user, {}).setdefault(week_iter, {}).setdefault(trans['shop'],
  33.                                                                                                    []).append(1)
  34.             if not test_exist(weekly_user_dist[user], week_iter):
  35.                 weekly_user_dist[user][week_iter] = {}
  36.         cur_date_l += timedelta(days=7)
  37.         cur_date_r += timedelta(days=7)
  38.         week_iter += 1
  39.  
  40.     client_error = {}
  41.     shop_error = {}
  42.     client_error_visits = {}
  43.     tp_shops = {shop.name: {week: len(shop.transactions[week]) for week in shop.transactions} for shop in env.shop_list}
  44.     amts_shops = {shop.name: {week: sum([x[2] for x in shop.transactions[week]]) for week in shop.transactions} for shop
  45.                   in env.shop_list}
  46.     for client in env.client_list:
  47.         temp_error = []
  48.         temp_sums = []
  49.         temp_error_shop = {}
  50.         t_shops = {week: Counter([shop[0].name for shop in trans]) for week, trans in client.transactions.items()}
  51.         for week in t_shops:
  52.             temp_week_err = []
  53.             data_vect = [
  54.                 len(weekly_user_dist[client.name][week][shop]) if test_exist(weekly_user_dist[client.name][week],
  55.                                                                              shop) else 0 for shop in env.avg_prices]
  56.             temp_sums.append(sum(data_vect))
  57.             model_vect = [t_shops[week][shop] if test_exist(t_shops[week], shop) else 0 for shop in env.avg_prices]
  58.             temp_error.append(distance.minkowski(data_vect, model_vect, 1))# ** (2))
  59.         client_error[client.name] = np.mean(temp_error)# ** (1 / 2), np.mean(temp_error) ** (1 / 2) / np.mean(temp_sums)
  60.  
  61.  
  62.         trans = [v for v in client.transactions.values()]
  63.         rs = []
  64.         for week in t_shops:
  65.             try:
  66.                 len(trans[week])
  67.                 rs.append((temp_sums[week] - len(trans[week])))
  68.             except IndexError:
  69.                 pass
  70.         client_error_visits[client.name]  =  np.mean(rs)
  71.  
  72.  
  73.     temp_shop_sums = {}
  74.     temp_all_error = {}
  75.     for week in t_shops:
  76.         temp_all_error[week] = [0, 0]
  77.         for shop in env.avg_prices:
  78.             temp_shop_sums.setdefault(shop, []).append(weekly_dist[week][shop])
  79.             if test_exist(weekly_dist, week) and test_exist(tp_shops[shop], week):
  80.                 temp_error_shop.setdefault(shop, []).append(((weekly_dist[week][shop]) - tp_shops[shop][week]) ** 2)
  81.                 temp_all_error[week][0] += weekly_dist[week][shop]
  82.                 temp_all_error[week][1] += tp_shops[shop][week]
  83.             elif test_exist(weekly_dist, week):
  84.                 temp_error_shop.setdefault(shop, []).append(((weekly_dist[week][shop]) - 0) ** 2)
  85.             else:
  86.                 temp_error_shop.setdefault(shop, []).append((0 - tp_shops[shop][week]) ** 2)
  87.  
  88.     shop_error = {shop: np.mean(temp_error_shop[shop]) ** (1 / 2) / np.mean(temp_shop_sums[shop]) for shop in
  89.                   temp_error_shop}
  90.     return client_error, shop_error
  91.  
  92.     def plot_cumulative(env, min_date, visits_data):
  93.     from sklearn.metrics import mean_squared_error
  94.     def chunks(l, n):
  95.         """Yield successive n-sized chunks from l."""
  96.         for i in range(0, len(l), n):
  97.             yield l[i:i + n]
  98.     def calc_err(t1,t2):
  99.         t1 = [sum(x) for x in chunks(t1,24)]
  100.  
  101.         t2 = [sum(x) for x in chunks(t2,24) if x != 0]
  102.  
  103.         return t1,t2
  104.  
  105.     def make_plot(name, m_v, m_a, t_v, t_a):
  106.         #m_a = [int(b) / int(m) if int(m) > 0 else 0 for b, m in zip(m_a, m_v) ]
  107.         #_a = [int(b) / int(m) if int(m) > 0 else 0  for b, m in zip(t_a, t_v)]
  108.         t_v,t_a = calc_err(t_v,t_a)
  109.         m_v, m_a = calc_err(m_v, m_a)
  110.         v1 = np.mean(t_v)
  111.         v2 = np.mean(m_v)
  112.         w1 = np.std(t_v)
  113.         w2 = np.std(m_v)
  114.         m1 = np.mean(t_a)
  115.         m2 = np.mean(m_a)
  116.         mw1 = np.std(t_a)
  117.         mw2 = np.std(m_a)
  118.         mapa = {'prisma':'Призма','spar':'Спар','dixy':'Дикси','magnit':'Магнит','semishagoff':'Семишагофф','okey':'Окей','perekrestok':'Перекресток','auchan':'Ашан','lenta':'Лента','pyaterochka':'Пятерочка','real':'Реал','semya':'Семья'}
  119.         return (go.Scatter(x=[v1, v2], y=[m1, m2], mode='markers', marker=dict(symbol=['circle', 'square'], size=15),
  120.                            error_x=dict(type='data', array=[w1, w2]), error_y=dict(type='data', array=[mw1, mw2]),
  121.                            name=mapa[name]))
  122.  
  123.     def make_cumulative(ar):
  124.         res = [ar[0]]
  125.         for i in range(1, len(ar)):
  126.             res.append(res[i - 1] + ar[i])
  127.         return res
  128.  
  129.     data_visits = []
  130.     data_visits_err = []
  131.     data_amounts = []
  132.     diff = int(abs((datetime(2018, 9, 14, 0, 0) - min_date).total_seconds()//3600))
  133.     data_space = []
  134.     for shop in env.cumulative_shops[0].keys():
  135.         x, y, y1, y2, y3, y4 = [], [], [], [], [], []
  136.  
  137.         for h in env.cumulative_shops.keys():
  138.             x.append(min_date + timedelta(hours=h))
  139.             y.append(env.cumulative_shops[h][shop][0])
  140.             y1.append(env.cumulative_shops[h][shop][1])
  141.             y2.append(visits_data[h + diff][shop][0])
  142.             y3.append(visits_data[h + diff][shop][1])
  143.             if visits_data[h + diff][shop][0] > 0:
  144.                 y4.append(
  145.                     abs(env.cumulative_shops[h][shop][0] - visits_data[h + diff][shop][0]) / visits_data[h + diff][shop][
  146.                         0])
  147.             else:
  148.                 y4.append(0)
  149.  
  150.         data_visits.append(go.Bar(x=x, y=make_cumulative(y), opacity=0.8, name=shop + '_model'))
  151.         data_amounts.append(go.Bar(x=x, y=make_cumulative(y1), opacity=0.8, name=shop + '_model'))
  152.         data_visits.append(go.Bar(x=x, y=make_cumulative(y2), opacity=0.8, name=shop + '_data'))
  153.         data_amounts.append(go.Bar(x=x, y=make_cumulative(y3), opacity=0.8, name=shop + '_data'))
  154.         data_space.append(make_plot(shop,y,y1,y2,y3))
  155.     layout = go.Layout(
  156.         legend=dict(font=dict(size=20)),
  157.         yaxis=dict(title='Number of visits', titlefont=dict(size=25)),
  158.         xaxis=go.layout.XAxis(
  159.             tickformat='%d %B (%a)<br> %H'
  160.         )
  161.     )
  162.  
  163.     fig = go.Figure(
  164.         data=data_visits,
  165.         layout=layout)
  166.     fig_e = go.Figure(
  167.         data=data_visits_err,
  168.         layout=layout)
  169.     layout['yaxis']['title'] = 'Amount'
  170.     fig1 = go.Figure(
  171.         data=data_amounts,
  172.         layout=layout)
  173.     layout = go.Layout()
  174.     layout['yaxis']['title'] = 'Среднедневный оборот'
  175.     layout['xaxis']['title'] = 'Среднее число визитов (в день)'
  176.     layout['xaxis']['titlefont'] =dict(size=25)
  177.     layout['yaxis']['titlefont'] =dict(size=25)
  178.     layout['xaxis']['tickfont'] = dict(size=20)
  179.     layout['yaxis']['tickfont'] = dict(size=20)
  180.     fig_space = go.Figure(data=data_space, layout=layout)
  181.     plot(fig_space)
  182.     plot(fig ,filename='plots/cumulative_visits.html')
  183.     plot(fig1,filename='plots/cumulative_amounts.html')
  184.  
  185. def plot_user(user, first_week, env, test_set, days):
  186.  
  187.     trans = test_set[user]
  188.     client = [client for client in env.client_list if client.name ==user][0]
  189.     modelled = []
  190.     for k,v in client.transactions.items():
  191.         for trans in v:
  192.             modelled.append([trans[0].name, (trans[1]-trans[2]) /  24,trans[2],trans[-1]])
  193.  
  194.     from_data = []
  195.     transactions = sorted(test_set[user], key= lambda x: datetime.strptime(x['TRANS_DATE'],'%d.%m.%Y'))
  196.     transactions = [x for x in transactions if datetime.strptime(x['TRANS_DATE'],'%d.%m.%Y') > (first_week + timedelta(days=7)) and datetime.strptime(x['TRANS_DATE'],'%d.%m.%Y') <= (first_week + timedelta(days=14))]
  197.  
  198.     for trans in transactions:
  199.         from_data.append([trans['shop'], abs((first_week + timedelta(days=days) - datetime.strptime(trans['TRANS_DATE'],'%d.%m.%Y')).days),int(datetime.strptime(trans['TRANS_TIME'],'%H:%M:%S').hour),trans['AMOUNT_EQ']])
  200.  
  201.     to_plot = []
  202.  
  203.     for shop in set([x[0] for x in modelled]+[x[0] for x in from_data]):
  204.         x = []
  205.         y = []
  206.         shape = []
  207.         size = []
  208.         for trans in modelled:
  209.             if trans[0] == shop:
  210.                 x.append(trans[1])
  211.                 y.append(trans[2])
  212.                 shape.append('square')
  213.                 size.append(trans[-1] / 10)
  214.         for trans in from_data:
  215.             if trans[0] == shop:
  216.                 x.append(trans[1])
  217.                 y.append(trans[2])
  218.                 shape.append('circle')
  219.                 size.append(trans[-1]/ 10)
  220.         to_plot.append(go.Scatter(name=shop,mode='markers', x=x,y=y, marker=dict(symbol=shape, size=size)))
  221.  
  222.     plot([go.Bar(x=list(client.go_probs.keys()),y=list(client.go_probs.values()))])
  223.     plot(to_plot, filename='plots/'+user+'.html')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement