Advertisement
AnnaCh1971

Стабильность кумулятивных метрик задача 4

Aug 8th, 2023
1,315
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.48 KB | None | 0 0
  1. import pandas as pd
  2. import datetime as dt
  3. import numpy as np
  4. import matplotlib.pyplot as plt
  5. from pandas.plotting import register_matplotlib_converters
  6. import warnings
  7. # конвертеры, которые позволяют использовать типы pandas в matplotlib  
  8. register_matplotlib_converters()
  9.  
  10. orders = pd.read_csv('/datasets/data_for_tasks_3.csv', sep=',')
  11. orders['date'] = orders['date'].map(
  12.     lambda x: dt.datetime.strptime(x, '%d/%m/%Y')
  13. )
  14.  
  15. visitors = pd.read_csv('/datasets/data_for_tasks_3_visitors.csv', sep=',')
  16. visitors['date'] = visitors['date'].map(
  17.     lambda x: dt.datetime.strptime(x, '%d/%m/%Y')
  18. )
  19.  
  20. datesGroups = orders[['date', 'group']].drop_duplicates()
  21.  
  22. ordersAggregated = datesGroups.apply(
  23.     lambda x: orders[
  24.         np.logical_and(
  25.             orders['date'] <= x['date'], orders['group'] == x['group']
  26.         )
  27.     ].agg(
  28.         {
  29.             'date': 'max',
  30.             'group': 'max',
  31.             'orderId': 'nunique',
  32.             'userId': 'nunique',
  33.             'revenue': 'sum',
  34.         }
  35.     ),
  36.     axis=1,
  37. ).sort_values(by=['date', 'group'])
  38.  
  39. visitorsAggregated = datesGroups.apply(
  40.     lambda x: visitors[
  41.         np.logical_and(
  42.             visitors['date'] <= x['date'], visitors['group'] == x['group']
  43.         )
  44.     ].agg({'date': 'max', 'group': 'max', 'visitors': 'sum'}),
  45.     axis=1,
  46. ).sort_values(by=['date', 'group'])
  47.  
  48. cumulativeData = ordersAggregated.merge(
  49.     visitorsAggregated, left_on=['date', 'group'], right_on=['date', 'group']
  50. )
  51. cumulativeData.columns = [
  52.     'date',
  53.     'group',
  54.     'orders',
  55.     'buyers',
  56.     'revenue',
  57.     'visitors',
  58. ]
  59.  
  60. cumulativeRevenueA = cumulativeData[cumulativeData['group'] == 'A'][
  61.     ['date', 'revenue', 'orders']
  62. ]
  63. cumulativeRevenueB = cumulativeData[cumulativeData['group'] == 'B'][
  64.     ['date', 'revenue', 'orders']
  65. ]
  66. mergedCumulativeRevenue = cumulativeRevenueA.merge(cumulativeRevenueB, left_on = 'date',
  67.                                                    right_on='date',  how='left',
  68.                                                   suffixes = ['A', 'B'])
  69.  
  70. plt.figure(figsize=(12,4))
  71. plt.plot(mergedCumulativeRevenue['date'],
  72.          (mergedCumulativeRevenue['revenueB']/mergedCumulativeRevenue['ordersB'] -
  73.              mergedCumulativeRevenue['revenueA']/mergedCumulativeRevenue['ordersA'])/(
  74.          mergedCumulativeRevenue['revenueA']/mergedCumulativeRevenue['ordersA']))
  75. plt.axhline(y = 0, color ="black", linestyle ="--")
  76. plt.show()
  77.  
  78.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement