pasholnahuy

Untitled

Nov 5th, 2023
73
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.79 KB | None | 0 0
  1. import datetime
  2. def get_df_sellers(sellers, items, products, orders, translation):
  3.     my_sellers = sellers[['seller_id', 'seller_state']].drop_duplicates()
  4.     merged_df = my_sellers.merge(items[['order_id', 'product_id', 'seller_id']], on = 'seller_id')
  5.     df = products.merge(merged_df, on = 'product_id')
  6.     df = df[['seller_state', 'product_category_name', 'order_id']].merge(orders[['order_status', 'order_id', 'order_delivered_customer_date']], on = 'order_id')
  7.     df = df.merge(translation, on = "product_category_name")
  8.     df.drop(columns = ['product_category_name'], inplace = True)
  9.     df.rename(columns = {'product_category_name_english': 'category', 'seller_state': 'state', 'order_status': 'status', 'order_delivered_customer_date' : 'date'}, inplace = True)
  10.     df = df.dropna()
  11.     df['date'] = pd.to_datetime(df['date']).apply(lambda x: x.strftime("%Y-%m"))
  12.     df = df.groupby(['date', 'state' , 'status','category']).agg({'order_id' : 'count'}).reset_index().rename(columns = {"order_id" : "count"})
  13.     return df
  14. def get_df_customers(customers, items, products, orders, translation):
  15.     my_customers = customers[['customer_id', 'customer_state']].drop_duplicates()
  16.     merged_df = my_customers.merge(orders, on = 'customer_id')
  17.     df = items.merge(merged_df, on = 'order_id')
  18.     df = df.merge(products, on = 'product_id')
  19.     df = df.merge(translation, on = "product_category_name")
  20.     df.drop(columns = ['product_category_name'], inplace = True)
  21.     df.rename(columns = {'product_category_name_english': 'category', 'customer_state': 'state', 'order_status': 'status', 'order_delivered_customer_date' : 'date'}, inplace = True)
  22.     df = df.dropna()
  23.     df['date'] = pd.to_datetime(df['date']).apply(lambda x: x.strftime("%Y-%m"))
  24.     df = df.groupby(['date', 'state' , 'status','category']).agg({'order_id' : 'count'}).reset_index().rename(columns = {"order_id" : "count"})
  25.     return df
  26. df = get_df_sellers(sellers, items, products, orders, translation)
  27.  
  28. all_dates = df
  29. all_dates['date'] = pd.to_datetime(all_dates.date).apply(lambda x: x.date())
  30. all_dates = all_dates.sort_values(by='date')
  31.  
  32. marks = {i: str(sorted(all_dates['date'])[i]) for i in range(0, all_dates.shape[0], 7850)}
  33. all_states = ['All'] + sorted(list(set(list(sellers.seller_state.unique()) + list(customers.customer_state.unique()))))
  34. all_statuses = orders.order_status.unique()
  35.     from dash import Dash, dcc, html, Input, Output
  36.     app = Dash(__name__)
  37.    
  38.     app.layout = html.Div([
  39.         html.Label('Order statuses to display:'),
  40.         dcc.Checklist(
  41.             all_statuses,
  42.             [df['status'][0]],
  43.             id = 'status-filter'
  44.         ),
  45.         html.Label('State:', style={'font-size': '120%'}),
  46.         html.Div(dcc.Dropdown(
  47.             all_states,
  48.             'All',
  49.             id = 'state-dropdown'
  50.         )),
  51.         dcc.Graph(id='sellers-graph'),
  52.         dcc.Graph(id='customers-graph'),
  53.         dcc.RangeSlider(
  54.             min=0,
  55.             max=all_dates.shape[0] - 1,
  56.             step=1,
  57.             dots=False,
  58.             value=[0, all_dates.shape[0] - 1],
  59.             marks = marks,
  60.             id='date-slider',
  61.         ),
  62.     ])
  63.    
  64.    
  65.    
  66.     @app.callback(
  67.         Output('sellers-graph', 'figure'),
  68.         [Input('date-slider', 'value'),
  69.          Input('state-dropdown', 'value'),
  70.          Input('status-filter', 'value')])
  71.    
  72.     def update_figure(time_gap, state_to_display, statuses_filter):
  73.         cur_df = get_df_sellers(sellers, items, products, orders, translation)
  74.         cur_df = cur_df[cur_df['status'].isin(statuses_filter)]
  75.         if state_to_display == 'All':
  76.             cur_df['state'] = 'All'
  77.         else:
  78.             cur_df = cur_df[cur_df.state == state_to_display]
  79.         cur_df = cur_df[pd.to_datetime(sorted(all_dates['date'])[time_gap[0]]) <= pd.to_datetime(cur_df.date)]
  80.         cur_df = cur_df[pd.to_datetime(cur_df.date) <= pd.to_datetime(sorted(all_dates['date'])[time_gap[1]])]
  81.         cur_df = cur_df.groupby(['category']).agg({'count' : 'count', 'state':'first'}).reset_index()
  82.         all_count = np.einsum('i->', cur_df['count'])
  83.         cur_df['count'] = cur_df['count'] * 100/all_count
  84.         cur_df.rename(columns = {"count":"propotion"})
  85.         fig = px.bar(cur_df, y="state", x="count", color="category",
  86.                      title="Distribution by sales categories", orientation='h', height=300)
  87.         fig.update_layout(transition_duration=500)
  88.         return fig
  89.        
  90.     @app.callback(
  91.         Output('customers-graph', 'figure'),
  92.         [Input('date-slider', 'value'),
  93.          Input('state-dropdown', 'value'),
  94.          Input('status-filter', 'value')])
  95.     def update_figure(time_gap, state_to_display, statuses_filter):
  96.         cur_df = get_df_customers(customers, items, products, orders, translation)
  97.         cur_df = cur_df[cur_df['status'].isin(statuses_filter)]
  98.         if state_to_display == 'All':
  99.             cur_df['state'] = 'All'
  100.         else:
  101.             cur_df = cur_df[cur_df.state == state_to_display]
  102.         cur_df = cur_df[pd.to_datetime(sorted(all_dates['date'])[time_gap[0]]) <= pd.to_datetime(cur_df.date)]
  103.         cur_df = cur_df[pd.to_datetime(cur_df.date) <= pd.to_datetime(sorted(all_dates['date'])[time_gap[1]])]
  104.         cur_df = cur_df.groupby(['category']).agg({'count' : 'count', 'state':'first'}).reset_index()
  105.         all_count = np.einsum('i->', cur_df['count'])
  106.         cur_df['count'] = cur_df['count'] * 100/all_count
  107.         cur_df.rename(columns = {"count":"propotion"})
  108.         fig = px.bar(cur_df, y="state", x="count", color="category",
  109.                      title="Distribution by customers categories", orientation='h', height=300)
  110.         fig.update_layout(transition_duration=500)
  111.         return fig
  112.     if __name__ == '__main__':
  113.         app.run_server(debug=True)
  114.  
Add Comment
Please, Sign In to add comment