Advertisement
Guest User

Untitled

a guest
Mar 29th, 2020
171
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.75 KB | None | 0 0
  1. import pandas as pd
  2.  
  3. data = pd.read_csv('/datasets/visits_eng.csv', sep='\t')
  4. data['local_time'] = (
  5. pd.to_datetime(data['date_time'], format='%Y-%m-%dT%H:%M:%S')
  6. - pd.Timedelta(hours=7)
  7. )
  8. data['date_hour'] = data['local_time'].dt.round('1H')
  9.  
  10. # filter excessively fast and slow visits and gas stations
  11. data['too_fast'] = data['time_spent'] < 60
  12. data['too_slow'] = data['time_spent'] > 1000
  13. too_fast_stat = data.pivot_table(index='id', values='too_fast')
  14. good_ids = too_fast_stat.query('too_fast < 0.5')
  15. good_data = data.query('id in @good_ids.index')
  16. good_data = good_data.query('60 <= time_spent <= 1000')
  17.  
  18. # consider data by individual gas station and by chains
  19. station_stat = data.pivot_table(index='id', values='time_spent', aggfunc='median')
  20. good_station_stat = good_data.pivot_table(index='id', values='time_spent', aggfunc='median')
  21. name_stat = data.pivot_table(index='name', values='time_spent')
  22. good_name_stat = good_data.pivot_table(index='name', values='time_spent', aggfunc='median')
  23. name_stat['good_time_spent'] = good_name_stat['time_spent']
  24.  
  25. id_name = good_data.pivot_table(index='id', values='name', aggfunc=['first', 'count'])
  26. id_name.columns = ['name', 'count']
  27. station_stat_full = id_name.join(good_station_stat)
  28.  
  29. # calculate the chains' results from the gas stations results,
  30. # but not average visits to all of a chain's gas stations
  31. good_name_stat2 = (
  32. station_stat_full
  33. .query('count > 30')
  34. .pivot_table(index='name', values='time_spent', aggfunc=['median', 'count'])
  35. )
  36. good_name_stat2.columns = ['median_time', 'stations']
  37. final_stat = name_stat.join(good_name_stat2)
  38. final_stat = final_stat.sort_values(by='median_time')
  39. #print(final_stat.head())
  40. final_stat.plot(y='median_time', kind = 'bar', figsize=(10, 5))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement