SHARE
TWEET

Untitled

a guest Nov 18th, 2019 77 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  import pandas as pd

  # Load the tab-separated visits log.
  # A real tab character ("\t") is used as the separator: the two-character
  # string backslash + "t" is treated by pandas as a regular expression, which
  # forces the slower Python parsing engine and emits a ParserWarning.
  data = pd.read_csv("/datasets/visits.csv", sep="\t")

  # Parse the ISO-like timestamp and shift it by +3 hours.
  # NOTE(review): presumably a UTC -> local-time conversion; confirm the offset.
  data['local_time'] = (
      pd.to_datetime(data['date_time'], format='%Y-%m-%dT%H:%M:%S')
      + pd.Timedelta(hours=3)
  )
  # Round to the nearest hour. The lower-case 'h' alias is used because the
  # upper-case 'H' alias is deprecated in recent pandas releases.
  data['date_hour'] = data['local_time'].dt.round('1h')

  # Flag suspiciously short and suspiciously long visits.
  # NOTE(review): time_spent is presumably measured in seconds; confirm.
  data['too_fast'] = data['time_spent'] < 60
  data['too_slow'] = data['time_spent'] > 1000

  # pivot_table aggregates with the mean by default, so this is the share of
  # "too fast" visits for every id.
  too_fast_stat = data.pivot_table(index='id', values='too_fast')
  # Keep only ids where fewer than half of the visits are too fast ...
  good_ids = too_fast_stat.query('too_fast < 0.5')
  good_data = data.query('id in @good_ids.index')
  # ... and, within those ids, only visits of a plausible duration.
  good_data = good_data.query('60 <= time_spent <= 1000')

  # Median visit duration per id, before and after filtering.
  station_stat = data.pivot_table(
      index="id", values="time_spent", aggfunc="median"
  )
  good_station_stat = good_data.pivot_table(
      index="id", values="time_spent", aggfunc="median"
  )

  # Per-name comparison: mean duration over the raw data (default aggfunc)
  # next to the median duration over the filtered data.
  stat = data.pivot_table(index='name', values='time_spent')
  good_stat = good_data.pivot_table(
      index='name', values='time_spent', aggfunc='median'
  )
  stat['good_time_spent'] = good_stat['time_spent']

  # The original script rebuilt the very same table under a second name by
  # repeating both pivot tables; an independent copy gives the same result.
  good_name_stat = good_stat.copy()
  name_stat = stat.copy()

  # For every id: the first name seen and the number of filtered visits.
  id_name = good_data.pivot_table(
      index='id', values='name', aggfunc=['first', 'count']
  )
  id_name.columns = ['name', 'count']
  station_stat_full = id_name.join(good_station_stat)

  # Only ids with more than 30 filtered visits contribute. For every name take
  # the median of the per-id medians and count how many ids were used.
  good_stat2 = (
      station_stat_full
      .query('count > 30')
      .pivot_table(index='name', values='time_spent', aggfunc=['median', 'count'])
  )
  good_stat2.columns = ['median_time', 'stations']

  final_stat = stat.join(good_stat2)

  # Bar chart of the per-name median time, ascending; names without enough
  # data (no median_time after the join) are dropped.
  (
      final_stat
      .dropna(subset=['median_time'])
      .sort_values('median_time')
      .plot(y='median_time', kind='bar', figsize=(10, 5), grid=True)
  )
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top