Advertisement
Guest User

Untitled

a guest
Jan 20th, 2020
91
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.92 KB | None | 0 0
  1. import pandas as pd
  2.  
  3. data = pd.read_csv("/datasets/visits.csv", sep="\ ")
  4. data['local_time'] = (
  5. pd.to_datetime(data['date_time'], format='%Y-%m-%dT%H:%M:%S')
  6. + pd.Timedelta(hours=3)
  7. )
  8. data['date_hour'] = data['local_time'].dt.round('1H')
  9. data['too_fast'] = data['time_spent'] < 60
  10. data['too_slow'] = data['time_spent'] > 1000
  11.  
  12. too_fast_stat = data.pivot_table(index="id", values="too_fast")
  13. good_ids = too_fast_stat.query('too_fast < 0.5')
  14. good_data = data.query('id in @good_ids.index and 60 <= time_spent <= 1000')
  15. good_stations_stat = good_data.pivot_table(index='id', values='time_spent', aggfunc='median')
  16.  
  17. stat = data.pivot_table(index='name', values='time_spent')
  18. good_stat = good_data.pivot_table(index='name', values='time_spent', aggfunc='median')
  19. stat['good_time_spent'] = good_stat['time_spent']
  20. id_name = good_data.pivot_table(index='id', values='name', aggfunc=['first', 'count'])
  21. print(id_name.head())
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement