Advertisement
Guest User

Untitled

a guest
Oct 23rd, 2019
135
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.59 KB | None | 0 0
  # Count the visits left after dropping unreliable ids and implausible durations.
import pandas as pd

# Inclusive bounds for a plausible visit duration, in the units of the
# `time_spent` column (presumably seconds -- confirm against the dataset).
MIN_TIME_SPENT = 60
MAX_TIME_SPENT = 1000
# An id is kept only if fewer than this share of its visits are too fast.
MAX_TOO_FAST_SHARE = 0.5

data = pd.read_csv("/datasets/visits.csv", sep="\t")

# Shift the parsed timestamps by three hours.
# NOTE(review): looks like a UTC -> UTC+3 local-time conversion -- confirm.
data['local_time'] = (
    pd.to_datetime(data['date_time'], format='%Y-%m-%dT%H:%M:%S')
    + pd.Timedelta(hours=3)
)
# Lowercase '1h' is used because the uppercase 'H' alias is deprecated in
# recent pandas releases; both spellings mean "one hour".
data['date_hour'] = data['local_time'].dt.round('1h')

# Flag visits outside the plausible duration range.
data['too_fast'] = data['time_spent'] < MIN_TIME_SPENT
data['too_slow'] = data['time_spent'] > MAX_TIME_SPENT

# pivot_table aggregates with the mean by default, so for a boolean column
# this yields the share of too-fast visits per id.
too_fast_stat = data.pivot_table(index='id', values='too_fast')
good_ids = too_fast_stat.query('too_fast < @MAX_TOO_FAST_SHARE')
good_data = data.query('id in @good_ids.index')

# Keep exactly the visits that are neither too fast nor too slow. The bounds
# are inclusive so that durations equal to 60 or 1000 -- which the flags
# above do not mark as bad -- are not silently discarded.
good_data = good_data[
    good_data['time_spent'].between(MIN_TIME_SPENT, MAX_TIME_SPENT)
]
print(len(good_data))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement