Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import pandas as pd

# Thresholds on the `time_spent` column. They are shared by the flag columns
# and by the final filter so the two can never disagree about the boundaries.
# NOTE(review): units are presumably seconds -- confirm against the dataset.
MIN_TIME_SPENT = 60
MAX_TIME_SPENT = 1000

# The file is tab-separated despite the .csv extension.
data = pd.read_csv("/datasets/visits.csv", sep="\t")

# Parse the ISO-like timestamp and shift it by +3 hours to get `local_time`.
data['local_time'] = (
    pd.to_datetime(data['date_time'], format='%Y-%m-%dT%H:%M:%S')
    + pd.Timedelta(hours=3)
)
# Round to the nearest hour.
# NOTE: the uppercase 'H' alias is deprecated in pandas >= 2.2 ('h' replaces
# it); switch the alias when the runtime pandas version is upgraded.
data['date_hour'] = data['local_time'].dt.round('1H')

# Flag visits that fall outside the acceptable duration range.
data['too_fast'] = data['time_spent'] < MIN_TIME_SPENT
data['too_slow'] = data['time_spent'] > MAX_TIME_SPENT

# pivot_table aggregates with the mean by default, so for a boolean column
# this yields the share of too-fast visits per `id`.
too_fast_stat = data.pivot_table(index='id', values='too_fast')

# Keep only ids where fewer than half of the visits were too fast.
good_ids = too_fast_stat.query('too_fast < 0.5')
good_data = data.query('id in @good_ids.index')

# Keep visits that are neither too fast nor too slow. The bounds are
# inclusive: a visit of exactly MIN_TIME_SPENT or MAX_TIME_SPENT is not
# flagged by `too_fast` / `too_slow` above, so it must not be dropped here.
good_data = good_data.query(
    '@MIN_TIME_SPENT <= time_spent <= @MAX_TIME_SPENT'
)
print(len(good_data))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement