Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import pandas as pd
- import numpy as np
def result_printer(msg, result):
    """Print ``msg`` and ``result`` framed by two 40-dash separator lines.

    Args:
        msg: Heading printed on its own line above the result.
        result: Any printable object (in this script, a pandas result).
    """
    separator = "-" * 40
    print(separator)
    print(msg)
    print(result)
    print(separator)
class Solution:
    """Answer the visit-log questions over a table of site visits.

    The methods read the columns ``ts``, ``user_id``, ``country_id`` and
    ``site_id`` from ``self.data``. ``ts`` is compared as loaded from the CSV
    (no datetime parsing is done here), so ordering relies on the values
    sorting correctly in that form.
    """

    def __init__(self, csv_path="SWE sample data - Q3 data.csv"):
        """Load the visit log.

        Args:
            csv_path: Anything ``pandas.read_csv`` accepts (path or file-like
                object). Defaults to the sample file the script was written for.
        """
        self.data = pd.read_csv(csv_path)
        # remove pandas print width limit (process-wide display option)
        pd.set_option('display.expand_frame_repr', False)

    def q1_most_unique_user(self):
        """Return the site with the most distinct users in country ``"BDV"``.

        Returns:
            A one-row DataFrame with columns ``site_id`` and ``unique_user_id``
            (the number of distinct users). Empty if no row is in ``"BDV"``.
        """
        in_country = self.data[self.data.country_id == "BDV"][["site_id", "user_id"]]
        unique_users = in_country.groupby(["site_id"])["user_id"].nunique()
        ranked = unique_users.sort_values(ascending=False).reset_index(name='unique_user_id')
        return ranked.head(1)

    def q2(self, start="2019-02-03 00:00:00", end="2019-02-04 23:59:59", min_visits=10):
        """Return (site, user) pairs with at least ``min_visits`` visits in a window.

        Args:
            start: Inclusive lower bound compared against ``ts``.
            end: Inclusive upper bound compared against ``ts``.
            min_visits: Minimum number of rows a (site, user) pair needs.

        Returns:
            A DataFrame indexed by ``(site_id, user_id)`` whose remaining
            columns hold per-pair non-null counts; only pairs whose ``ts``
            count reaches ``min_visits`` are kept.
        """
        in_window = self.data[(self.data.ts >= start) & (self.data.ts <= end)]
        visit_counts = in_window.groupby(["site_id", "user_id"]).count()
        return visit_counts[visit_counts.ts >= min_visits]

    def q3_num_last_visit(self):
        """Count, per site, the users whose most recent visit was to that site.

        Returns:
            A DataFrame indexed by ``site_id`` with per-column non-null counts,
            sorted by the ``user_id`` count in descending order.
        """
        # One row per user: the row with the greatest ``ts``. Sorting once and
        # de-duplicating avoids ``groupby.apply``, whose handling of the
        # grouping column differs between pandas versions.
        newest_first = self.data.dropna(subset=["user_id"]).sort_values(
            "ts", ascending=False, kind="stable"
        )
        last_visits = newest_first.drop_duplicates("user_id", keep="first")
        return last_visits.groupby(["site_id"]).count().sort_values(["user_id"], ascending=False)

    @staticmethod
    def _ordered_by_user(frame):
        """Return ``frame`` stably sorted by ``user_id`` with a fresh index."""
        return frame.sort_values("user_id", kind="stable").reset_index(drop=True)

    def q4_same_first_last_visit(self, exclude_users_who_visit_only_once):
        """Return users whose first and last visits were to the same site.

        Args:
            exclude_users_who_visit_only_once: When true, users with a single
                row are ignored. In that mode the ``country_id`` column is also
                dropped before merging, so the result has no ``country_id_x`` /
                ``country_id_y`` columns (behaviour kept from the original).

        Returns:
            The first-visit rows merged with the last-visit rows on
            ``user_id`` (suffixes ``_x`` = first, ``_y`` = last), restricted to
            rows where ``site_id_x == site_id_y``. Ties on the minimum or
            maximum ``ts`` yield one merged row per combination.
        """
        if exclude_users_who_visit_only_once:
            visits = self.data.drop(columns="country_id")
            # keep=False marks every row of a user that appears more than once
            visits = visits[visits["user_id"].duplicated(keep=False)]
        else:
            visits = self.data

        # groupby skips rows without a user id; drop them up front so the
        # row-wise masks below agree with the per-user minima/maxima.
        visits = visits.dropna(subset=["user_id"])
        ts_by_user = visits.groupby("user_id")["ts"]

        first_visit = self._ordered_by_user(visits[visits["ts"] == ts_by_user.transform("min")])
        last_visit = self._ordered_by_user(visits[visits["ts"] == ts_by_user.transform("max")])

        merged = first_visit.merge(last_visit, on="user_id")
        return merged[merged.site_id_x == merged.site_id_y]
# Script entry point: load the sample CSV and print the answer to each question.
# Guarded so that importing this module does not read the file or print.
if __name__ == "__main__":
    solution = Solution()
    result_printer("Site with Most users in BDV:", solution.q1_most_unique_user())
    result_printer("Q2:", solution.q2())
    result_printer("Q3 num of last visit:", solution.q3_num_last_visit())
    result_printer("Q4 same first/last visit user: \n Excluding users who visited only once", solution.q4_same_first_last_visit(True))
    result_printer("Q4 same first/last visit user: \n Including users who visited only once", solution.q4_same_first_last_visit(False))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement