Advertisement
Guest User

Untitled

a guest
Feb 19th, 2019
63
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.37 KB | None | 0 0
  1. import pandas as pd
  2. import numpy as np
  3.  
  4.  
  5.  
  6. def result_printer(msg, result):
  7. print("-" * 40)
  8. print(msg)
  9. print(result)
  10. print("-" * 40)
  11.  
  12.  
  13. class Solution:
  14. def __init__(self):
  15. csv = pd.read_csv("SWE sample data - Q3 data.csv")
  16. self.data = csv
  17. # remove pandas print width limit
  18. pd.set_option('display.expand_frame_repr', False)
  19.  
  20. def q1_most_unique_user(self):
  21. df = self.data[self.data.country_id == "BDV"][["site_id", "user_id"]]
  22. results = df.groupby(["site_id"])["user_id"].nunique() \
  23. .sort_values(ascending=False).reset_index(name='unique_user_id')
  24. return results.head(1)
  25.  
  26. def q2(self):
  27. df = self.data[(self.data.ts >= "2019-02-03 00:00:00") & (self.data.ts <= "2019-02-04 23:59:59")]
  28. df_grouped = df.groupby(["site_id", "user_id"]).count()
  29. return df_grouped[df_grouped.ts >= 10]
  30.  
  31. def q3_num_last_visit(self):
  32. df = self.data.groupby(["user_id"]).apply(
  33. lambda x: x.sort_values(["ts"], ascending=False).head(1).reset_index(drop=True)
  34. )
  35. return df.groupby(["site_id"]).count().sort_values(["user_id"], ascending=False)
  36.  
  37. def q4_same_first_last_visit(self, exclude_users_who_visit_only_once):
  38. # filter users who visited only once
  39. if exclude_users_who_visit_only_once:
  40. df = self.data.drop("country_id", axis=1).groupby(["user_id"]) \
  41. .filter( lambda df: df['user_id'].count() > 1).reset_index(drop=True)
  42. else:
  43. df = self.data
  44. grouped = df.groupby(["user_id"])
  45. first_visit = grouped.apply(lambda df: df[df.ts == df.ts.min()]).reset_index(drop=True)
  46. last_visit = grouped.apply(lambda df: df[df.ts == df.ts.max()]).reset_index(drop=True)
  47. df_result = first_visit.merge(last_visit, left_on='user_id', right_on="user_id")
  48.  
  49. return df_result[df_result.site_id_x == df_result.site_id_y]
  50.  
  51.  
  52. solution = Solution()
  53. # print(solution.data)
  54.  
  55. result_printer("Site with Most users in BDV:", solution.q1_most_unique_user())
  56. result_printer("Q2:", solution.q2())
  57. result_printer("Q3 num of last visit:", solution.q3_num_last_visit())
  58. result_printer("Q4 same first/last visit user: \n Excluding users who visited only once", solution.q4_same_first_last_visit(True))
  59. result_printer("Q4 same first/last visit user: \n Including users who visited only once", solution.q4_same_first_last_visit(False))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement