Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import csv
- import json
def _leaders(scores):
    """Return ``(names, best)`` for the highest strictly positive score.

    *scores* is an iterable of ``(product_id, score)`` pairs. ``names`` is the
    comma-separated list of every product id that reaches ``best``, in the
    order the pairs were supplied. When no score is above zero the result is
    ``("", 0)``.
    """
    best = 0
    winners = []
    for product_id, score in scores:
        if score > best:
            best, winners = score, [product_id]
        elif score == best and winners:
            # Same score as the current leader: report it as well, since the
            # summary is phrased as "product(s)".
            winners.append(product_id)
    return ", ".join(str(product_id) for product_id in winners), best


def f(path='SWE sample data - Q2 data.csv'):
    """Summarise the most popular product(s) found in a purchase log.

    The file is read with ``csv.reader`` (space delimiter, double-quote quote
    character). The first field of each row is parsed as a JSON object that
    must provide the keys ``product_id``, ``user_id`` and ``quantity``.

    Args:
        path: Location of the log file. Defaults to the file name the script
            has always used, so ``f()`` keeps working unchanged.

    Returns:
        A tuple of two sentences: the product(s) bought by the most distinct
        users, and the product(s) with the largest summed quantity. Products
        tied for first place are all listed, separated by ", ". If no product
        has a positive score, the product name is empty and the count is 0.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a row's first field is not valid JSON.
        KeyError: If a record lacks one of the required keys.
    """
    stats_by_product = {}
    # newline='' leaves line-ending handling to the csv module, as its
    # documentation requires for files passed to csv.reader.
    with open(path, 'r', newline='') as csv_file:
        for row in csv.reader(csv_file, delimiter=' ', quotechar='"'):
            if not row:
                # csv.reader yields an empty list for a blank line.
                continue
            record = json.loads(row[0])
            stats = stats_by_product.setdefault(
                record['product_id'], {"num_user": set(), "quantity": 0}
            )
            stats["num_user"].add(record['user_id'])
            stats["quantity"] += record['quantity']

    # A single pass per metric is enough; sorting would cost O(n log n).
    users_names, users_best = _leaders(
        (product_id, len(stats["num_user"]))
        for product_id, stats in stats_by_product.items()
    )
    quantity_names, quantity_best = _leaders(
        (product_id, stats["quantity"])
        for product_id, stats in stats_by_product.items()
    )
    return (
        "Most popular product(s) based on the number of purchasers: {0} with {1} unique users."
        .format(users_names, users_best),
        "Most popular product(s) based on the quantity of goods sold: {0} with {1} items sold."
        .format(quantity_names, quantity_best)
    )
# Build the two-sentence report and echo the resulting tuple to stdout.
_report = f()
print(_report)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement