Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from collections import OrderedDict
- from collections import Counter
- import numpy as np
- import pandas
- import heapq
# Global popularity tables: how many times each item id was visited / purchased
# across all sessions in the training file. The recommenders below rank by them.
visit_popularity = Counter()
purchase_popularity = Counter()

# Each line of train.txt is split on ';' into a visits field and a purchases
# field, each a comma-separated list of item ids.
# `with` guarantees the handle is closed even if a malformed line raises.
with open('train.txt', 'r') as fTutor:
    for line in fTutor:
        line = line.strip()
        if not line:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        visits, purchases = line.split(';')
        # ''.split(',') yields [''], so filter out empty ids: a session with
        # no purchases must not count the empty string as a purchased item.
        visit_popularity.update(vId for vId in visits.split(',') if vId)
        purchase_popularity.update(pId for pId in purchases.split(',') if pId)
def recommend_by_purchase(items, max_count):
    """Return up to ``max_count`` distinct ``items`` ranked by purchase count.

    Duplicates in ``items`` are collapsed (first occurrence wins) before
    ranking. Items missing from ``purchase_popularity`` are scored as 0.
    """
    unique_items = OrderedDict.fromkeys(items)

    def purchase_score(item):
        return purchase_popularity.get(item, 0)

    return heapq.nlargest(max_count, unique_items, key=purchase_score)
def recommend_by_visit(items, max_count):
    """Return up to ``max_count`` distinct ``items`` ranked by visit count.

    Duplicates in ``items`` are collapsed (first occurrence wins) before
    ranking. Items missing from ``visit_popularity`` are scored as 0.
    """
    unique_items = OrderedDict.fromkeys(items)

    def visit_score(item):
        return visit_popularity.get(item, 0)

    return heapq.nlargest(max_count, unique_items, key=visit_score)
def calculate_metrics(recommend, max_count, sessions_file):
    """Compute average recall@k and precision@k for k = 1..max_count.

    Each line of ``sessions_file`` is split on ';' into a visits field and a
    purchases field, both comma-separated item ids. Sessions whose purchases
    field is empty are ignored; blank lines are skipped.

    Args:
        recommend: callable ``recommend(visits, max_count)`` returning an
            ordered iterable of recommended item ids for one session.
        max_count: largest k to evaluate; also passed through to ``recommend``.
        sessions_file: path of the sessions file to evaluate on.

    Returns:
        A pandas DataFrame indexed by ``k`` with columns ``avg_recall@k`` and
        ``avg_precision@k``, each rounded to two decimals. If no session has
        purchases, the averages are 0/0 and come out as NaN.

    Raises:
        ValueError: if a non-blank line does not contain exactly one ';'.
    """
    recall_sum = np.zeros(max_count)
    precision_sum = np.zeros(max_count)
    ranks = np.arange(max_count) + 1
    sessions_count = 0

    # `with` closes the file even when a malformed line raises.
    with open(sessions_file, 'r') as sessions:
        for line in sessions:
            line = line.strip()
            if not line:
                continue
            visits, purchases = line.split(';')
            if purchases == '':
                # Recall is undefined without purchases; skip the session.
                continue

            purchased = purchases.split(',')
            purchased_ids = set(purchased)  # O(1) membership tests

            # Never look past max_count recommendations, even if the
            # recommender returns more than it was asked for.
            recommended = list(recommend(visits.split(','), max_count))
            recommended = recommended[:max_count]

            # hits[i] is 1 when the i-th recommendation was purchased. Slots
            # beyond the end of a short recommendation list stay 0, so the
            # cumulative sum carries the last hit count forward to larger k.
            hits = np.zeros(max_count)
            hits[:len(recommended)] = [
                item in purchased_ids for item in recommended
            ]
            hits_at_k = np.cumsum(hits)

            # Recall is normalised by the raw purchase list length
            # (duplicates included), precision by k itself.
            recall_sum += hits_at_k / len(purchased)
            precision_sum += hits_at_k / ranks
            sessions_count += 1

    return pandas.DataFrame({
        'k': ranks,
        'avg_recall@k': [round(x, 2) for x in recall_sum / sessions_count],
        'avg_precision@k': [round(x, 2) for x in precision_sum / sessions_count],
    }).set_index('k')
# Evaluate both recommenders on the training and test sessions, printing one
# metrics table per (recommender, dataset) pair.
_REPORTS = (
    ('visit train', recommend_by_visit, 'train.txt'),
    ('visit test', recommend_by_visit, 'test.txt'),
    ('purch train', recommend_by_purchase, 'train.txt'),
    ('purch test', recommend_by_purchase, 'test.txt'),
)
for _label, _recommender, _sessions_path in _REPORTS:
    print(_label, calculate_metrics(_recommender, 5, _sessions_path))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement