Advertisement
Guest User

Untitled

a guest
Oct 21st, 2016
85
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.70 KB | None | 0 0
  1. from collections import OrderedDict
  2. from collections import Counter
  3. import numpy as np
  4. import pandas
  5. import heapq
  6.  
  7. visit_popularity = Counter()
  8. purchase_popularity = Counter()
  9.  
  10. fTutor = open('train.txt', 'r')
  11. for line in fTutor.readlines():
  12.     visits, purchases = line.strip().split(';')
  13.     visits = visits.split(',')
  14.     purchases = purchases.split(',')
  15.     for vId in visits:
  16.         visit_popularity[vId] += 1
  17.     for pId in purchases:
  18.         purchase_popularity[pId] += 1
  19.  
  20. def recommend_by_purchase(items, max_count):
  21.     return heapq.nlargest(max_count, OrderedDict.fromkeys(items), key=lambda x: purchase_popularity.get(x, 0))
  22.  
  23. def recommend_by_visit(items, max_count):
  24.     return heapq.nlargest(max_count, OrderedDict.fromkeys(items), key=lambda x: visit_popularity.get(x, 0))
  25.  
  26. def calculate_metrics(recommend, max_count, sessions_file):
  27.     fout =  open(sessions_file, 'r')
  28.     avg_recall = np.zeros(max_count)
  29.     avg_precision = np.zeros(max_count)
  30.     sessions_count = 0
  31.     for line in fout.readlines():
  32.         visits, purchases = line.strip().split(';')
  33.         if purchases != '':
  34.             visits = visits.split(',')
  35.             purchases = purchases.split(',')
  36.             rec = recommend(visits, max_count)
  37.             #print (rec)
  38.             countFlags = 0
  39.             counter = np.zeros(max_count)
  40.             i = 0
  41.             for recomendation in rec:
  42.                 flag = False
  43.                 for elem in purchases:
  44.                     if (elem == recomendation):
  45.                         flag = True
  46.                         break;
  47.                 countFlags += flag
  48.                 counter[i] = countFlags
  49.                 i += 1
  50.             for i in range(1, max_count):
  51.                 if counter[i] == 0:
  52.                     counter[i] = counter[i - 1]
  53.             for i in range(max_count):
  54.                 avg_recall[i] += counter[i] * 1.0 / len(purchases)
  55.                 avg_precision[i] += counter[i] * 1.0 / (i + 1)
  56.             sessions_count += 1
  57.  
  58.  
  59.  
  60.             # TODO
  61.             # update avg_recall
  62.             # update avg_precision
  63.             # update sessions_count
  64.  
  65.     return pandas.DataFrame({
  66.             'k': np.arange(max_count) + 1,
  67.             'avg_recall@k': ([round(x, 2) for x in avg_recall / sessions_count]),
  68.             'avg_precision@k': ([round(x, 2) for x in avg_precision / sessions_count])
  69.     }).set_index('k')
  70.  
  71.  
  72.  
  73. print('visit train', calculate_metrics(recommend_by_visit, 5, 'train.txt'))
  74. print('visit test', calculate_metrics(recommend_by_visit, 5, 'test.txt'))
  75. print('purch train', calculate_metrics(recommend_by_purchase, 5, 'train.txt'))
  76. print('purch test', calculate_metrics(recommend_by_purchase, 5, 'test.txt'))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement