Advertisement
Guest User

Untitled

a guest
Oct 23rd, 2019
94
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.04 KB | None | 0 0
  1. # eval recommendations from MovieLens data
  2. # tested it on DSSTNE, because it has straightforward steps to getting recommendations
  3. # to use to test this
  4. # fills in rec_titles and data_titles - a sorted list of recommended titles and original titles popularity
  5. # missing_titles is the list of movies not recommended from the original dataset
  6.  
  7. RECS_FILE = '[your path to "recs" file from DSSTNE]'
  8. TITLES_FILE = '[your path to "movies.csv" from the MovieLens .zip file]'
  9. ORIGINAL_DATA = '[your path to "ml-20m_ratings.csv" from MovieLens .zip file]'
  10.  
  11. data = {}
  12. with open(ORIGINAL_DATA, 'r') as file:
  13. line = file.readline() # skip header
  14. line = file.readline()
  15. while line:
  16. line = line[line.index(',')+1:]
  17. index = int(line[:line.index(',')])
  18. data[index] = data.get(index, 0) + 1
  19. line = file.readline()
  20.  
  21. recs = {}
  22. with open(RECS_FILE, 'r') as file:
  23. line = file.readline()
  24. while line:
  25. #usr_id = line[:line.index('\t')]
  26. line = line[line.index('\t')+1:]
  27.  
  28. for _ in range(10):
  29. index = int(line[:line.index(',')])
  30. recs[index] = recs.get(index, 0) + 1
  31. line = line[line.index(':')+1:]
  32.  
  33. line = file.readline()
  34.  
  35.  
  36. movies = {}
  37. with open(TITLES_FILE, 'r') as file:
  38. line = file.readline() # skip header
  39. line = file.readline()
  40. while line:
  41. index = line[:line.index(',')]
  42. index = int(index)
  43. line = line[line.index(',')+1:]
  44. if line[0] == '"':
  45. line = line[1:]
  46. title = line[:line.index('"')]
  47. else:
  48. title = line[:line.index(',')]
  49. movies[index] = title
  50. line = file.readline()
  51.  
  52. rec_titles = []
  53. for k,v in recs.items():
  54. rec_titles.append( (v, data[k], movies[k]) )
  55. rec_titles.sort(reverse=True)
  56.  
  57. data_titles = []
  58. for k,v in data.items():
  59. data_titles.append( (v, movies[k]) )
  60. data_titles.sort(reverse=True)
  61.  
  62. missing_titles = []
  63. for k,v in data.items():
  64. if k not in recs:
  65. missing_titles.append( (v, movies[k]) )
  66. missing_titles.sort(reverse=True)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement