Advertisement
Guest User

ssqdsd

a guest
Apr 21st, 2017
80
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.92 KB | None | 0 0
  1. import pyspark
  2. import pyspark.mllib.recommendation as reco
  3.  
  4. try:
  5. sc = pyspark.SparkContext('local[*]')
  6. except:
  7. sc = sc
  8.  
  9. #read ratings file as lines of text, assuming no errors on data
  10. lines = sc.textFile("ml-1m/ratings.dat")
  11. ratings = lines.map(lambda l: l.split("::")). \
  12. map(lambda p: reco.Rating( user=int(p[0]), product=int(p[1]), rating=int(p[2])) )
  13.  
  14. lines = sc.textFile("ml-1m/movies.dat")
  15. movies = lines.map(lambda l: l.split("::")).map(lambda p: (int(p[0]),p[1], p[2].split('|') ))
  16. _movies = sc.parallelize(movies.sortBy(lambda a: a[2]).take(20))
  17. for m in _movies.collect():
  18. print(m)
  19. my_user_id = 0
  20.  
  21. my_ratings = [
  22. (my_user_id, 9, 5 ),
  23. (my_user_id, 20, 5 ),
  24. (my_user_id, 71, 4 ),
  25. (my_user_id, 145, 5 ),
  26. (my_user_id, 204, 4 ),
  27. (my_user_id, 227, 5 ),
  28. (my_user_id, 251, 5 ),
  29. (my_user_id, 315, 5 ),
  30. (my_user_id, 384, 4),
  31. (my_user_id, 393, 4)]
  32.  
  33. new_ratings = sc.parallelize(my_ratings).map(lambda p: reco.Rating( user=int(p[0]), product=int(p[1]), rating=int(p[2])) )
  34. ratings = ratings.union(new_ratings)
  35.  
  36. # calculate how many ratings has each movie
  37.  
  38. moviecounts = ratings.map(lambda r: (r[1], 1)).reduceByKey(lambda a,b: a+b)
  39.  
  40. # filter by ratings count
  41. too_few_ratings = moviecounts.filter(lambda x: x[1]<30)
  42.  
  43. # get only movies ID's
  44. too_few_ratings = too_few_ratings.map(lambda x: x[0]).collect()
  45.  
  46. iterations = 5;
  47. rank = 8;
  48. lambd = 0.3;
  49. #model calculation
  50. model = reco.ALS.train(ratings, rank, iterations, lambd, nonnegative=True, seed=10)
  51.  
  52. my_not_rated = _movies.filter(lambda x: x[0] not in [row[1] for row in my_ratings]) #remove my rated
  53. my_not_rated = my_not_rated.filter(lambda x: x[0] not in too_few_ratings) #remove with too few ratings
  54. my_not_rated_with_user_id = my_not_rated.map(lambda p: (my_user_id, p[0])) #add userid
  55.  
  56. predictions = model.predictAll(my_not_rated_with_user_id).map(lambda r: (r[1], r[2]))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement