import pyspark
import pyspark.mllib.recommendation as reco

# Create a local SparkContext, or keep the one that is already running (e.g. in a notebook)
try:
    sc = pyspark.SparkContext('local[*]')
except ValueError:
    pass  # a SparkContext already exists; reuse it
# Read the ratings file as lines of text (assuming the data contains no malformed rows)
lines = sc.textFile("ml-1m/ratings.dat")
ratings = lines.map(lambda l: l.split("::")) \
    .map(lambda p: reco.Rating(user=int(p[0]), product=int(p[1]), rating=int(p[2])))

# Read the movies file into (movie_id, title, list_of_genres) tuples
lines = sc.textFile("ml-1m/movies.dat")
movies = lines.map(lambda l: l.split("::")).map(lambda p: (int(p[0]), p[1], p[2].split('|')))
# Preview: print 20 movies sorted by their genre list
_movies = sc.parallelize(movies.sortBy(lambda a: a[2]).take(20))
for m in _movies.collect():
    print(m)
# Ratings from a new user (id 0), as (user, movie_id, rating) tuples
my_user_id = 0
my_ratings = [
    (my_user_id, 9, 5),
    (my_user_id, 20, 5),
    (my_user_id, 71, 4),
    (my_user_id, 145, 5),
    (my_user_id, 204, 4),
    (my_user_id, 227, 5),
    (my_user_id, 251, 5),
    (my_user_id, 315, 5),
    (my_user_id, 384, 4),
    (my_user_id, 393, 4),
]
new_ratings = sc.parallelize(my_ratings) \
    .map(lambda p: reco.Rating(user=int(p[0]), product=int(p[1]), rating=int(p[2])))
ratings = ratings.union(new_ratings)
# Count how many ratings each movie has
moviecounts = ratings.map(lambda r: (r[1], 1)).reduceByKey(lambda a, b: a + b)
# Movies with fewer than 30 ratings will be excluded from the recommendations
too_few_ratings = moviecounts.filter(lambda x: x[1] < 30)
# Keep only the movie IDs
too_few_ratings = too_few_ratings.map(lambda x: x[0]).collect()
# ALS hyperparameters
iterations = 5
rank = 8
lambd = 0.3  # regularization parameter

# Train the ALS model on all ratings (including the new user's)
model = reco.ALS.train(ratings, rank, iterations, lambd, nonnegative=True, seed=10)
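# Sketch (not part of the original paste): a quick sanity check of the fit, computing
# the RMSE of the model's predictions on the training ratings themselves.
from math import sqrt
user_product = ratings.map(lambda r: (r[0], r[1]))
train_preds = model.predictAll(user_product).map(lambda r: ((r[0], r[1]), r[2]))
truth = ratings.map(lambda r: ((r[0], r[1]), r[2]))
mse = truth.join(train_preds).map(lambda x: (x[1][0] - x[1][1]) ** 2).mean()
print("training RMSE: %.4f" % sqrt(mse))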
# Candidate movies: ones I have not rated myself and that have enough ratings overall
my_rated_ids = [row[1] for row in my_ratings]
my_not_rated = movies.filter(lambda x: x[0] not in my_rated_ids)            # remove my rated movies
my_not_rated = my_not_rated.filter(lambda x: x[0] not in too_few_ratings)   # remove movies with too few ratings
my_not_rated_with_user_id = my_not_rated.map(lambda p: (my_user_id, p[0]))  # (user_id, movie_id) pairs
predictions = model.predictAll(my_not_rated_with_user_id).map(lambda r: (r[1], r[2]))  # (movie_id, predicted rating)
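# Sketch (not part of the original paste): join the predictions with the movie titles
# and print the 10 movies with the highest predicted rating.
titles = movies.map(lambda m: (m[0], m[1]))   # (movie_id, title)
top10 = predictions.join(titles) \
    .map(lambda x: (x[1][0], x[1][1])) \
    .sortByKey(ascending=False) \
    .take(10)
for score, title in top10:
    print("%.2f  %s" % (score, title))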