Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
/**
 * Computes, for every age group, the ten movies with the highest average rating.
 *
 * Each input record pairs a user ID with a user tuple and that user's ratings.
 * The third field of the user tuple is used as the age group (per the original
 * author's notes), and every rating is a (movie, rating, genre) triple.
 *
 * @param rdd records shaped as (userId, ((_, _, age), ratings))
 * @return one entry per age group, sorted by age group in ascending order; each
 *         value holds at most ten (movie, average rating) pairs ordered from the
 *         highest average rating to the lowest
 */
def topTenMoviesOneliner(rdd: RDD[(Int, ((String, Int, Int), Iterable[(String, Int, String)]))]): RDD[(Int, Array[(String, Float)])] = {
  rdd
    // Key every rating by (age, movie); the user ID and the genre are not needed.
    .flatMap { case (_, ((_, _, age), ratings)) =>
      ratings.map { case (movie, rating, _) => ((age, movie), (rating.toLong, 1L)) }
    }
    // Merge (sum, count) pairs with reduceByKey so partial sums are combined
    // before the shuffle, instead of moving every single rating via groupByKey.
    // The sum is kept as a Long so it cannot overflow Int.
    .reduceByKey { case ((sumA, countA), (sumB, countB)) => (sumA + sumB, countA + countB) }
    // Turn each (sum, count) into an average and re-key by age group alone.
    .map { case ((age, movie), (sum, count)) => (age, (movie, sum.toFloat / count)) }
    .groupByKey
    // Within each age group keep the ten best-rated movies, best first.
    .mapValues(_.toArray.sortBy(_._2)(Ordering[Float].reverse).take(10))
    .sortByKey()
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement