Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
// Raw input: the fakefriends CSV loaded through the SparkContext as an RDD of text lines.
val inputRDD = sc.textFile(".../fakefriends.csv")
/** Extracts the `(age, numFriends)` pair from one comma-separated record.
  *
  * The record is split on `,`; the third field (index 2) is read as the age and
  * the fourth field (index 3) as the number of friends. Any further fields are
  * ignored.
  *
  * @param line one line of the CSV input
  * @return the pair `(age, numFriends)`
  * @throws java.lang.NumberFormatException if either field is not a valid integer
  * @throws java.lang.ArrayIndexOutOfBoundsException if the line has fewer than four fields
  */
def parseLineIntoTuple(line: String): (Int, Int) = {
  val fields = line.split(",")
  // Trim before parsing: `toInt` rejects surrounding whitespace, so a record
  // written as "0, Will, 33, 385" would otherwise fail.
  val age = fields(2).trim.toInt
  val numFriends = fields(3).trim.toInt
  (age, numFriends)
}
// Average number of friends per age, collected to the driver as an array of
// (age, average) pairs in ascending age order.
val avgFriendsByAge = {
  // Pair every record's friend count with a 1, then add both components up
  // per age: the result is (age, (total friends, number of records)).
  val totalsByAge = inputRDD
    .map(parseLineIntoTuple)
    .mapValues(friends => (friends, 1))
    .reduceByKey { case ((sumA, countA), (sumB, countB)) =>
      (sumA + sumB, countA + countB)
    }

  totalsByAge
    // Both operands are Int, so this is integer division (the average is truncated).
    .mapValues { case (total, count) => total / count }
    .sortBy { case (age, _) => age }
    .collect()
}

// Print one (age, average) pair per line.
avgFriendsByAge.foreach(println)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement