Advertisement
Guest User

Untitled

a guest
May 13th, 2016
102
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.86 KB | None | 0 0
  // Read the friends file as an RDD of text lines (assumes `sc` is the active SparkContext).
  val inputRDD = sc.textFile(".../fakefriends.csv")
  2.  
  /** Extracts the `(age, numFriends)` pair from one comma-separated record.
    *
    * The age is read from the third field (index 2) and the friend count from the
    * fourth field (index 3). Whitespace surrounding those two fields is ignored.
    *
    * @param line a single comma-separated record with at least four fields
    * @return the pair `(age, numFriends)`
    * @throws IllegalArgumentException if the line has fewer than four fields
    * @throws NumberFormatException if either field is not a valid integer
    */
  def parseLineIntoTuple(line: String): (Int, Int) = {
    val fields = line.split(",")
    // Report the offending record instead of failing with a bare index error.
    require(
      fields.length >= 4,
      s"Expected at least 4 comma-separated fields but got ${fields.length}: '$line'"
    )
    // Trim before converting: " 33".toInt would throw NumberFormatException.
    val age = fields(2).trim.toInt
    val numFriends = fields(3).trim.toInt
    (age, numFriends)
  }
  9.  
  // Average number of friends per age, collected as an array of (age, average)
  // pairs in ascending age order.
  val avgFriendsByAge = inputRDD
    .map(parseLineIntoTuple) // (age, friends)
    .mapValues(friends => (friends, 1)) // (age, (friends, 1))
    .reduceByKey((left, right) => (left._1 + right._1, left._2 + right._2)) // (age, (sum of friends, record count))
    // Divide as Double: Int / Int would silently truncate the average.
    .mapValues { case (total, count) => total.toDouble / count } // (age, sum / count)
    .sortBy { case (age, _) => age } // by first element of the tuple (age)
    .collect() // RDD -> Array

  avgFriendsByAge.foreach(println)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement