Guest User

Untitled

a guest
Nov 21st, 2018
90
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.45 KB | None | 0 0
  1. def parseLine(line):
  2. fields = line.split(',')
  3. age = int(fields[2])
  4. numFriends = int(fields[3])
  5. return (age,numFriends)
  6.  
  7. line = sparkCont.textFile("D:\ResearchInMotion\ml-100k\fakefriends.csv")
  8. rdd = line.map(parseLine)
  9. totalsByAge = rdd.mapValues(lambda x: (x, 1)).reduceByKey(lambda x, y: (x[0] + y[0], x[1] + y[1]))
  10. averagesByAge = totalsByAge.mapValues(lambda x: x[0] / x[1])
  11. results = averagesByAge.collect()
  12. for result in results:
  13. print(result)
Add Comment
Please, Sign In to add comment