Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def parseLine(line):
    """Extract the (age, numFriends) pair from one comma-separated record.

    The record is split on commas; the third field (index 2) is parsed as
    the age and the fourth field (index 3) as the number of friends.

    Args:
        line: One line of text containing at least four comma-separated
            fields, where fields 2 and 3 are integer literals.

    Returns:
        A tuple ``(age, numFriends)`` of two ints.

    Raises:
        IndexError: If the line has fewer than four fields.
        ValueError: If field 2 or field 3 is not a valid integer.
    """
    fields = line.split(',')
    age = int(fields[2])
    numFriends = int(fields[3])
    return (age, numFriends)
# Load the input file as an RDD of text lines.
# The path is a raw string on purpose: in a normal string literal "\f" is the
# form-feed escape, so "...\fakefriends.csv" would silently become a wrong
# path, and "\R" / "\m" are invalid escape sequences that trigger warnings.
# NOTE(review): sparkCont is not defined in the visible code; presumably a
# SparkContext created earlier in the file - confirm.
line = sparkCont.textFile(r"D:\ResearchInMotion\ml-100k\fakefriends.csv")

# Turn every text line into an (age, numFriends) key/value pair.
rdd = line.map(parseLine)

# For each age, accumulate (sum of numFriends, number of records):
# every value starts as (numFriends, 1) and pairs are added element-wise.
totalsByAge = rdd.mapValues(lambda x: (x, 1)).reduceByKey(
    lambda x, y: (x[0] + y[0], x[1] + y[1])
)

# Divide the sum by the count to get the average number of friends per age.
# NOTE(review): "/" is true division on Python 3; on Python 2 two ints would
# floor-divide - confirm the interpreter version this runs under.
averagesByAge = totalsByAge.mapValues(lambda x: x[0] / x[1])

# Bring the per-age averages back to the driver and print each pair.
results = averagesByAge.collect()
for result in results:
    print(result)
Add Comment
Please sign in to add a comment.