Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package edu.gatech.cse6242
- import org.apache.spark.SparkContext
- import org.apache.spark.SparkContext._
- import org.apache.spark.SparkConf
object Task2 {
  /** Word-count style aggregation job.
    *
    * Reads a tab-separated file from HDFS, groups by the key in column
    * index 1, sums the integer counts in column index 2, and writes the
    * aggregated `key<TAB>total` lines back to HDFS.
    *
    * args(0) = input HDFS path (appended to hdfs://localhost:8020)
    * args(1) = output HDFS path (appended to hdfs://localhost:8020)
    */
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("Task2"))
    // read the input file from HDFS
    val file = sc.textFile("hdfs://localhost:8020" + args(0))
    // each line is tab-separated; x(1) is the key, x(2) the count
    // NOTE(review): assumes every line has >= 3 columns and x(2) parses as
    // an Int — malformed lines will fail the job; confirm input guarantees.
    val tokenized = file.map(_.split("\t"))
    val wordCounts = tokenized.map(x => (x(1), x(2).toInt)).reduceByKey(_ + _)
    // BUG FIX: the original saved the raw input RDD (`file`) instead of the
    // aggregated result, and ran a discarded `collect().mkString` that pulled
    // the whole result to the driver only to throw it away (removed).
    wordCounts
      .map { case (word, count) => s"$word\t$count" }
      .saveAsTextFile("hdfs://localhost:8020" + args(1))
    sc.stop()
  }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement