Advertisement
Guest User

Scala Code

a guest
Nov 2nd, 2014
175
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Scala 0.69 KB | None | 0 0
  1. package edu.gatech.cse6242
  2.  
  3. import org.apache.spark.SparkContext
  4. import org.apache.spark.SparkContext._
  5. import org.apache.spark.SparkConf
  6.  
  /**
   * Spark job that aggregates a tab-separated text file and stores the
   * per-key totals on HDFS.
   *
   * Every input line is split on tabs; the second field is used as the key and
   * the third field (parsed as an `Int`) is summed per key. Each result is
   * written as one `key<TAB>total` line under the output path.
   *
   * NOTE(review): the input presumably has at least three tab-separated
   * columns with an integer third column -- confirm against the data set.
   * A line that has fewer fields or a non-integer third field fails the job.
   */
  object Task2 {

    /** HDFS URI prefix prepended to both command-line paths. */
    private val HdfsRoot = "hdfs://localhost:8020"

    /**
     * Entry point.
     *
     * @param args `args(0)` is the input path and `args(1)` the output path,
     *             both appended to the `hdfs://localhost:8020` prefix
     * @throws IllegalArgumentException if fewer than two arguments are given
     */
    def main(args: Array[String]): Unit = {
      require(args.length >= 2, "usage: Task2 <input-path> <output-path>")

      val sc = new SparkContext(new SparkConf().setAppName("Task2"))
      try {
        // read the file
        val file = sc.textFile(HdfsRoot + args(0))

        // (second column, third column as Int), summed per key
        val totals = file
          .map(_.split("\t"))
          .map(fields => (fields(1), fields(2).toInt))
          .reduceByKey(_ + _)

        // Store the aggregated totals -- not the raw input -- on the given
        // HDFS path. Formatting happens on the executors, so nothing has to
        // be collected to the driver.
        totals
          .map { case (key, total) => s"$key\t$total" }
          .saveAsTextFile(HdfsRoot + args(1))
      } finally {
        // Release cluster resources even when the job fails.
        sc.stop()
      }
    }
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement