Advertisement
Guest User

Untitled

a guest
Dec 8th, 2019
99
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.97 KB | None | 0 0
  1. package sparkdemo;
  2.  
  3. import java.util.Arrays;
  4.  
  5. import org.apache.spark.SparkConf;
  6. import org.apache.spark.api.java.JavaPairRDD;
  7. import org.apache.spark.api.java.JavaRDD;
  8. import org.apache.spark.api.java.JavaSparkContext;
  9.  
  10. import scala.Tuple2;
  11.  
  12. public class SparkDemo {
  13.  
  14. public static void main(String[] args) {
  15. SparkConf conf = new SparkConf().setMaster("local").setAppName("Word Count");
  16. JavaSparkContext sc = new JavaSparkContext(conf);
  17. JavaRDD<String> textFile = sc.textFile("hdfs:/user/maria_dev/ratings.csv");
  18. JavaPairRDD<String, Integer> counts = textFile.mapToPair(line -> new Tuple2<>(line.split(",")[1], 1)).reduceByKey((a, b) -> a + b);
  19. JavaPairRDD<Integer, String> ranking = counts.mapToPair(e -> e.swap()).sortByKey();
  20. ranking.foreach(p -> System.out.println(p));
  21. System.out.println("Total words: " + ranking.count());
  22. //counts.saveAsTextFile("file:///home/maria_dev/tmp/shakespeareWordCount");
  23. sc.stop();
  24. sc.close();
  25. }
  26.  
  27. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement