Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
package sparkdemo;

import java.util.Arrays;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

import scala.Tuple2;
- public class SparkDemo {
- public static void main(String[] args) {
- SparkConf conf = new SparkConf().setMaster("local").setAppName("Word Count");
- JavaSparkContext sc = new JavaSparkContext(conf);
- JavaRDD<String> textFile = sc.textFile("hdfs:/user/maria_dev/ratings.csv");
- JavaPairRDD<String, Integer> counts = textFile.mapToPair(line -> new Tuple2<>(line.split(",")[1], 1)).reduceByKey((a, b) -> a + b);
- JavaPairRDD<Integer, String> ranking = counts.mapToPair(e -> e.swap()).sortByKey();
- ranking.foreach(p -> System.out.println(p));
- System.out.println("Total words: " + ranking.count());
- //counts.saveAsTextFile("file:///home/maria_dev/tmp/shakespeareWordCount");
- sc.stop();
- sc.close();
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement