Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package sparkdemo;
- import java.util.Arrays;
- import org.apache.spark.SparkConf;
- import org.apache.spark.api.java.JavaPairRDD;
- import org.apache.spark.api.java.JavaRDD;
- import org.apache.spark.api.java.JavaSparkContext;
- import scala.Tuple2;
- public class SparkDemo {
- public static void main(String[] args) {
- SparkConf conf = new SparkConf().setMaster("local").setAppName("Word Count");
- JavaSparkContext sc = new JavaSparkContext(conf);
- JavaRDD<String> textFile = sc.textFile("./ratings.csv");
- JavaRDD<String> titleFile = sc.textFile("./movies.csv");
- //count ratings
- JavaPairRDD<String, Integer> counts = textFile.mapToPair(line -> new Tuple2<>(line.split(",")[1], 1)).reduceByKey((a, b) -> a + b);
- //sum ratings
- JavaPairRDD<String, Double> ratingSum = textFile.mapToPair(line -> new Tuple2<>(line.split(",")[1], Double.valueOf(line.split(",")[2]))).reduceByKey((a, b) -> a + b);
- //join counts and sum
- JavaPairRDD<String, Tuple2<Integer, Double>> joinedRDD = counts.join(ratingSum);
- //count avg
- JavaPairRDD<String, Double> avgRDD = joinedRDD.mapToPair(entry -> new Tuple2<>(entry._1, (entry._2._2 / entry._2._1)));
- //get titles
- JavaPairRDD<String, String> titles = titleFile.mapToPair(line -> new Tuple2<>(line.split(",")[0], line.split(",")[1]));
- //assing titles
- JavaPairRDD<String, Tuple2<Double, String>> assignedRDD = avgRDD.join(titles);
- //Reduce Pairs
- JavaPairRDD<Double, String> finalRDD = assignedRDD.mapToPair(entry -> new Tuple2<>(entry._2._1, entry._2._2)).sortByKey();
- //Ranking
- finalRDD.foreach(p -> System.out.println(p));
- sc.stop();
- sc.close();
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement