Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
// Joins two CSV files on their first column, groups the first file's second
// column by the second file's second column, and writes each group to its own
// text output directory named "spark5/Employee<group key>".
//
// Presumably EmployeeName.csv is "id,name" and EmployeeSalary.csv is
// "id,salary" (inferred from the file names only -- confirm against the data).
// Intended for spark-shell, where `sc` (the SparkContext) is already in scope.

// Keeps the first two comma-separated columns of a row. Rows with fewer than
// two columns (blank lines, or "id," where Java's split drops the trailing
// empty field) are not matched, so `collect` below skips them instead of
// failing the job with an ArrayIndexOutOfBoundsException.
val firstTwoColumns: PartialFunction[Array[String], (String, String)] = {
  case Array(key, value, _*) => (key, value)
}

val name = sc.textFile("spark5/EmployeeName.csv")
// Split each line once, then keep only well-formed rows as (column0, column1).
val namePairRDD = name.map(_.split(",")).collect(firstTwoColumns)

val salary = sc.textFile("spark5/EmployeeSalary.csv")
val salaryPairRDD = salary.map(_.split(",")).collect(firstTwoColumns)

// Inner join on column 0: (key, (valueFromNameFile, valueFromSalaryFile)).
val joined = namePairRDD.join(salaryPairRDD)

// Drop the join key, leaving (valueFromNameFile, valueFromSalaryFile).
val keyRemoved = joined.values

// Swap so the salary-file value becomes the grouping key.
val swapped = keyRemoved.map(_.swap)

// NOTE: collect() materialises every group on the driver; this assumes the
// joined data is small enough to fit in driver memory.
val groupByKey = swapped.groupByKey().collect()

// Turn each driver-side group back into an RDD so it can be saved on its own.
val rddByKey = groupByKey.map { case (groupKey, members) =>
  groupKey -> sc.makeRDD(members.toSeq)
}

// One output directory per distinct group key.
rddByKey.foreach { case (groupKey, members) =>
  members.saveAsTextFile(s"spark5/Employee$groupKey")
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement