Advertisement
Guest User

Untitled

a guest
Feb 17th, 2020
112
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.57 KB | None | 0 0
  // Joins the rows of spark5/EmployeeName.csv and spark5/EmployeeSalary.csv on
  // their first comma-separated column, groups the second column of the name
  // file by the second column of the salary file, and writes one text output
  // per distinct salary-file value to "spark5/Employee<value>".
  //
  // NOTE(review): presumably the files hold "id,name" and "id,salary" rows --
  // this is inferred from the file names only; confirm against the data.
  // A SparkContext named `sc` must already be in scope.

  val name = sc.textFile("spark5/EmployeeName.csv")
  // Split each line only once. Rows with fewer than two fields (blank lines,
  // "id," with an empty trailing field, ...) are skipped instead of failing the
  // whole job with ArrayIndexOutOfBoundsException.
  // `collect` with a partial function is the lazy filter+map transformation,
  // not the action that ships data to the driver.
  val namePairRDD = name
    .map(_.split(","))
    .collect { case Array(id, value, _*) => (id, value) }

  val salary = sc.textFile("spark5/EmployeeSalary.csv")
  // Same parsing rule as above: first column is the join key, second the value.
  val salaryPairRDD = salary
    .map(_.split(","))
    .collect { case Array(id, value, _*) => (id, value) }

  // Inner join on the first column: (key, (nameFileValue, salaryFileValue)).
  val joined = namePairRDD.join(salaryPairRDD)
  // The join key is no longer needed once the rows are matched.
  val keyRemoved = joined.values
  // Re-key by the salary-file value: (salaryFileValue, nameFileValue).
  val swapped = keyRemoved.map(_.swap)

  // This `collect()` IS the action: every group is materialised on the driver.
  // The per-key RDDs below have to be created on the driver because RDDs cannot
  // be nested inside other RDDs; this assumes the grouped data fits in driver
  // memory.
  val groupByKey = swapped.groupByKey().collect()

  // Turn each group's values back into a small RDD so it can be saved on its own.
  val rddByKey = groupByKey.map {
    case (k, v) => k -> sc.makeRDD(v.toSeq)
  }

  // One output per distinct key; the key is appended verbatim to the path prefix.
  rddByKey.foreach { case (k, rdd) => rdd.saveAsTextFile(s"spark5/Employee$k") }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement