Guest User

Untitled

a guest
Mar 22nd, 2018
278
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.42 KB | None | 0 0
  1. val pairRDD1 = sc.parallelize(List( ("cat",2), ("girl", 5), ("book", 4),("Tom", 12))) // left side of the join: an RDD of (String, Int) pairs
  2. val pairRDD2 = sc.parallelize(List( ("cat",2), ("cup", 5), ("mouse", 4),("girl", 12))) // right side: shares only the keys "cat" and "girl" with pairRDD1
  3.  
  4. val kk = pairRDD1.fullOuterJoin(pairRDD2).collect // full outer join by key, collected into a local Array; a key absent on one side yields None for that side (see output below)
  5.  
  6. kk: Array[(String, (Option[Int], Option[Int]))] = Array((book,(Some(4),None)), (Tom,(Some(12),None)), (girl,(Some(5),Some(12))), (mouse,(None,Some(4))), (cup,(None,Some(5))), (cat,(Some(2),Some(2))))
  7.  
  8. pairRDD1.fullOuterJoin(pairRDD2).mapValues(pair => (pair._1.getOrElse(0), pair._2.getOrElse(0))) // RDD variant: substitute 0 for a missing side; the result is not bound to a name here
  9.  
  10. kk.map { case (k, pair) => (k, (pair._1.getOrElse(0), pair._2.getOrElse(0))) } // same 0-defaulting applied to the already-collected Array kk
  11.  
  12. val ss = SparkSession.builder().master("local[*]").getOrCreate() // DataFrame variant of the same join: get (or reuse) a session with a local[*] master
  13. val sc = ss.sparkContext // SparkContext of that session, used by parallelize below
  14.  
  15. import ss.implicits._ // brings the implicits into scope that the .toDF calls below rely on
  16.  
  17. val pairRDD1 = sc.parallelize(List(("cat", 2,9999), ("girl", 5,8888), ("book", 4,9999), ("Tom", 12,6666))) // three-field tuples this time: (String, Int, Int)
  18. val pairRDD2 = sc.parallelize(List(("cat", 2,9999), ("cup", 5,7777), ("mouse", 4,3333), ("girl", 12,1111))) // again only "cat" and "girl" appear on both sides
  19.  
  20. val df1 = pairRDD1.toDF // no column names given, so the columns come out as _1, _2, _3 (see table header below)
  21. val df2 = pairRDD2.toDF
  22.  
  23. val joined = df1.join(df2, df1.col("_1") === df2.col("_1"),"fullouter") // full outer join on the first column; both sides' columns are kept, so the names _1.._3 appear twice
  24. joined.show() // prints the table that follows; unmatched rows show null on the missing side
  25.  
  26. +----+----+----+-----+----+----+
  27. |  _1|  _2|  _3|   _1|  _2|  _3|
  28. +----+----+----+-----+----+----+
  29. |girl|   5|8888| girl|  12|1111|
  30. | Tom|  12|6666| null|null|null|
  31. | cat|   2|9999|  cat|   2|9999|
  32. |null|null|null|  cup|   5|7777|
  33. |null|null|null|mouse|   4|3333|
  34. |book|   4|9999| null|null|null|
  35. +----+----+----+-----+----+----+
Add Comment
Please, Sign In to add comment