import spark.implicits._
import org.apache.spark.sql.Dataset

final case class Foo(k: String)
final case class Bar(k: String, b: Boolean)

val dsFoo: Dataset[Foo] = spark.createDataset(List(Foo("a"), Foo("x")))
val dsBar: Dataset[Bar] = spark.createDataset(List(Bar("a", true), Bar("a", false), Bar("x", true)))
//val dsBar: Dataset[Bar] = spark.createDataset(List(Bar("a", true), Bar("a", false))) // note: no element matching Foo("x")

val dsFooBar = dsFoo.joinWith(dsBar, dsFoo("k") === dsBar("k"), "leftOuter")

val dsGrouped = dsFooBar
    .groupByKey(_._1) // we want each Foo with all of its Bars
    .mapValues(_._2)  // the Foos are already the keys, so keep only the Bars as values

// Baaaam, explodes: when dsBar has no row matching Foo("x") (the commented-out dsBar above),
// the left outer join leaves a null on the Bar side, which the encoders cannot serialize.
dsGrouped.mapGroups((foo: Foo, bars: Iterator[Bar]) => foo -> bars.toList).show
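
// A possible workaround (just a sketch, assuming Spark 2.x semantics; the name dsFixed
// is illustrative, not from the original snippet): collect the Bars inside mapGroups and
// wrap the right-hand side of each joined pair in Option, so the nulls produced by the
// left outer join are dropped instead of ever reaching an encoder.
val dsFixed = dsFooBar
    .groupByKey(_._1) // group the (Foo, Bar) pairs by their Foo
    .mapGroups { (foo, pairs) =>
      // Option(null) is None, so a Foo without any matching Bar ends up with an empty list
      foo -> pairs.flatMap(p => Option(p._2)).toList
    }

dsFixed.show // Foo("x") now appears with an empty Bar list instead of blowing up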