Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import spark.implicits._
- import org.apache.spark.sql.Dataset
- final case class Foo(k:String)
- final case class Bar(k:String, b:Boolean)
- val dsFoo:Dataset[Foo] = spark.createDataset(List(Foo("a"), Foo("x")))
- val dsBar:Dataset[Bar] = spark.createDataset(List(Bar("a", true), Bar("a", false), Bar("x", true)))
- //val dsBar:Dataset[Bar] = spark.createDataset(List(Bar("a", true), Bar("a", false))) //note no element to link to Foo("x")
- val dsFooBar = dsFoo.joinWith(dsBar, dsFoo("k") === dsBar("k"), "leftOuter")
- val dsGrouped = dsFooBar
- .groupByKey(_._1) // we want Foos with all their Bars
- .mapValues(_._2) //we don't need the Foos in the values because we have them in the keys
- dsGrouped.mapGroups( (foo:Foo, bars:Iterator[Bar]) => foo -> bars.toList).show //Baaaam explodes because (I think) Foo("x") has no Bar
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement