Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Build a small (key, value) demo DataFrame; note one value is null.
df = spark.sparkContext.parallelize([
    ("a", None), ("a", 1), ("a", -1), ("b", 3), ("b", 1)
]).toDF(["k", "v"])

# Window per key, ordered by (k, v). Window exposes static builder methods,
# so no instantiation (`Window()`) is needed.
# NOTE(review): because an orderBy is present and no frame is specified,
# Spark uses the default frame RANGE BETWEEN UNBOUNDED PRECEDING AND
# CURRENT ROW — so last() means "last value seen so far", not "last value
# in the whole partition". Add .rowsBetween(...) if whole-partition
# semantics are intended.
w = Window.partitionBy("k").orderBy("k", "v")

# last("v", True) passes ignorenulls=True: nulls are skipped when picking
# the last value within the frame.
df.select(F.col("k"), F.last("v", True).over(w).alias("v")).show()
- +---+----+
- | k| v|
- +---+----+
- | b| 1|
- | b| 3|
- | a|null|
- | a| -1|
- | a| 1|
- +---+----+
- +---+----+
- | k| v|
- +---+----+
- | b| 3|
- | b| 3|
- | a| 1|
- | a| 1|
- | a| 1|
- +---+----+
- df.orderBy('k','v').show()
- +---+----+
- | k| v|
- +---+----+
- | a|null|
- | a| -1|
- | a| 1|
- | b| 1|
- | b| 3|
- +---+----+
- df.orderBy('k','v').groupBy('k').agg(F.first('v')).show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement