Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- +--------+
- |some_num|
- +--------+
- | 3|
- | 24|
- +--------+
- // Candidate (lower, upper) ranges, typed to match bucketFinder's `buckets` parameter.
- val bucketRanges: Array[(Any, Any)] = Array((0, 10), (10, 20), (20, 30))
- // Add a "bucket" column derived from some_num and print the resulting DataFrame.
- df.withColumn("bucket", bucketFinder(col("some_num"), bucketRanges)).show()
- +--------+------+
- |some_num|bucket|
- +--------+------+
- | 3| 0-10|
- | 24| 20-30|
- +--------+------+
- /**
-  * Builds a column that labels each value of `col` with the bucket it falls into.
-  *
-  * Buckets are tested in array order and the first match wins. `between` is
-  * inclusive on both ends, so a value sitting on a shared boundary (e.g. 10 for
-  * (0, 10) and (10, 20)) is assigned to the earlier bucket. Values that match no
-  * bucket, or an empty `buckets` array, yield a null label.
-  *
-  * @param col     column holding the values to classify
-  * @param buckets (lower, upper) bounds of each bucket
-  * @return a string column with labels of the form "lower-upper"
-  */
- def bucketFinder(col: Column, buckets: Array[(Any, Any)]): Column = {
-   // foldRight nests the conditions so that the first bucket ends up outermost:
-   // when(b1).otherwise(when(b2).otherwise(... null)). The previous foreach
-   // discarded every `when` and returned Unit instead of a Column.
-   buckets.foldRight(lit(null).cast("string")) { case ((lower, upper), fallback) =>
-     // ${...} is needed to interpolate the bounds; "$res._1" printed the whole tuple.
-     when(col.between(lower, upper), lit(s"${lower}-${upper}")).otherwise(fallback)
-   }
- }
- val bucket_size = 10 // width shared by every bucket
- val floor_col = floor(df("some_num") / bucket_size) * bucket_size // bucket lower bound: some_num rounded down to a multiple of bucket_size
- df.withColumn("bucket", concat_ws("-", floor_col, floor_col + bucket_size)).show // label is "<lower>-<lower + bucket_size>", then print the DataFrame
- +--------+------+
- |some_num|bucket|
- +--------+------+
- | 3| 0-10|
- | 24| 20-30|
- +--------+------+
- // Fixed-width bucketing with a width of 5: round some_num down to a multiple of
- // the width to get the lower bound, then label the row "<lower>-<lower + width>".
- val bucket_size1 = 5
- // Named floor_col1 (matching bucket_size1) so it does not redefine the earlier
- // floor_col when both snippets are compiled in the same scope.
- val floor_col1 = floor(df("some_num") / bucket_size1) * bucket_size1
- df.withColumn("bucket", concat_ws("-", floor_col1, floor_col1 + bucket_size1)).show
- +--------+------+
- |some_num|bucket|
- +--------+------+
- | 3| 0-5|
- | 24| 20-25|
- +--------+------+
Add Comment
Please, Sign In to add comment