Guest User

Untitled

a guest
Jun 13th, 2018
93
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.02 KB | None | 0 0
  1. +--------+
  2. |some_num|
  3. +--------+
  4. | 3|
  5. | 24|
  6. +--------+
  7.  
  // Intended call site: add a "bucket" column holding the label of the range
  // that each some_num value falls into.
  // The element type is spelled out because Array is invariant: an inferred
  // Array[(Int, Int)] would not be accepted where Array[(Any, Any)] is expected.
  val bucketRanges: Array[(Any, Any)] = Array((0, 10), (10, 20), (20, 30))

  df.withColumn("bucket", bucketFinder(col("some_num"), bucketRanges)).show()
  19.  
  20. +--------+------+
  21. |some_num|bucket|
  22. +--------+------+
  23. | 3| 0-10|
  24. | 24| 20-30|
  25. +--------+------+
  26.  
  /** Builds a column that labels each value of `col` with the bucket it falls into.
    *
    * The buckets are chained into a single `when(...).when(...)` expression, so they are
    * tested in the order given and the first matching bucket wins. `between` is inclusive
    * on both ends, which means a value sitting on a shared edge (e.g. 10 for the buckets
    * (0, 10) and (10, 20)) is labelled with the earlier bucket.
    *
    * @param col     the column whose values are classified
    * @param buckets (lower, upper) bounds of each bucket, both inclusive
    * @return a string column holding "lower-upper" (e.g. "0-10") for the first matching
    *         bucket, or null when no bucket matches or `buckets` is empty
    */
  def bucketFinder(col: Column, buckets: Array[(Any, Any)]): Column = {
    // Destructured bounds are interpolated directly; the previous s"$res._1" form
    // printed the whole tuple followed by the literal text "._1".
    def labelled(lower: Any, upper: Any): (Column, Column) =
      (col.between(lower, upper), lit(s"$lower-$upper"))

    buckets.toList match {
      case Nil =>
        // No buckets to test: every row is unlabelled.
        lit(null).cast("string")
      case (firstLower, firstUpper) :: rest =>
        val (firstCondition, firstLabel) = labelled(firstLower, firstUpper)
        // Column.when may only extend a column started by functions.when, hence the seed.
        rest.foldLeft(when(firstCondition, firstLabel)) { case (chain, (lower, upper)) =>
          val (condition, label) = labelled(lower, upper)
          chain.when(condition, label)
        }
    }
  }
  34.  
  // Fixed-width bucketing: every bucket spans bucket_size units.
  val bucket_size = 10
  // Lower edge of the bucket: some_num rounded down to a multiple of bucket_size.
  val floor_col = floor(df.col("some_num").divide(bucket_size)).multiply(bucket_size)

  // Label is "<lower edge>-<lower edge + bucket_size>".
  df.withColumn(
    "bucket",
    concat_ws("-", floor_col, floor_col.plus(bucket_size))
  ).show()
  39. +--------+------+
  40. |some_num|bucket|
  41. +--------+------+
  42. | 3| 0-10|
  43. | 24| 20-30|
  44.  
  // Same fixed-width bucketing with a narrower bucket of 5 units.
  val bucket_size1 = 5
  // Named floor_col1 (matching bucket_size1) so it does not redefine the earlier
  // floor_col when both snippets are compiled in the same scope.
  val floor_col1 = floor(df("some_num") / bucket_size1) * bucket_size1

  // Label is "<lower edge>-<lower edge + bucket_size1>".
  df.withColumn("bucket", concat_ws("-", floor_col1, floor_col1 + bucket_size1)).show()
  49. +--------+------+
  50. |some_num|bucket|
  51. +--------+------+
  52. | 3| 0-5|
  53. | 24| 20-25|
Add Comment
Please, Sign In to add comment