Advertisement
Guest User

Untitled

a guest
Jun 20th, 2019
72
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.90 KB | None | 0 0
  1. val dataSet = spark.createDataFrame(Seq( // seven sample rows; types is null for four of them
  2. (1, "shirt for women", Seq("shirt", "women"), 19.1, "ST"),
  3. (1, "shirt for women", Seq("shirt", "women"), 10.1, null),
  4. (1, "shirt for women", Seq("shirt", "women"), 12.1, null),
  5. (0, "shirt group women", Seq("group", "women"), 15.1, null),
  6. (0, "shirt group women", Seq("group", "women"), 12.1, null),
  7. (3, "shirt nmn women", Seq("shirt", "women"), 16.1, "ST"),
  8. (3, "shirt were women", Seq("shirt", "women"), 13.1, "ST")
  9. )).toDF("id", "raw", "filtered", "score", "types") // column names, in tuple-field order
  10.  
  11. +---+-----------------+--------------+-----+-----+
  12. |id |raw |filtered |score|types|
  13. +---+-----------------+--------------+-----+-----+
  14. |1 |shirt for women |[shirt, women]|19.1 |ST |
  15. |1 |shirt for women |[shirt, women]|10.1 |null |
  16. |1 |shirt for women |[shirt, women]|12.1 |null |
  17. |0 |shirt group women|[group, women]|15.1 |null |
  18. |0 |shirt group women|[group, women]|12.1 |null |
  19. |3 |shirt nmn women |[shirt, women]|16.1 |ST |
  20. |3 |shirt were women |[shirt, women]|13.1 |ST |
  21. +---+-----------------+--------------+-----+-----+
  22.  
  23. +---+-----------------+--------------+-----+----+
  24. |id |raw |filtered |score|types|
  25. +---+-----------------+--------------+-----+----+
  26. |1 |shirt for women |[shirt, women]|19.1 |ST |
  27. |1 |shirt for women |[shirt, women]|10.1 |NA |
  28. |1 |shirt for women |[shirt, women]|12.1 |null|
  29. |0 |shirt group women|[women, group]|15.1 |null|
  30. |0 |shirt group women|[women, group]|12.1 |NA |
  31. |3 |shirt nmn women |[shirt, women]|16.1 |ST |
  32. |3 |shirt were women |[shirt, women]|13.1 |ST |
  33. +---+-----------------+--------------+-----+----+
  34. // Tag rows whose score is below the max of their (id, filtered) group with "NA"; only rows with null types compete.
  35. data.withColumn("max_score",
  36.   when(col("types").isNull, // only rows without a type are compared against the group maximum
  37.     max("score")
  38.       .over(Window.partitionBy("id", "filtered")))
  39.     .otherwise($"score")) // typed rows are compared with their own score, so they never differ
  40.   .withColumn("type_temp",
  41.     when(col("score") =!= col("max_score"),
  42.       addReasonsUDF(col("types"), // the column is named "types"; no "type" column exists to resolve
  43.         lit("NA")))
  44.       .otherwise(col("types"))) // unchanged rows keep their current types value
  45.   .drop("types", "max_score") // remove the old column and the helper column ...
  46.   .withColumnRenamed("type_temp", "types") // ... then put the new value back under the original name
  47.  
  48. +---+-----------------+--------------+-----+---------+-----+
  49. |id |raw |filtered |score|max_score|types|
  50. +---+-----------------+--------------+-----+---------+-----+
  51. |1 |shirt for women |[shirt, women]|19.1 |19.1 |ST |
  52. |1 |shirt women |[shirt, women]|10.1 |19.1 |NA |
  53. |1 |shirt of women |[shirt, women]|12.1 |19.1 |NA |
  54. |0 |shirt group women|[group, women]|15.1 |15.1 |null |
  55. |0 |shirt will women |[group, women]|12.1 |15.1 |NA |
  56. |3 |shirt nmn women |[shirt, women]|16.1 |16.1 |ST |
  57. |3 |shirt were women |[shirt, women]|13.1 |13.1 |ST |
  58. +---+-----------------+--------------+-----+---------+-----+
  59.  
  60. def addReasons(oldreason: String, newreason: String): String = {
  61.   // Combine two reason strings; a side that checkIfEmpty reports as empty is ignored.
  62.   val oldMissing = checkIfEmpty(oldreason)
  63.   val newMissing = checkIfEmpty(newreason)
  64.   (oldMissing, newMissing) match {
  65.     case (true, true)   => null // neither side carries a reason
  66.     case (true, false)  => newreason
  67.     case (false, true)  => oldreason
  68.     // Going through a Set collapses two identical reasons into a single entry.
  69.     case (false, false) => Set(oldreason, newreason).mkString(",")
  70.   }
  71. }
  72. // Rows with a types value keep their own score; rows with null types get the max score of their (id, raw) group.
  73. val byIdAndRaw = Window.partitionBy("id", "raw")
  74. val groupMax = max("score").over(byIdAndRaw)
  75. val maxScore = when(col("types").isNull, groupMax).otherwise($"score")
  76. dataSet.withColumn("max_score", maxScore).show(false)
  77.  
  78. +---+-----------------+--------------+-----+-----+---------+
  79. |id |raw |filtered |score|types|max_score|
  80. +---+-----------------+--------------+-----+-----+---------+
  81. |3 |shirt nmn women |[shirt, women]|16.1 |ST |16.1 |
  82. |0 |shirt group women|[group, women]|15.1 |null |15.1 |
  83. |0 |shirt group women|[group, women]|12.1 |null |15.1 |
  84. |3 |shirt were women |[shirt, women]|13.1 |ST |13.1 |
  85. |1 |shirt for women |[shirt, women]|19.1 |ST |19.1 |
  86. |1 |shirt for women |[shirt, women]|10.1 |null |19.1 |
  87. |1 |shirt for women |[shirt, women]|12.1 |null |19.1 |
  88. +---+-----------------+--------------+-----+-----+---------+
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement