Guest User

Untitled

a guest
Oct 17th, 2017
377
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.40 KB | None | 0 0
  1. +-----+--------+---------+
  2. | usn|log_type|item_code|
  3. +-----+--------+---------+
  4. | 0| 11| I0938|
  5. | 916| 19| I0009|
  6. | 916| 51| I1097|
  7. | 916| 19| C0723|
  8. | 916| 19| I0010|
  9. | 916| 19| I0010|
  10. |12331| 19| C0117|
  11. |12331| 19| C0117|
  12. |12331| 19| I0009|
  13. |12331| 19| I0009|
  14. |12331| 19| I0010|
  15. |12838| 19| I1067|
  16. |12838| 19| I1067|
  17. |12838| 19| C1083|
  18. |12838| 11| B0250|
  19. |12838| 19| C1346|
  20. +-----+--------+---------+
  21.  
  22. +---------+------+
  23. |item_code| numId|
  24. +---------+------+
  25. | I0938| 0 |
  26. | I0009| 1 |
  27. | I1097| 2 |
  28. | C0723| 3 |
  29. | I0010| 4 |
  30. | C0117| 5 |
  31. | I1067| 6 |
  32. | C1083| 7 |
  33. | B0250| 8 |
  34. | C1346| 9 |
  35. +---------+------+
  36.  
  37. val spark = SparkSession.builder.getOrCreate()
  38. import spark.implicits._
  39.  
  40. val df = Seq("I0938","I0009","I1097","C0723","I0010","I0010",
  41. "C0117","C0117","I0009","I0009","I0010","I1067",
  42. "I1067","C1083","B0250","C1346")
  43. .toDF("item_code")
  44.  
  45. val df2 = df.distinct.rdd
  46. .map{case Row(item: String) => item}
  47. .zipWithIndex()
  48. .toDF("item_code", "numId")
  49.  
  50. +---------+-----+
  51. |item_code|numId|
  52. +---------+-----+
  53. | I0010| 0|
  54. | I1067| 1|
  55. | C0117| 2|
  56. | I0009| 3|
  57. | I1097| 4|
  58. | C1083| 5|
  59. | I0938| 6|
  60. | C0723| 7|
  61. | B0250| 8|
  62. | C1346| 9|
  63. +---------+-----+
Add Comment
Please, Sign In to add comment