Advertisement
Guest User

Untitled

a guest
Mar 18th, 2019
91
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.15 KB | None | 0 0
  1. import pyspark.sql.functions as F
  2. df = sqlContext.createDataFrame([
  3. (1, "A", "X1"),
  4. (2, None, "X2"),
  5. (3, "B", None),
  6. (1, "", "X3"),
  7. (2, "", "X2"),
  8. (3, "C", "X2"),
  9. (1, None, None),
  10. (1, "", ""),
  11. (1, "X3", "X8"),
  12. ], ["ID", "TYPE", "CODE"])
  13. df.show()
  14.  
  15. df.na.drop('any').show()
  16.  
  17. """
  18. +---+----+----+
  19. | ID|TYPE|CODE|
  20. +---+----+----+
  21. | 1| A| X1|
  22. | 1| | X3|
  23. | 2| | X2|
  24. | 3| C| X2|
  25. | 1| | |
  26. +---+----+----+
  27. """
  28.  
  29. df.na.drop('all').show()
  30.  
  31. """
  32. +---+----+----+
  33. | ID|TYPE|CODE|
  34. +---+----+----+
  35. | 1| A| X1|
  36. | 2|null| X2|
  37. | 3| B|null|
  38. | 1| | X3|
  39. | 2| | X2|
  40. | 3| C| X2|
  41. | 1|null|null|
  42. | 1| | |
  43. +---+----+----+
  44. """
  45.  
  46. df.na.drop("all", subset=["TYPE", "CODE"]).show()
  47. """
  48. +---+----+----+
  49. | ID|TYPE|CODE|
  50. +---+----+----+
  51. | 1| A| X1|
  52. | 2|null| X2|
  53. | 3| B|null|
  54. | 1| | X3|
  55. | 2| | X2|
  56. | 3| C| X2|
  57. | 1| | |
  58. | 1| X3| X8|
  59. +---+----+----+
  60. """
  61.  
  62. df.na.drop("any", subset=["ID", "TYPE"]).show()
  63. """
  64. +---+----+----+
  65. | ID|TYPE|CODE|
  66. +---+----+----+
  67. | 1| A| X1|
  68. | 3| B|null|
  69. | 1| | X3|
  70. | 2| | X2|
  71. | 3| C| X2|
  72. | 1| | |
  73. | 1| X3| X8|
  74. +---+----+----+
  75. """
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement