Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Demonstration of DataFrame.na.drop() with the different `how` / `subset`
# combinations. Each call is followed by a bare string literal holding the
# table that show() is expected to print.
#
# NOTE(review): `sqlContext` is not defined in this snippet; it is assumed to
# be supplied by the surrounding PySpark shell/notebook session -- confirm.
import pyspark.sql.functions as F

# Sample data mixing real values, None (SQL NULL) and empty strings.
# Empty strings are NOT nulls, so na.drop() never removes a row because of "".
df = sqlContext.createDataFrame([
    (1, "A", "X1"),
    (2, None, "X2"),
    (3, "B", None),
    (1, "", "X3"),
    (2, "", "X2"),
    (3, "C", "X2"),
    (1, None, None),
    (1, "", ""),
    (1, "X3", "X8"),
], ["ID", "TYPE", "CODE"])

df.show()

# how='any': drop a row if ANY of its columns is null.
df.na.drop('any').show()
"""
+---+----+----+
| ID|TYPE|CODE|
+---+----+----+
|  1|   A|  X1|
|  1|    |  X3|
|  2|    |  X2|
|  3|   C|  X2|
|  1|    |    |
|  1|  X3|  X8|
+---+----+----+
"""

# how='all': drop a row only if ALL of its columns are null.
# ID is never null here, so every row is kept.
df.na.drop('all').show()
"""
+---+----+----+
| ID|TYPE|CODE|
+---+----+----+
|  1|   A|  X1|
|  2|null|  X2|
|  3|   B|null|
|  1|    |  X3|
|  2|    |  X2|
|  3|   C|  X2|
|  1|null|null|
|  1|    |    |
|  1|  X3|  X8|
+---+----+----+
"""

# how='all' restricted to TYPE and CODE: drop a row only when both of those
# columns are null, i.e. only the (1, None, None) row.
df.na.drop("all", subset=["TYPE", "CODE"]).show()
"""
+---+----+----+
| ID|TYPE|CODE|
+---+----+----+
|  1|   A|  X1|
|  2|null|  X2|
|  3|   B|null|
|  1|    |  X3|
|  2|    |  X2|
|  3|   C|  X2|
|  1|    |    |
|  1|  X3|  X8|
+---+----+----+
"""

# how='any' restricted to ID and TYPE: drop a row when either of those columns
# is null; a null CODE alone does not cause a drop.
df.na.drop("any", subset=["ID", "TYPE"]).show()
"""
+---+----+----+
| ID|TYPE|CODE|
+---+----+----+
|  1|   A|  X1|
|  3|   B|null|
|  1|    |  X3|
|  2|    |  X2|
|  3|   C|  X2|
|  1|    |    |
|  1|  X3|  X8|
+---+----+----+
"""
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement