Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from random import randint
- df.fillna(randint(14, 46), 'age').show()  # randint(14, 46) is evaluated once, before fillna is called, so a single number is passed as the fill value for 'age'; NOTE(review): df is not defined above this line in the paste
- import pyspark.sql.functions as F
- from pyspark.sql.functions import lit
- from pyspark.sql.types import IntegerType
- from random import randint
- df = sqlContext.createDataFrame(  # NOTE(review): sqlContext is not defined in this paste; presumably provided by the Spark shell/session - confirm
- [(1, "a", 23.0), (3, "B", -23.0)], ("x1", "x2", "x3"))
- df = (df
- .withColumn("x4", F.lit(None).cast(IntegerType()))  # x4 and x5 are added as all-null columns cast to IntegerType
- .withColumn("x5", F.lit(None).cast(IntegerType()))
- )
- df.na.fill({'x4':randint(0,100)}).show()  # randint is evaluated once in Python, so both rows get the same x4 (9 and 9 in the first table below); the result is only shown, not assigned back to df
- df.withColumn('x5', F.coalesce(F.col('x5'), (F.round(F.rand()*100)))).show()  # F.rand() is a column expression, so each row gets its own value (44.0 and 2.0 in the second table, displayed as floats); x4 is still null here because the previous fill was not kept
- +---+---+-----+---+----+
- | x1| x2| x3| x4| x5|
- +---+---+-----+---+----+
- | 1| a| 23.0| 9|null|
- | 3| B|-23.0| 9|null|
- +---+---+-----+---+----+
- +---+---+-----+----+----+
- | x1| x2| x3| x4| x5|
- +---+---+-----+----+----+
- | 1| a| 23.0|null|44.0|
- | 3| B|-23.0|null| 2.0|
- +---+---+-----+----+----+
Add Comment
Please, Sign In to add comment