Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Left-join dfA to dfB on key columns that may contain NULLs.
#
# A name-based join (`dfA.join(dfB, static_cols, how='left')`) compares keys
# with plain equality, and NULL = NULL is not true in Spark SQL, so rows whose
# `d_val` is null on both sides never match. The join below uses
# `Column.eqNullSafe` (SQL `<=>`), which treats two NULLs as equal.
#
# NOTE: `spark` is expected to be an existing SparkSession.

# +---+-----+-----+
# | id|d_var|d_val|
# +---+-----+-----+
# |a01|  112| null|
# |a01|  113|    0|
# |a02|  112| null|
# |a02|  113|    0|
# +---+-----+-----+
dfA = spark.createDataFrame(
    [
        ('a01', '112', None),
        ('a01', '113', '0'),
        ('a02', '112', None),
        ('a02', '113', '0'),
    ],
    ('id', 'd_var', 'd_val'),
)

# +---+-----+-----+------+-----+
# | id|d_var|d_val|c_type|c_val|
# +---+-----+-----+------+-----+
# |a01|  112| null|   red|    1|
# |a01|  113|    0|   red|    1|
# +---+-----+-----+------+-----+
dfB = spark.createDataFrame(
    [
        ('a01', '112', None, 'red', '1'),
        ('a01', '113', '0', 'red', '1'),
    ],
    ('id', 'd_var', 'd_val', 'c_type', 'c_val'),
)

# The first three columns of dfB are the join keys shared with dfA.
static_cols = dfB.columns[:3]

# One null-safe equality per key column; a list of Column conditions passed
# to `join` is combined with AND.
null_safe_cond = [dfA[c].eqNullSafe(dfB[c]) for c in static_cols]

# Joining on an expression keeps the key columns from both sides, so project
# dfA's columns plus only the non-key columns of dfB to get the same schema a
# name-based join would produce.
payload_cols = [c for c in dfB.columns if c not in static_cols]

joined = (
    dfA.join(dfB, null_safe_cond, how='left')
    .select(
        *[dfA[c] for c in dfA.columns],
        *[dfB[c] for c in payload_cols],
    )
    .orderBy('id', 'd_var')
)
joined.show()

# For reference, the plain name-based join
# `dfA.join(dfB, static_cols, how='left')` yields a non-match on the
# null-keyed row:
# +---+-----+-----+------+-----+
# | id|d_var|d_val|c_type|c_val|
# +---+-----+-----+------+-----+
# |a01|  112| null|  null| null| <-
# |a01|  113|    0|   red|    1|
# |a02|  112| null|  null| null|
# |a02|  113|    0|  null| null|
# +---+-----+-----+------+-----+
#
# Output of the null-safe join above:
# +---+-----+-----+------+-----+
# | id|d_var|d_val|c_type|c_val|
# +---+-----+-----+------+-----+
# |a01|  112| null|   red|    1| <-
# |a01|  113|    0|   red|    1|
# |a02|  112| null|  null| null|
# |a02|  113|    0|  null| null|
# +---+-----+-----+------+-----+
Add Comment
Please, Sign In to add comment