Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Demo script: build two small user DataFrames and full-outer-join them on
# `email` in two different ways so the printed results can be compared.
#
# NOTE(review): assumes `spark` is an already-active SparkSession supplied by
# the environment (e.g. the pyspark shell or a notebook) -- it is not created
# anywhere in this script.
from pyspark.sql import Row

# First batch of user records.
users1 = [
    dict(
        email="alovett0@nsw.gov.au",
        first_name="Aundrea",
        last_name="Lovett",
        gender="Male",
        ip_address="62.72.1.143",
    ),
    dict(
        email="bjowling1@spiegel.de",
        first_name="Bettine",
        last_name="Jowling",
        gender="Female",
        ip_address="26.250.197.47",
    ),
]
users1_rows = [Row(**record) for record in users1]
users1_df = spark.createDataFrame(users1_rows)

# Second batch; "bjowling1@spiegel.de" appears in both batches, every other
# email is unique to its own batch.
users2 = [
    dict(
        email="lbutland7@time.com",
        first_name="Lilas",
        last_name="Butland",
        gender="Female",
        ip_address="109.110.131.151",
    ),
    dict(
        email="bjowling1@spiegel.de",
        first_name="Putnam",
        last_name="Alfonsetti",
        gender="Female",
        ip_address="167.97.48.246",
    ),
]
users2_rows = [Row(**record) for record in users2]
users2_df = spark.createDataFrame(users2_rows)

# Variant 1: full outer join driven by an explicit column-equality expression.
email_matches = users1_df.email == users2_df.email
(
    users1_df
    .join(users2_df, on=email_matches, how='full_outer')
    .show()
)

# Variant 2: full outer join driven by the shared column name.
# NOTE(review): per the PySpark join docs a name-based join is expected to
# emit a single `email` column, unlike the expression form -- confirm by
# comparing the two printed tables.
(
    users1_df
    .join(users2_df, on='email', how='full_outer')
    .show()
)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement