Advertisement
Guest User

Untitled

a guest
Mar 14th, 2024
35
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.09 KB | Software | 0 0
  from pyspark.sql import Row

  # Demo: full outer join of two small user samples on `email`, written two
  # ways (join condition as a Column expression vs. as a column name).
  # NOTE(review): relies on an existing SparkSession bound to `spark`, as in
  # the pyspark shell or a notebook -- it is not created in this script.


  def _as_rows(records):
      """Convert a list of dicts into a list of Rows (one Row per dict)."""
      return [Row(**record) for record in records]


  users1 = [
      {
          "email": "alovett0@nsw.gov.au",
          "first_name": "Aundrea",
          "last_name": "Lovett",
          "gender": "Male",
          "ip_address": "62.72.1.143",
      },
      {
          "email": "bjowling1@spiegel.de",
          "first_name": "Bettine",
          "last_name": "Jowling",
          "gender": "Female",
          "ip_address": "26.250.197.47",
      },
  ]
  users1_df = spark.createDataFrame(_as_rows(users1))

  # Shares the email "bjowling1@spiegel.de" with users1; the other email
  # appears only in this sample.
  users2 = [
      {
          "email": "lbutland7@time.com",
          "first_name": "Lilas",
          "last_name": "Butland",
          "gender": "Female",
          "ip_address": "109.110.131.151",
      },
      {
          "email": "bjowling1@spiegel.de",
          "first_name": "Putnam",
          "last_name": "Alfonsetti",
          "gender": "Female",
          "ip_address": "167.97.48.246",
      },
  ]
  users2_df = spark.createDataFrame(_as_rows(users2))

  # Variant 1: the join condition is an explicit equality between the two
  # `email` columns.
  joined_on_expression = users1_df.join(
      users2_df,
      on=users1_df.email == users2_df.email,
      how='full_outer',
  )
  joined_on_expression.show()

  # Variant 2: the join key is given by column name only.
  joined_on_name = users1_df.join(users2_df, on='email', how='full_outer')
  joined_on_name.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement