Guest User

Untitled

a guest
Sep 21st, 2018
45
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.19 KB | None | 0 0
  // Databricks notebook script: reads a JSON file from an ABFS file system,
  // keeps a handful of columns, renames one of them, and writes the result to a
  // SQL Data Warehouse table through the com.databricks.spark.sqldw connector.
  // Relies on the notebook-provided `spark`, `sc` and `dbutils` handles.
  // Every "<...>" value is a placeholder to be filled in before running.

  // ---- Storage account access ------------------------------------------------
  // Register the storage account key for the ABFS endpoint on the Spark session.
  spark.conf.set("fs.azure.account.key.<ACCOUNT_NAME>.dfs.core.windows.net", "<ACCOUNT_KEY>")

  // The createRemoteFileSystemDuringInitialization flag is switched on only for
  // the first listing of the file system root and switched off right after.
  // NOTE(review): presumably this lets the driver create the file system when it
  // does not exist yet — confirm against the ABFS driver documentation.
  spark.conf.set("fs.azure.createRemoteFileSystemDuringInitialization", "true")
  dbutils.fs.ls("abfs://<FILE_SYSTEM_NAME>@<ACCOUNT_NAME>.dfs.core.windows.net/")
  spark.conf.set("fs.azure.createRemoteFileSystemDuringInitialization", "false")

  // Notebook text widgets. Their values are never read back anywhere in this
  // script; the storage settings below use hard-coded placeholders instead.
  dbutils.widgets.text("storage_account_name", "STORAGE_ACCOUNT_NAME", "<YOUR_STORAGE_ACCOUNT_NAME>")
  dbutils.widgets.text("storage_account_access_key", "YOUR_ACCESS_KEY", "<YOUR_STORAGE_ACCOUNT_SHARED_KEY>")

  // ---- Extract ---------------------------------------------------------------
  val df = spark.read.json("abfs://<FILE_SYSTEM_NAME>@<ACCOUNT_NAME>.dfs.core.windows.net/data/small_radio_json.json")

  df.show()

  // ---- Transform -------------------------------------------------------------
  // Keep only the columns of interest, then expose "level" as "subscription_type".
  val specificColumnsDf = df.select("firstname", "lastname", "gender", "location", "level")

  val renamedColumnsDF = specificColumnsDf.withColumnRenamed("level", "subscription_type")
  renamedColumnsDF.show()

  // ---- Staging storage used by the SQL DW connector --------------------------
  val storageURI = "<STORAGE_ACCOUNT_NAME>.dfs.core.windows.net"
  val fileSystemName = "<FILE_SYSTEM_NAME>"
  val accessKey = "<ACCESS_KEY>"

  // Directory handed to the connector through its "tempdir" option.
  val tempDir = s"abfs://${fileSystemName}@${storageURI}/tempDirs"

  // Same account-key setting as at the top of the script, this time placed on the
  // SparkContext's Hadoop configuration rather than on the session config.
  val acntInfo = s"fs.azure.account.key.${storageURI}"
  sc.hadoopConfiguration.set(acntInfo, accessKey)

  //SQL Data Warehouse related settings
  val dwDatabase = "<DATABASE NAME>"
  val dwServer = "<DATABASE SERVER NAME>"
  val dwUser = "<USER NAME>"
  val dwPass = "<PASSWORD>"
  val dwJdbcPort = "1433"
  // NOTE(review): trustServerCertificate=true normally disables validation of the
  // server certificate, which would make hostNameInCertificate ineffective —
  // confirm against the JDBC driver documentation before relying on this string.
  val dwJdbcExtraOptions = "encrypt=true;trustServerCertificate=true;hostNameInCertificate=*.database.windows.net;loginTimeout=30;"

  // Base JDBC URL: server, port, database and credentials, no extra options.
  val sqlDwUrlSmall = s"jdbc:sqlserver://${dwServer}.database.windows.net:${dwJdbcPort};database=${dwDatabase};user=${dwUser};password=${dwPass}"
  // Base URL plus the extra options. The previous version appended the plain
  // literal ";$dwJdbcExtraOptions" (no `s` interpolator), so the URL ended in the
  // text "$dwJdbcExtraOptions" instead of the option list.
  val sqlDwUrl = s"${sqlDwUrlSmall};${dwJdbcExtraOptions}"

  // NOTE(review): presumably required so the staged Parquet files are readable by
  // the warehouse loader — confirm against the connector documentation.
  spark.conf.set("spark.sql.parquet.writeLegacyFormat", "true")

  // ---- Load ------------------------------------------------------------------
  // Overwrite "SampleTable" with the transformed rows, staging through tempDir.
  // The write keeps using sqlDwUrlSmall, exactly as before; sqlDwUrl is defined
  // but not referenced by this script.
  renamedColumnsDF.write
    .format("com.databricks.spark.sqldw")
    .option("url", sqlDwUrlSmall)
    .option("dbtable", "SampleTable")
    .option( "forward_spark_azure_storage_credentials","True")
    .option("tempdir", tempDir)
    .mode("overwrite")
    .save()
Add Comment
Please, Sign In to add comment