Advertisement
Guest User

Untitled

a guest
Jul 15th, 2019
79
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.68 KB | None | 0 0
  1. TransactionID MerchantStore MerchantCity TransactionDate
  2.  
  3. import sys
  4. from awsglue.transforms import *
  5. from awsglue.utils import getResolvedOptions
  6. from pyspark.context import SparkContext
  7. from awsglue.context import GlueContext
  8. from awsglue.job import Job
  9. from joblib import Parallel, delayed
  10. import multiprocessing
  11.  
  12. glueContext = GlueContext(SparkContext.getOrCreate())  # wrap the existing SparkContext, or create one if none exists
  13.  
  14. # Column names used as partition keys, one S3 write per entry (see read_and_write). The list emulates several partition schemes using only one Redshift table.
  15.  
  16. partition_keys = ['txn_type','amount','trans_date','acceptor_ref','location_schema','settlement_date','merchant_city','merchant_state','merchant_country','mcc','industry_code','tran_code','reason_code','plan_id','pin_txn','eci','prescore_amount','batch_date','src_file_name','load_time']
  17. # Source frame: the loyalty.dailyclienttxn table read through the 'redshift' connection type; redshiftTmpDir points at an S3 directory. Credentials are redacted ("**") in this paste.
  18. txn_table_df = glueContext.create_dynamic_frame_from_options (
  19. connection_type = 'redshift',
  20. connection_options = {"url": "jdbc:redshift://testredshiftcluster.**.us-east-1.redshift.amazonaws.com:5439/dev", "user": "**", "password": "**","dbtable": "loyalty.dailyclienttxn", "redshiftTmpDir": "s3://loyalty-poc-arm/tempDirectory/"}
  21. )
  22.  
  23. def read_and_write(partition_key):
  24. path = "s3://loyalty-poc-arm/allpartitionsWithouParallelRun4/" + partition_key
  25. glueContext.write_dynamic_frame_from_options(
  26. frame = txn_table_df,
  27. connection_type = "s3",
  28. connection_options = {"path": path, "partitionKeys": [partition_key]},
  29. format = "parquet")
  30.  
  31. # Call read_and_write once per partition key through joblib (n_jobs=-1, threads preferred) instead of a plain for loop, so the S3 writes are issued concurrently. read_and_write has no return statement, so results carries no useful values.
  32. results = Parallel(n_jobs=-1, prefer="threads")(delayed(read_and_write)(partition_key) for partition_key in partition_keys)
  33.  
  34. Worker type: G.2X
  35. No of workers: 149
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement