Guest User

Untitled

a guest
May 27th, 2018
84
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.14 KB | None | 0 0
  1. import boto3
  2. import argparse
  3. import elasticsearch
  4. from io import TextIOWrapper
  5. from gzip import GzipFile
  6. import csv
  7.  
  8. fact_key = "/2018/05/15/mycsv_files"
  9. BUCKET = 'csv_data'
  10. print(f'Reading files at {fact_key}')
  11.  
  12. parser = argparse.ArgumentParser(description='S3 Reader')
  13. parser.add_argument('token', type=str, help='6 digit mfa token', default='', nargs='?')
  14.  
  15. args = parser.parse_args()
  16. token = args.token
  17.  
  18. if token:
  19. # if supplied, the MFA token will authenticate thought API
  20. sts_client = boto3.client('sts')
  21.  
  22. print('Assuming Role...')
  23. # From the response that contains the assumed role, get the temporary
  24. # credentials that can be used to make subsequent API calls
  25. assumedRoleObject = sts_client.assume_role(
  26. RoleArn="arn:aws:iam::1234:role/developer-role",
  27. RoleSessionName="currentRoleSession",
  28. DurationSeconds=3600,
  29. SerialNumber="arn:aws:iam::1234:mfa/felipe.farias",
  30. TokenCode=token
  31. )
  32. credentials = assumedRoleObject['Credentials']
  33. print('Credentials:')
  34. print(credentials)
  35.  
  36. # Use the temporary credentials that AssumeRole returns to make a
  37. # connection to Amazon S3
  38. s3 = boto3.client(
  39. 's3',
  40. aws_access_key_id = credentials['AccessKeyId'],
  41. aws_secret_access_key = credentials['SecretAccessKey'],
  42. aws_session_token = credentials['SessionToken'],
  43. )
  44. else:
  45. # Not token supplied, so runs with current user
  46. s3 = boto3.client('s3')
  47.  
  48.  
  49. # for bucket in s3.buckets.all():
  50. # print(bucket.name)
  51.  
  52.  
  53. def process_file(key):
  54. print(f'processing key {key}')
  55. count = 0
  56. response = s3.get_object(Bucket=BUCKET, Key=key)
  57. gzipped = GzipFile(None, 'rb', fileobj=response['Body'])
  58. data = TextIOWrapper(gzipped)
  59. input_csv = csv.reader(data, delimiter=';', quotechar='"')
  60. for line in input_csv:
  61. if count % 1000000 == 0:
  62. print(f'{count:,}')
  63. if count < 10:
  64. print(line)
  65. count += 1
  66. print(f'Processed {count:,} lines')
  67.  
  68.  
  69. # Read all files from bucket/key
  70. response = s3.list_objects(Bucket=BUCKET, Prefix=fact_key)
  71. for row in response['Contents']:
  72. file_key = row['Key']
  73. process_file(file_key)
Add Comment
Please, Sign In to add comment