Advertisement
krishnaetl2102

s3bucket with python

Dec 11th, 2019
92
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.96 KB | None | 0 0
  1. Hi team, I am trying to read a file from an S3 bucket. As of now I am able to download the file and then read that downloaded copy.
  2. Instead of downloading the file, can I read it directly from the S3 bucket?
  3. Could someone please suggest the code to read a file from an S3 bucket?
  4.  
  5.  
  6. import os, sys, csv,json
  7. from itertools import product
  8. import requestsfrom requests_aws4auth
  9. import AWS4Authimport boto3import botocore
  10. import configfrom elasticsearch
  11. import helpers,Elasticsearches = Elasticsearch("localhost:9200")
  12.  
  13. s3 = boto3.resource('s3')
  14. ACCESS=config.s3bucket['aws_access_key']
  15. SECRET=config.s3bucket['aws_secret_key']
  16. url_e=config.s3bucket['url']
  17.  
  18. client = boto3.client(service_name="s3", region_name="us-east-1",endpoint_url=url_e,verify=False,aws_access_key_id = ACCESS,aws_secret_access_key= SECRET)
  19.  
  20. client.download_file('s3_bucket','sourcefile.csv','sourcefile.csv')
  21. print ("Downloading object %s from bucket %s" % ('sourcefile.csv','s3_bucket'))
  22.  
  23. #step1: file split into two files
  24.  
  25. if os.path.exists('file1.csv'):
  26.     print('File is previously present and have been deleted !')
  27.     os.remove('file1.csv')
  28. if os.path.exists('file2.csv'):
  29.     print('File is previously present and have been deleted !')
  30.     os.remove('file2.csv')
  31.  
  32. condition = 0
  33. header = []
  34.  
  35. with open('sourcefile.csv', 'r', encoding='UTF-8') as file:
  36.     for lines in file:
  37.         if condition ==0:
  38.             header = lines.strip('\n').split(',')
  39.         elif condition < 2:
  40.             with open('file1.csv', 'a', encoding='UTF-8') as file_01:
  41.                 if os.stat('file1.csv').st_size == 0:
  42.                     file_01.write(','.join(header) + '\n')
  43.                     file_01.write(lines.strip('\n') + '\n')
  44.         elif condition >= 3:
  45.             with open('file2.csv', 'a', encoding='UTF-8') as file_02:
  46.                 file_02.write(lines.strip('\n') + '\n')
  47. condition += 1
  48. print('file spliting completed')
  49.  
  50. #step2:merging both files and updating header.
  51.  
  52. with open(r'file1.csv','rt')as f:
  53.     data1 = csv.reader(f,delimiter=",")
  54.     list1= [row for row in data1]
  55. with open(r'file2.csv','rt')as f:
  56.     data2 = csv.reader(f,delimiter=",")
  57.     list2= [row for row in data2]
  58. header= ["dept_id","dept_role","dept_name","dept_date","dept_service","id","name","start_time","end_time","user","manufacturing","exp"]
  59. if os.path.exists('BAY23_merge.csv'):
  60.     print('File is previously present and have been deleted !')
  61.     os.remove('BAY23_merge.csv')
  62. with open('file_merge.csv', 'w', newline='') as csvfile:
  63.     csvwriter = csv.writer(csvfile)
  64.     csvfile.write(','.join(header)+'\n')
  65.     for r in list2[1:]:
  66.         csvwriter.writerow(list1[1]+r)
  67. print('files has merged and header has updated')
  68.  
  69. #step3:loading data into elasticsearch:
  70.  
  71. from elasticsearch import helpers,Elasticsearches = Elasticsearch("localhost:9200")
  72. with open('file_merge.csv') as f:
  73.     reader = csv.DictReader(f)
  74.     helpers.bulk(es, reader, index='test_data_sample', doc_type='test-sample')
  75.    
  76. print('data loaded into ES')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement