Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- Hi team, I am trying to read a file from an S3 bucket. As of now I am able to download the file and then read that downloaded copy.
- Instead of downloading the file, can I read it directly from the S3 bucket?
- Could someone please suggest code showing how to read a file directly from an S3 bucket?
# Module setup: the pasted original had several import statements fused
# together on shared lines; they are separated here, one per line, with every
# imported name preserved.

# Standard library.
import csv
import json
import os
import sys
from itertools import product

# Third-party packages.
import boto3
import botocore
import requests
from elasticsearch import Elasticsearch, helpers
from requests_aws4auth import AWS4Auth

# Settings module; the script reads the `s3bucket` mapping from it
# (presumably a project-local config.py -- confirm).
import config

# Elasticsearch client bound to localhost:9200.
es = Elasticsearch("localhost:9200")
# Fetch sourcefile.csv from S3 into the current working directory.
#
# To read the object without keeping a local copy, boto3's
# client.get_object(Bucket=..., Key=...)["Body"] returns a streaming body that
# can be decoded and parsed in memory. Switching to that also means the split
# step can no longer open 'sourcefile.csv' from disk, so both must change
# together.

# Default-configured S3 resource handle (not referenced again in this script).
s3 = boto3.resource('s3')

# Credentials and endpoint are read from the `s3bucket` mapping in config.
ACCESS = config.s3bucket['aws_access_key']
SECRET = config.s3bucket['aws_secret_key']
url_e = config.s3bucket['url']

# NOTE(review): verify=False disables TLS certificate verification for this
# client. Kept as in the original; confirm it is really required for the
# configured endpoint before using this outside a test environment.
client = boto3.client(
    service_name="s3",
    region_name="us-east-1",
    endpoint_url=url_e,
    verify=False,
    aws_access_key_id=ACCESS,
    aws_secret_access_key=SECRET,
)

# Announce the transfer before it starts, so the message is accurate even if
# download_file raises.
print("Downloading object %s from bucket %s" % ('sourcefile.csv', 's3_bucket'))
client.download_file('s3_bucket', 'sourcefile.csv', 'sourcefile.csv')
# step 1: split sourcefile.csv into file1.csv and file2.csv by line position.
#   line 0   -> column names, kept in `header`
#   line 1   -> file1.csv, preceded by the header row
#   line 2   -> written to neither file
#   line 3+  -> file2.csv, copied as-is

# Report and remove outputs left behind by a previous run.
for stale_path in ('file1.csv', 'file2.csv'):
    if os.path.exists(stale_path):
        print('File is previously present and have been deleted !')
        os.remove(stale_path)

header = []
# Both outputs are opened once in 'w' mode instead of being reopened in
# append mode for every input line. As a consequence they always exist after
# this step (possibly empty), so the merge step can open them even when the
# source has fewer lines than expected. The source is opened first so that a
# missing sourcefile.csv still fails before any output is created.
with open('sourcefile.csv', 'r', encoding='UTF-8') as file, \
        open('file1.csv', 'w', encoding='UTF-8') as file_01, \
        open('file2.csv', 'w', encoding='UTF-8') as file_02:
    for line_no, lines in enumerate(file):
        row_text = lines.rstrip('\n')
        if line_no == 0:
            header = row_text.split(',')
        elif line_no < 2:
            # Only line 1 reaches this branch, so the header is written once.
            file_01.write(','.join(header) + '\n')
            file_01.write(row_text + '\n')
        elif line_no >= 3:
            file_02.write(row_text + '\n')
print('file spliting completed')
# step 2: merge file1.csv and file2.csv into file_merge.csv under a new header.
# Every row of file2.csv after its first is prefixed with the second row of
# file1.csv (the first row of each file is skipped).

# Both inputs were written as UTF-8 by the split step, so read them as UTF-8
# rather than with the platform default encoding; newline='' is what the csv
# module expects for files it parses.
with open('file1.csv', 'rt', encoding='UTF-8', newline='') as f:
    list1 = list(csv.reader(f, delimiter=","))
with open('file2.csv', 'rt', encoding='UTF-8', newline='') as f:
    list2 = list(csv.reader(f, delimiter=","))

# Column names for the merged output; replaces the header read from the source.
header = [
    "dept_id", "dept_role", "dept_name", "dept_date", "dept_service",
    "id", "name", "start_time", "end_time", "user", "manufacturing", "exp",
]

# The original checked for 'BAY23_merge.csv' while writing 'file_merge.csv';
# the check now targets the file this step actually produces.
if os.path.exists('file_merge.csv'):
    print('File is previously present and have been deleted !')
    os.remove('file_merge.csv')

detail_rows = list2[1:]
# Fail with a clear message instead of a bare IndexError on list1[1].
if detail_rows and len(list1) < 2:
    raise ValueError(
        "file1.csv has no data row to prepend to the rows of file2.csv"
    )

with open('file_merge.csv', 'w', encoding='UTF-8', newline='') as csvfile:
    csvwriter = csv.writer(csvfile)
    # Write the header through the writer so every line of the file uses the
    # same terminator and quoting rules.
    csvwriter.writerow(header)
    for r in detail_rows:
        csvwriter.writerow(list1[1] + r)
print('files has merged and header has updated')
# step 3: bulk-load the rows of file_merge.csv into Elasticsearch.
# The pasted original fused the import and the client construction into one
# invalid statement; they are separated here.
from elasticsearch import helpers, Elasticsearch

es = Elasticsearch("localhost:9200")

# The merged file derives from UTF-8 input, so decode it as UTF-8;
# newline='' lets the csv module handle line endings itself.
with open('file_merge.csv', encoding='UTF-8', newline='') as f:
    # Each CSV row becomes one dict keyed by the header line.
    reader = csv.DictReader(f)
    # NOTE(review): doc_type is passed exactly as in the original. Mapping
    # types were deprecated in Elasticsearch 7 and removed in 8 -- confirm the
    # installed client and target cluster still accept this argument.
    helpers.bulk(es, reader, index='test_data_sample', doc_type='test-sample')
print('data loaded into ES')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement