Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import boto3
- import pandas as pd
- from io import StringIO
class S3DataFrame(pd.DataFrame):
    """DataFrame subclass that round-trips itself as CSV through Amazon S3.

    Example:
        # Make a dataframe and upload it as csv
        s3df = S3DataFrame({'h1': [1], 'h2': [2]})
        s3df.to_s3(Bucket='bucket-name',
                   Key='file-key-on-s3',       # object key in S3
                   SSEKMSKeyId='kms-id')       # KMS key must be in the bucket's region
        # Download the same csv into a DataFrame
        s3df2 = S3DataFrame.from_s3(Bucket='bucket-name', Key='file-key-on-s3')
    """

    # Shared S3 client, created once at import time.
    # NOTE(review): relies on default region/credential resolution — may need
    # an explicit region_name in some deployments; confirm against your env.
    client = boto3.client('s3')

    @property
    def _constructor(self):
        # pandas subclassing contract: without this, operations such as
        # slicing or .copy() would return a plain pd.DataFrame instead of
        # an S3DataFrame.
        return S3DataFrame

    def to_s3(self, Bucket, Key, SSEKMSKeyId):
        """Serialize this frame to CSV in memory and upload it to S3.

        Args:
            Bucket: Target S3 bucket name.
            Key: Object key the CSV is stored under.
            SSEKMSKeyId: KMS key id used for server-side encryption
                (must be in the same region as the bucket).

        Returns:
            The ``put_object`` response dict from boto3.
        """
        csv_buffer = StringIO()
        self.to_csv(csv_buffer, index=False)
        return self.client.put_object(
            Bucket=Bucket,
            Key=Key,
            Body=csv_buffer.getvalue(),
            SSEKMSKeyId=SSEKMSKeyId,
            ServerSideEncryption='aws:kms',
        )

    @classmethod
    def from_s3(cls, Bucket, Key):
        """Download a CSV object from S3 and parse it into memory.

        Args:
            Bucket: Source S3 bucket name.
            Key: Object key of the CSV file.

        Returns:
            An instance of ``cls`` built from the downloaded CSV.
        """
        body = cls.client.get_object(Bucket=Bucket, Key=Key)['Body'].read()
        # Bug fix: the original returned a bare pd.DataFrame, so this
        # alternate constructor never actually produced an S3DataFrame.
        return cls(pd.read_csv(StringIO(body.decode('utf-8'))))
if __name__ == "__main__":
    # Demo: round-trip a tiny frame through S3.
    bucket, key, kms_id = 'bucket-name', 'test.csv', 'kms-id'

    # Build a one-row frame and push it to S3 as an encrypted CSV.
    frame = S3DataFrame({'h1': [1], 'h2': [2]})
    frame.to_s3(Bucket=bucket, Key=key, SSEKMSKeyId=kms_id)

    # Pull the same object back down into a fresh frame.
    downloaded = S3DataFrame.from_s3(Bucket=bucket, Key=key)
Add Comment
Please, Sign In to add comment