Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import boto3
- import pandas as pd
- from io import StringIO
class S3DataFrame(pd.DataFrame):
    """DataFrame subclass that round-trips itself as CSV through Amazon S3.

    Example:
        # Make a dataframe and upload it as csv
        s3df = S3DataFrame({'h1': [1], 'h2': [2]})
        s3df.to_s3(Bucket='bucket-name',
                   Key='file-key-on-s3',       # object key in S3
                   SSEKMSKeyId='kms-id')       # KMS key must be in the bucket's region
        # Download the same csv into a DataFrame
        s3df2 = S3DataFrame.from_s3(Bucket='bucket-name', Key='file-key-on-s3')
    """

    # Shared S3 client, created once at import time.
    # NOTE(review): relies on default region/credential resolution — may need
    # an explicit region_name in some deployments; confirm against your env.
    client = boto3.client('s3')

    @property
    def _constructor(self):
        # pandas subclassing contract: without this, operations such as
        # slicing or .copy() would return a plain pd.DataFrame instead of
        # an S3DataFrame.
        return S3DataFrame

    def to_s3(self, Bucket, Key, SSEKMSKeyId):
        """Serialize this frame to CSV in memory and upload it to S3.

        Args:
            Bucket: Target S3 bucket name.
            Key: Object key the CSV is stored under.
            SSEKMSKeyId: KMS key id used for server-side encryption
                (must be in the same region as the bucket).

        Returns:
            The ``put_object`` response dict from boto3.
        """
        csv_buffer = StringIO()
        self.to_csv(csv_buffer, index=False)
        return self.client.put_object(
            Bucket=Bucket,
            Key=Key,
            Body=csv_buffer.getvalue(),
            SSEKMSKeyId=SSEKMSKeyId,
            ServerSideEncryption='aws:kms',
        )

    @classmethod
    def from_s3(cls, Bucket, Key):
        """Download a CSV object from S3 and parse it into memory.

        Args:
            Bucket: Source S3 bucket name.
            Key: Object key of the CSV file.

        Returns:
            An instance of ``cls`` built from the downloaded CSV.
        """
        body = cls.client.get_object(Bucket=Bucket, Key=Key)['Body'].read()
        # Bug fix: the original returned a bare pd.DataFrame, so this
        # alternate constructor never actually produced an S3DataFrame.
        return cls(pd.read_csv(StringIO(body.decode('utf-8'))))
if __name__ == "__main__":
    # Demo: round-trip a tiny frame through S3.
    bucket, key, kms_id = 'bucket-name', 'test.csv', 'kms-id'

    # Build a one-row frame and push it to S3 as an encrypted CSV.
    frame = S3DataFrame({'h1': [1], 'h2': [2]})
    frame.to_s3(Bucket=bucket, Key=key, SSEKMSKeyId=kms_id)

    # Pull the same object back down into a fresh frame.
    downloaded = S3DataFrame.from_s3(Bucket=bucket, Key=key)
Add Comment
Please, Sign In to add comment