Guest User

Untitled

a guest
Jul 17th, 2018
83
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.39 KB | None | 0 0
  1. import boto3
  2. import pandas as pd
  3. from io import StringIO
  4.  
  5. class S3DataFrame(pd.DataFrame):
  6.  
  7. """
  8. # Make a dataframe and upload it as csv
  9. s3df = S3DataFrame({'h1':[1], 'h2':[2]})
  10. s3df.to_s3(Bucket='bucket-name',
  11. Key='file-key-on-s3', # The name of the file when it is stored in s3
  12. SSEKMSKeyId='kms-id') # note: the kms should be in the same region as the bucket
  13.  
  14. # Download the same csv into DataFrame
  15. s3df2 = S3DataFrame.from_s3(Bucket='bucket-name', Key='file-key-on-s3')
  16.  
  17. """
  18.  
  19. client = boto3.client('s3') # May need region but seems to work without
  20.  
  21.  
  22. def to_s3(self, Bucket, Key, SSEKMSKeyId):
  23. """Sends csv to S3 from memory"""
  24. csv_buffer = StringIO()
  25. self.to_csv(csv_buffer, index=False)
  26. return self.client.put_object(Bucket=Bucket, Key=Key, Body=csv_buffer.getvalue(), SSEKMSKeyId=SSEKMSKeyId, ServerSideEncryption='aws:kms')
  27.  
  28.  
  29. @classmethod
  30. def from_s3(cls, Bucket, Key):
  31. """Collects csv into memory from S3"""
  32. res = cls.client.get_object(Bucket=Bucket, Key=Key)['Body'].read()
  33. return pd.read_csv(StringIO(res.decode('utf-8')))
  34.  
  35.  
  36. if __name__ == "__main__":
  37.  
  38. BUCKET_NAME = 'bucket-name'
  39. FILE_KEY = 'test.csv'
  40. KMS_KEY_ID = 'kms-id'
  41.  
  42. # Creates a DataFrame and writes to S3
  43. s3df = S3DataFrame({'h1':[1], 'h2':[2]})
  44. s3df.to_s3(Bucket=BUCKET_NAME, Key=FILE_KEY, SSEKMSKeyId=KMS_KEY_ID)
  45.  
  46. # Reads from S3
  47. s3df2 = S3DataFrame.from_s3(Bucket=BUCKET_NAME, Key=FILE_KEY)
Add Comment
Please, Sign In to add comment