Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Run an S3 Select query against a CSV object and load the result into pandas.
# `bucket_name` and `file_key` are expected to be defined by the surrounding code.
s3 = boto3.client('s3')
bucket = bucket_name
file_name = file_key
sql_stmt = """SELECT S.* FROM s3object S LIMIT 10"""

req = s3.select_object_content(
    Bucket=bucket,
    Key=file_name,
    ExpressionType='SQL',
    Expression=sql_stmt,
    InputSerialization={'CSV': {'FileHeaderInfo': 'USE'}},
    OutputSerialization={'CSV': {}},
)

# The response payload is an event stream: 'Records' events carry raw bytes of
# the result, and a 'Stats' event carries the scan/return statistics.
records = []
stats = None
for event in req['Payload']:
    if 'Records' in event:
        records.append(event['Records']['Payload'])
    elif 'Stats' in event:
        stats = event['Stats']['Details']

# Join the raw bytes BEFORE decoding: a chunk boundary may fall in the middle
# of a multi-byte UTF-8 sequence, so decoding chunk-by-chunk can raise.
file_str = b''.join(records).decode('utf-8')

if file_str:
    # With FileHeaderInfo='USE' the header line is consumed by S3 Select and is
    # not part of the output, so every returned line is data. header=None keeps
    # pandas from swallowing the first record as column names.
    select_df = pd.read_csv(StringIO(file_str), header=None)
else:
    # read_csv raises EmptyDataError on empty input; an empty result set is
    # legitimate, so represent it as an empty frame instead.
    select_df = pd.DataFrame()

print(len(select_df))
# read_csv already returns a DataFrame; no re-wrapping needed.
df = select_df
print(df)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement