Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- CREATE EXTERNAL TABLE IF NOT EXISTS {{DATABASE_NAME.TABLE_NAME}} (
- timestamp string,
- elb_name string,
- request_ip string,
- request_port int,
- backend_ip string,
- backend_port int,
- request_processing_time double,
- backend_processing_time double,
- client_response_time double,
- elb_response_code string,
- backend_response_code string,
- received_bytes bigint,
- sent_bytes bigint,
- request_verb string,
- url string,
- protocol string,
- user_agent string,
- ssl_cipher string,
- ssl_protocol string
- )
- PARTITIONED BY(year string, month string, day string)
- ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
- WITH SERDEPROPERTIES (
- 'serialization.format' = '1',
- 'input.regex' = '([^ ]*) ([^ ]*) ([^ ]*):([0-9]*) ([^ ]*)[:-]([0-9]*) ([-.0-9]*) ([-.0-9]*) ([-.0-9]*) (|[-0-9]*) (-|[-0-9]*) ([-0-9]*) ([-0-9]*) \\\"([^ ]*) ([^ ]*) (- |[^ ]*)\\\" (\"[^\"]*\") ([A-Z0-9-]+) ([A-Za-z0-9.-]*)$' )
- LOCATION 's3://{{BUCKET_NAME}}/AWSLogs/{{ACCOUNT_ID}}/elasticloadbalancing/{{REGION}}/';
- // To only query for June 2019 load data from Partition
- ALTER TABLE {{DATABASE_NAME.TABLE_NAME}} ADD PARTITION (year='2019', month='06', day='*') location 's3://{{BUCKET_NAME}}/AWSLogs/{{ACCOUNT_ID}}/elasticloadbalancing/{{REGION}}/2019/06/';
- // Query
- SELECT timestamp, user_agent, request_verb,url, ssl_cipher,ssl_protocol
- FROM {{DATABASE_NAME.TABLE_NAME}}
- WHERE ssl_protocol='TLSv1'
- LIMIT 10;
- // Drop Partition
- ALTER TABLE {{DATABASE_NAME.TABLE_NAME}} DROP PARTITION (year='2019', month='06', day='*');
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement