Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import base64
- import json
- import re
- from datetime import datetime, timedelta
print('Loading function')

# Syslog line layout matched by the pattern below:
#   "<Mon> <day> <HH:MM:SS> [<n.n> ]<host> <tag>[<pid>]: <message>"
# Capture groups: 1 = date and time, 2 = sending host name or IP,
# 3 = tag (its trailing "-NNNNNN" part is the message class),
# 4 = optional process ID, 5 = message body.
# Compiled once at import time instead of once per record.
_SYSLOG_LINE = re.compile(
    r"^((?:\b(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?"
    r"|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\b\s+(?:(?:0[1-9])|(?:[12][0-9])|(?:3[01])|[1-9])\s+"
    r"(?:(?:2[0123]|[01]?[0-9]):(?:[0-5][0-9]):(?:(?:[0-5]?[0-9]|60)(?:[:\.,][0-9]+)?)))) (?:<(?:[0-9]+).(?:[0-9]+)> )"
    r"?((?:[a-zA-Z0-9._-]+)) ([\w\._/%-]+)(?:\[((?:[1-9][0-9]*))\])?: (.*)")

# Message classes whose body is parsed for protocol and endpoints; every
# other class is returned as 'Dropped'.
# NOTE(review): these look like the Cisco ASA "Built TCP/UDP connection"
# message IDs -- confirm against the device documentation.
_EXTRACTED_CLASSES = frozenset((302013, 302015))


def _parse_syslog_time(stamp, now):
    """Turn a year-less syslog timestamp into a datetime.

    The year is taken from *now*.  Parsing with the year included (rather
    than patching it in afterwards) lets "Feb 29" succeed in leap years.
    A result more than one day ahead of *now* is assumed to belong to the
    previous year (e.g. a "Dec 31" line handled just after New Year).

    Raises ValueError when the text cannot be parsed (fractional seconds,
    full month names, a day that does not exist in the chosen year).
    """
    parsed = datetime.strptime('{} {}'.format(now.year, stamp),
                               '%Y %b %d %H:%M:%S')
    if parsed > now + timedelta(days=1):
        parsed = parsed.replace(year=parsed.year - 1)
    return parsed


def _split_endpoint(token):
    """Split an 'interface:address/port' token into (address, port).

    Only the first ':' and the last '/' are treated as separators, so an
    IPv6 address containing colons is kept whole.  Raises ValueError when
    the address is missing or the port is not an integer.
    """
    _, _, endpoint = token.partition(':')
    address, _, port = endpoint.rpartition('/')
    if not address:
        raise ValueError('malformed endpoint: {!r}'.format(token))
    return address, int(port)


def _parse_line(payload, now_jst):
    """Parse one decoded log line into (result_code, data_field).

    data_field is the pre-serialisation dict; json.dumps(data_field) gives
    the JSON text for one output row.  result_code is 'Ok' for the classes
    in _EXTRACTED_CLASSES and 'Dropped' for every other numeric class.

    Raises ValueError or IndexError when the line does not match the
    syslog pattern or any expected field is missing or malformed.
    """
    match = _SYSLOG_LINE.match(payload)
    if match is None:
        raise ValueError('payload is not a syslog line')

    syslogtime = _parse_syslog_time(match.group(1), now_jst)
    data_field = {
        'syslogtime': syslogtime.strftime('%Y-%m-%d %H:%M:%S'),
        'logsource': match.group(2),
        # The class is the last '-'-separated piece of the tag; int() raises
        # ValueError for tags that carry no numeric class.
        'messageclass': int(match.group(3).split('-')[-1]),
    }
    if data_field['messageclass'] not in _EXTRACTED_CLASSES:
        return 'Dropped', data_field

    token = match.group(5).split(' ')
    data_field['protocol'] = token[2]
    direction = token[1]
    if direction in ('inbound', 'outbound'):
        first = _split_endpoint(token[6])
        second = _split_endpoint(token[9])
        # For inbound the first endpoint is the source; for outbound the
        # roles are swapped.
        if direction == 'inbound':
            source, destination = first, second
        else:
            source, destination = second, first
        data_field['src_ip'], data_field['src_port'] = source
        data_field['dst_ip'], data_field['dst_port'] = destination
    return 'Ok', data_field


def lambda_handler(event, context):
    """Transform a batch of base64-encoded syslog records.

    Args:
        event: dict whose 'records' list holds dicts with 'recordId' and
            'data' (one base64-encoded log line each).
        context: Lambda context object; unused.

    Returns:
        {'records': [...]} with one entry per input record, in input order.
        Each entry has 'recordId', 'result' ('Ok', 'Dropped' or
        'ProcessingFailed') and 'data'.  For parsed records 'data' is the
        base64 of the JSON row plus a newline; for failed records it is the
        record's original base64 payload, unchanged.

    A record that cannot be decoded or parsed is reported as
    'ProcessingFailed' instead of raising, so one bad line does not abort
    the whole batch.
    """
    from datetime import timezone

    # Wall-clock time at UTC+9 as a naive datetime, independent of the
    # runtime's local time zone; used only to supply the year that syslog
    # timestamps omit.
    now_jst = datetime.now(timezone(timedelta(hours=9))).replace(tzinfo=None)

    output = []
    succeeded_record_cnt = 0
    failed_record_cnt = 0

    # event['records'] holds many base64-encoded log lines; each record is
    # one of them.
    for record in event['records']:
        try:
            # payload: the plain (non-base64) text of one log line.
            payload = base64.b64decode(record['data']).decode()
            result_code, data_field = _parse_line(payload, now_jst)
        except (ValueError, IndexError):
            # ValueError also covers binascii.Error and UnicodeDecodeError.
            print('Parsing failed')
            failed_record_cnt += 1
            output.append({
                'recordId': record['recordId'],
                'result': 'ProcessingFailed',
                # Hand back the untouched base64 payload, not decoded text.
                'data': record['data'],
            })
            continue

        succeeded_record_cnt += 1
        output.append({
            'recordId': record['recordId'],
            'result': result_code,
            'data': base64.b64encode((json.dumps(data_field) + '\n').encode()).decode(),
        })

    print('Processing completed. Successful records {}, Failed records {}.'.format(succeeded_record_cnt, failed_record_cnt))
    return {'records': output}
Add Comment
Please, Sign In to add comment