Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/env python
# coding: utf-8
import re
import sys
from optparse import OptionParser

try:
    from HTMLParser import HTMLParser  # Python 2 module name
except ImportError:
    from html.parser import HTMLParser  # Python 3 module name
# Command-line option parsing. The parser is built at import time; the
# arguments themselves are only parsed in the script entry point.
options = OptionParser(usage='%prog ', description='for mksummary(03 sheets)')
options.add_option(
    '-v', '--verbose',
    default=False,
    action='store_true',
    help='Verbose mode.',
)
def get_lhost(ipaddr):
    """Return *ipaddr* with each dot-separated field zero-padded to 3 chars.

    For example ``"192.168.1.5"`` becomes ``"192.168.001.005"``.  Fields are
    padded with ``str.zfill(3)``; fields already 3 or more characters long
    are left unchanged.

    Args:
        ipaddr: A dotted string, or ``None``.

    Returns:
        The padded dotted string, or ``None`` when *ipaddr* is ``None``.
    """
    if ipaddr is None:
        return None
    return ".".join(field.zfill(3) for field in ipaddr.split("."))
class RetinaSummaryParser(HTMLParser):
    """Extract port rows from a Retina HTML summary export.

    The parser is a flag-driven state machine over the HTML event stream:

    * a ``<td class="h2">`` cell whose text is ``ポート`` switches port
      collection on; any other ``h2`` cell text switches it off;
    * inside the port section, a ``class="h4"`` cell carries the port number
      and a ``class="h5"`` cell carries the value for the most recently seen
      ``<th>`` header (port type, port state, detected protocol);
    * a ``<div>`` with text ``IP アドレス:`` arms the IP address lookup, and
      the text following the next ``class="h4 text-primary"`` start tag is
      stored as the address;
    * when a ``</table>`` is reached after a port number was captured, one
      CSV line ``lhost,shost,port,prot,stat,name`` is printed to stdout.

    Args:
        verbose: ``True``/``False`` to force debug tracing on or off.  When
            left as ``None`` the parser falls back to a module-level ``opts``
            object (as created by the script entry point) and reads its
            ``verbose`` attribute; if no such global exists, tracing is off.
    """

    def __init__(self, verbose=None):
        HTMLParser.__init__(self)
        self.verbose = verbose

        # General-purpose "currently inside this element" flags.
        self.flg_tr = False
        self.flg_td = False
        self.flg_th = False
        self.flg_div = False
        self.flg_h2 = False
        self.count = 0  # index of the current h5 detail cell within a port

        # State flags.
        self.flg_indata = False   # a port number was captured; row is pending output
        self.flg_getdata = False  # the next text chunk is a value to capture
        self.flg_port = False     # currently inside the port section of the report
        self.flg_ipaddr = False   # the "IP アドレス:" label was seen
        self.flg_ipaddr2 = False  # the next text chunk is the IP address
        self.flg_end = False      # collection finished (not used by this class)

        # Which field the next captured value belongs to.
        self.flg_portnum = False
        self.flg_protocol = False
        self.flg_name = False
        self.flg_stat = False

        # Captured values for the row being assembled.
        self.ipaddr = None
        self.port = None
        self.prot = None
        self.name = None
        self.stat = None

    def _is_verbose(self):
        """Return whether debug tracing is enabled for this parser."""
        if self.verbose is not None:
            return bool(self.verbose)
        # Fall back to the script's global options object when it exists, so
        # running the file as a script keeps honouring -v/--verbose.
        return bool(getattr(globals().get('opts'), 'verbose', False))

    def handle_starttag(self, tag, attrs):
        """Track element nesting and arm value capture based on CSS class."""
        css_class = dict(attrs).get('class')

        if tag == 'tr':
            self.flg_tr = True
        elif tag == 'td':
            self.flg_td = True
            if css_class == 'h2':
                self.flg_h2 = True
        elif tag == 'th':
            self.flg_th = True
        elif tag == 'div':
            self.flg_div = True

        # The element flags above are updated first on purpose: the checks
        # below must see the state that includes the tag being opened.
        if self.flg_port and self.flg_td:
            if css_class == 'h4':    # port overview cell
                self.flg_portnum = True
                self.flg_getdata = True
                self.count = 0
            elif css_class == 'h5':  # port detail cell
                self.flg_getdata = True
                self.count += 1

        if self.flg_ipaddr and self.flg_div and css_class == 'h4 text-primary':
            self.flg_ipaddr = False
            self.flg_ipaddr2 = True

    def handle_endtag(self, tag):
        """Clear element flags and print the pending row at ``</table>``."""
        if tag == 'tr':
            self.flg_tr = False
        elif tag == 'table':
            if self.flg_indata:
                # Columns: lhost, shost, port, prot, stat, name
                print('%s,%s,%05d,%s,%s,%s' % (
                    get_lhost(self.ipaddr),
                    self.ipaddr,
                    int(self.port),
                    self.prot,
                    self.stat,
                    self.name,
                ))
                self.flg_indata = False
        elif tag == 'td':
            self.flg_td = False
            self.flg_h2 = False
        elif tag == 'th':
            self.flg_th = False
        elif tag == 'div':
            self.flg_div = False

    def handle_data(self, data):
        """Classify header text and capture values armed by earlier tags."""
        # Only the port section is collected; any other h2 section title
        # switches collection off again.
        if self.flg_h2:
            self.flg_port = bool(re.match(r'^ポート$', data))

        # A <th> header decides which field the next h5 value fills.
        if self.flg_th:
            if re.match(r'^ポートタイプ$', data):
                self.flg_protocol = True
            if re.match(r'^ポートの状態$', data):
                self.flg_stat = True
            if re.match(r'^検出されたプロトコル$', data):
                self.flg_name = True

        # IP address label.
        if self.flg_div and re.match(r'^IP アドレス:$', data):
            self.flg_ipaddr = True

        # IP address value.
        if self.flg_ipaddr2:
            self.ipaddr = data.strip()
            self.flg_ipaddr2 = False

        # Port values.
        if not self.flg_getdata:
            return
        self.flg_getdata = False

        if self._is_verbose():
            print(">>> [%d] : %s, %s, %s" % (
                self.count, data, self.getpos(), self.get_starttag_text()))

        if self.flg_portnum:   # port number
            # NOTE: str.strip() removes any of the given *characters* from
            # both ends, not a literal prefix.  That is harmless while the
            # remaining value is numeric, which int() at output time requires.
            self.port = data.strip("UDP:").strip("TCP:").strip()
            self.flg_indata = True
        if self.flg_protocol:  # protocol
            self.prot = data.strip()
        if self.flg_name:      # service name
            self.name = data.strip()
        if self.flg_stat:      # state
            self.stat = data.strip()

        # Every field selector is cleared once a value cell has been consumed.
        self.flg_portnum = False
        self.flg_protocol = False
        self.flg_name = False
        self.flg_stat = False
if __name__ == '__main__':
    # Script entry point: parse every report named on the command line and
    # let the parser print one CSV line per port table to stdout.
    #
    # `opts` is deliberately left as a module-level name: the parser's
    # verbose check may look it up as a global.
    opts, args = options.parse_args()

    # Use the positional arguments left over after option parsing, so that
    # flags such as -v are never mistaken for file names.
    if not args:
        print('[E] Give me Retina export summary data files(html).')
        sys.exit(1)

    for filename in args:
        try:
            with open(filename) as report:
                f = report.read()
        except (IOError, OSError, UnicodeError) as err:
            # Best effort: report the unreadable file and carry on.
            sys.stderr.write('[W] Skipping %s: %s\n' % (filename, err))
            continue

        f = f.replace(',', ' ')    # commas would break the CSV output
        f = f.replace('<BR>', '')  # drop tags that would split cell text

        p = RetinaSummaryParser()
        p.feed(f)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement