Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import os
- import sys
- import re
- import codecs
- import io
- import requests
- from html.parser import HTMLParser
class HTMLTableParser(HTMLParser):
    """Collect the cell text of every HTML table fed to the parser.

    After ``feed()`` has been called, ``tables`` holds one entry per closed
    ``</table>`` tag.  Each entry is a list of rows, and each row is a list
    of cell strings (``<td>`` and ``<th>`` cells are treated alike).
    """

    def __init__(self):
        super().__init__()
        # Flags telling whether the parser is currently inside a data cell
        # (<td>) or a header cell (<th>).
        self._in_td = False
        self._in_th = False
        # Accumulators for the table, row and cell currently being read.
        self._current_table = []
        self._current_row = []
        self._current_cell = []
        # Finished tables, in document order.
        self.tables = []

    def handle_starttag(self, tag, attrs):
        """Record that a ``<td>`` or ``<th>`` cell has been opened."""
        if tag == 'td':
            self._in_td = True
        if tag == 'th':
            self._in_th = True

    def handle_data(self, data):
        """Store non-blank text that appears inside exactly one open cell."""
        # XOR: text is only collected while one (and only one) of the two
        # cell flags is set.
        if self._in_td ^ self._in_th:
            text = data.strip()
            # Whitespace-only chunks (e.g. the newline between two <br>
            # tags) would otherwise become empty strings and produce
            # doubled spaces when the cell is joined.
            if text:
                self._current_cell.append(text)

    def handle_endtag(self, tag):
        """Close the current cell, row or table, depending on ``tag``."""
        if tag == 'td':
            self._in_td = False
        elif tag == 'th':
            self._in_th = False

        if tag in ('td', 'th'):
            # A cell may have been delivered in several chunks (nested
            # inline tags); merge them into one space-separated string.
            final_cell = " ".join(self._current_cell).strip()
            self._current_row.append(final_cell)
            self._current_cell = []
        elif tag == 'tr':
            self._current_table.append(self._current_row)
            self._current_row = []
        elif tag == 'table':
            self.tables.append(self._current_table)
            self._current_table = []
# Known byte-order marks as (BOM bytes, codec name, bytes to skip).
# The UTF-32 entries must come before the UTF-16 ones because the UTF-32 LE
# BOM starts with the same two bytes as the UTF-16 LE BOM.  'utf-8-sig'
# strips its own BOM while decoding, hence the skip count of 0.
BOMS = [
    (codecs.BOM_UTF8, 'utf-8-sig', 0),
    (codecs.BOM_UTF32_LE, 'utf-32le', 4),
    (codecs.BOM_UTF32_BE, 'utf-32be', 4),
    (codecs.BOM_UTF16_LE, 'utf-16le', 2),
    (codecs.BOM_UTF16_BE, 'utf-16be', 2),
]


def read_unicode(file_path):
    """Return the whole text of ``file_path``, decoded according to its BOM.

    The first four bytes are compared against ``BOMS``; when none matches,
    the file is decoded as plain UTF-8.  Line endings are translated to
    ``\\n`` (universal newlines), as with a regular text-mode read.

    Raises whatever ``io.open`` raises for an unreadable path, and
    ``UnicodeDecodeError`` when the content is not valid in the detected
    encoding.
    """
    with io.open(file_path, 'rb') as raw:
        head = raw.read(4)
        for bom, encoding, seek_to in BOMS:
            if head.startswith(bom):
                break
        else:
            # No BOM recognised: fall back to UTF-8 from the first byte.
            encoding, seek_to = 'utf-8', 0
        # Skip the BOM on the *binary* stream, where a byte offset is well
        # defined; seeking a text stream to an arbitrary offset is not.
        raw.seek(seek_to)
        with io.TextIOWrapper(raw, encoding=encoding) as text:
            return text.read()
def query_systemlookup(classid, timeout=30):
    """Fetch the SystemLookup search page for ``classid``.

    Args:
        classid: Text sent as the ``search`` query parameter (the search
            ``type`` is fixed to ``clsid``).
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot block the script indefinitely.

    Returns:
        The response body as text, or ``""`` when the request fails
        (best-effort lookup: network problems are not fatal).
    """
    params = {'type': 'clsid', 'search': classid, 's': ''}
    try:
        response = requests.get(
            'http://www.systemlookup.com/search.php',
            params=params,
            timeout=timeout,
        )
    except requests.RequestException:
        # Only network/HTTP-layer failures are swallowed; KeyboardInterrupt
        # and programming errors still propagate.
        return ""
    return response.text
def parse_systemlookup_table(table):
    """Format the data rows of a parsed result table as readable text.

    Args:
        table: List of rows, each a list of cell strings.  The first row is
            treated as the header and skipped; cells 1-4 of every remaining
            row are reported as name, filename, description and status.

    Returns:
        One formatted paragraph per data row, concatenated; ``""`` when the
        table has no usable data rows.
    """
    entry = "\n Name: {}\n Filename: {}\n Description: {}\n Status: {}\n"
    return "".join(
        entry.format(row[1], row[2], row[3], row[4])
        for row in table[1:]
        # Rows with fewer than five cells (spacer or message rows) cannot
        # be data rows; skip them instead of failing with IndexError.
        if len(row) >= 5
    )
def get_classid_details(classid):
    """Look ``classid`` up on SystemLookup and describe the outcome.

    The search page is downloaded and its tables are parsed.  When the page
    contains more than one table, the second one is formatted as the list
    of matches; otherwise the class ID is reported as unknown.
    """
    table_parser = HTMLTableParser()
    table_parser.feed(query_systemlookup(classid))

    # A single table (or none) means there is no results table to report.
    if len(table_parser.tables) <= 1:
        return "Class ID {} is unknown.\n".format(classid)

    listing = parse_systemlookup_table(table_parser.tables[1])
    return "Class ID {} is listed in SystemLookup: \n{}".format(classid, listing)
# A GUID (8-4-4-4-12 hex digits), optionally wrapped in a matching pair of
# parentheses or braces.  The conditional groups require the closing
# character only when the corresponding opening one was matched.
# IGNORECASE: matches are normalised with .upper() below, so lower-case
# GUIDs must be found as well.
_CLASSID_PATTERN = re.compile(
    r"(?:(\()|(\{))?\b[A-F0-9]{8}(?:-[A-F0-9]{4}){3}-[A-F0-9]{12}\b(?(1)\))(?(2)\})",
    re.IGNORECASE,
)


def _extract_classids(content):
    """Return the distinct class IDs in ``content``, upper-cased, in first-seen order."""
    found = (match.group().upper() for match in _CLASSID_PATTERN.finditer(content))
    # dict keys keep insertion order and give O(1) duplicate detection.
    return list(dict.fromkeys(found))


def main(argv):
    """Scan a file for class IDs and print the SystemLookup details of each.

    Args:
        argv: Command-line arguments without the program name; exactly one
            is expected, the path of an existing file.

    Returns:
        None.  When the arguments are invalid, a short message is written
        to stderr and nothing is looked up.
    """
    if len(argv) != 1 or not os.path.isfile(argv[0]):
        sys.stderr.write(
            "Expected exactly one argument: the path of an existing file "
            "to scan for class IDs.\n"
        )
        return

    content = read_unicode(argv[0])
    for classid in _extract_classids(content):
        print(get_classid_details(classid) + "================================================================================\n")


if __name__ == "__main__":
    main(sys.argv[1:])
Advertisement
Add Comment
Please, Sign In to add comment