# Untitled

import os
import sys
import re
import codecs
import io
import requests
from html.parser import HTMLParser

class HTMLTableParser(HTMLParser):
    """Collect the cell text of the HTML tables fed to the parser.

    After ``feed()``, ``tables`` holds one entry per ``</table>`` end tag
    seen. Each entry is a list of rows, and each row is a list of cell
    strings; ``<td>`` and ``<th>`` cells are treated alike.

    Cells, rows and tables are committed only when their explicit end tag
    (``</td>``/``</th>``, ``</tr>``, ``</table>``) is encountered.
    """

    def __init__(self):
        super().__init__()
        self._in_td = False
        self._in_th = False
        self._current_table = []  # rows of the table being parsed
        self._current_row = []    # finished cells of the row being parsed
        self._current_cell = []   # text fragments of the cell being parsed
        self.tables = []          # every completed table, in document order

    def handle_starttag(self, tag, attrs):
        """Remember that a data or header cell has been opened."""
        if tag == 'td':
            self._in_td = True
        if tag == 'th':
            self._in_th = True

    def handle_data(self, data):
        """Store a text fragment when exactly one of <td>/<th> is open."""
        if self._in_td ^ self._in_th:
            fragment = data.strip()
            # Whitespace-only fragments (e.g. between inline tags) would
            # otherwise show up as doubled spaces once the cell is joined.
            if fragment:
                self._current_cell.append(fragment)

    def handle_endtag(self, tag):
        """Commit the cell, row or table that *tag* closes."""
        if tag == 'td':
            self._in_td = False
        elif tag == 'th':
            self._in_th = False

        if tag in ('td', 'th'):
            # A cell's text is its fragments joined by single spaces.
            self._current_row.append(" ".join(self._current_cell).strip())
            self._current_cell = []
        elif tag == 'tr':
            self._current_table.append(self._current_row)
            self._current_row = []
        elif tag == 'table':
            self.tables.append(self._current_table)
            self._current_table = []

# Byte-order marks recognised by read_unicode(), as
# (BOM bytes, codec name, number of leading bytes to skip) tuples.
# The UTF-32 marks are listed before the UTF-16 ones because the UTF-32 LE
# mark starts with the same two bytes as the UTF-16 LE mark.
BOMS = [
    (codecs.BOM_UTF8, 'utf-8-sig', 0),  # this codec drops the BOM itself
    (codecs.BOM_UTF32_LE, 'utf-32le', 4),
    (codecs.BOM_UTF32_BE, 'utf-32be', 4),
    (codecs.BOM_UTF16_LE, 'utf-16le', 2),
    (codecs.BOM_UTF16_BE, 'utf-16be', 2)
]

def read_unicode(file_path):
    """Return the text of *file_path*, choosing the codec from its BOM.

    The first four bytes are compared against ``BOMS``; when none matches,
    the file is decoded as plain UTF-8. Line endings are translated the same
    way as for a file opened in text mode.

    Raises:
        OSError: if the file cannot be opened or read.
        UnicodeDecodeError: if the content is invalid for the chosen codec.
    """
    with io.open(file_path, 'rb') as raw:
        head = raw.read(4)
        for bom, encoding, seek_to in BOMS:
            if head.startswith(bom):
                break
        else:
            encoding, seek_to = 'utf-8', 0
        # Skip the BOM on the *binary* stream: seeking a text stream to an
        # offset that did not come from tell() is undefined behaviour.
        raw.seek(seek_to)
        with io.TextIOWrapper(raw, encoding=encoding) as text:
            return text.read()

def query_systemlookup(classid, timeout=30):
    """Fetch the SystemLookup search-result page for a class ID.

    Args:
        classid: the class ID string to search for.
        timeout: seconds to wait for the server before giving up.

    Returns:
        The response body as text, or an empty string when the request
        fails (connection error, timeout or HTTP error status). The lookup
        is best-effort, so failures are not propagated to the caller.
    """
    params = {'type': 'clsid', 'search': classid, 's': ''}
    try:
        response = requests.get('http://www.systemlookup.com/search.php',
                                params=params, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException:
        # Only swallow request failures; KeyboardInterrupt and programming
        # errors must still surface.
        return ""
    return response.text

def parse_systemlookup_table(table):
    """Format the rows of a parsed result table as a text report.

    Args:
        table: a list of rows, each row being a list of cell strings. The
            first row is skipped (presumably the column headers). Columns
            1-4 of every other row are reported as name, filename,
            description and status; column 0 is not used.

    Returns:
        One formatted paragraph per usable row, concatenated; an empty
        string when there is none. Rows with fewer than five cells are
        skipped rather than raising ``IndexError``.
    """
    entry = "\n  Name:        {}\n  Filename:    {}\n  Description: {}\n  Status:      {}\n"
    return "".join(
        entry.format(row[1], row[2], row[3], row[4])
        for row in table[1:]
        if len(row) >= 5
    )

def get_classid_details(classid):
    """Describe what SystemLookup reports for *classid*.

    The search page is fetched and parsed for tables. When the page holds
    more than one table, the second one is rendered as the list of entries;
    otherwise the class ID is reported as unknown.
    """
    table_parser = HTMLTableParser()
    table_parser.feed(query_systemlookup(classid))
    found_tables = table_parser.tables

    # Fewer than two tables means there is no result table to report.
    if len(found_tables) <= 1:
        return "Class ID {} is unknown.\n".format(classid)

    details = parse_systemlookup_table(found_tables[1])
    return "Class ID {} is listed in SystemLookup: \n{}".format(classid, details)

# A GUID such as 01234567-89AB-CDEF-0123-456789ABCDEF, optionally wrapped in
# a matching pair of parentheses or braces: the conditional groups demand
# the closing bracket only when the corresponding opening one was consumed.
# Case is ignored because matches are upper-cased afterwards anyway.
CLASSID_RE = re.compile(
    r"(?:(\()|(\{))?\b[A-F0-9]{8}(?:-[A-F0-9]{4}){3}-[A-F0-9]{12}\b(?(1)\))(?(2)\})",
    re.IGNORECASE,
)

def _extract_classids(content):
    """Return the distinct class IDs in *content*, upper-cased, in first-seen order."""
    seen = set()
    classids = []
    for match in CLASSID_RE.finditer(content):
        classid = match.group().upper()
        if classid not in seen:
            seen.add(classid)
            classids.append(classid)
    return classids

def main(argv):
    """Look up every class ID found in the file named by the sole argument.

    Args:
        argv: command-line arguments without the program name; exactly one
            path to an existing file is expected.

    Prints one report per distinct class ID, each followed by a separator
    line. When the arguments are not usable, a usage line is written to
    stderr and nothing else happens.
    """
    if len(argv) != 1 or not os.path.isfile(argv[0]):
        print("Usage: {} <file>".format(os.path.basename(sys.argv[0])), file=sys.stderr)
        return

    content = read_unicode(argv[0])
    for classid in _extract_classids(content):
        print(get_classid_details(classid) + "================================================================================\n")

# Script entry point: pass the command-line arguments, without the program
# name, to main().
if __name__ == "__main__":
    main(sys.argv[1:])