fwosar

Untitled

Feb 27th, 2015
417
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.14 KB | None | 0 0
  1. import os
  2. import sys
  3. import re
  4. import codecs
  5. import io
  6. import requests
  7. from html.parser import HTMLParser
  8.  
  9. class HTMLTableParser(HTMLParser):
  10.     def __init__(self):
  11.         HTMLParser.__init__(self)
  12.         self._in_td = False
  13.         self._in_th = False
  14.         self._current_table = []
  15.         self._current_row = []
  16.         self._current_cell = []
  17.         self.tables = []
  18.  
  19.     def handle_starttag(self, tag, attrs):
  20.         if tag == 'td':
  21.             self._in_td = True
  22.         if tag == 'th':
  23.             self._in_th = True
  24.  
  25.     def handle_data(self, data):
  26.         if self._in_td ^ self._in_th:
  27.             self._current_cell.append(data.strip())
  28.  
  29.     def handle_endtag(self, tag):
  30.         if tag == 'td':
  31.             self._in_td = False
  32.         elif tag == 'th':
  33.             self._in_th = False
  34.  
  35.         if tag in ['td', 'th']:
  36.             final_cell = " ".join(self._current_cell).strip()
  37.             self._current_row.append(final_cell)
  38.             self._current_cell = []
  39.         elif tag == 'tr':
  40.             self._current_table.append(self._current_row)
  41.             self._current_row = []
  42.         elif tag == 'table':
  43.             self.tables.append(self._current_table)
  44.             self._current_table = []
  45.  
  46. BOMS = [
  47.     (codecs.BOM_UTF8, 'utf-8-sig', 0),
  48.     (codecs.BOM_UTF32_LE, 'utf-32le', 4),
  49.     (codecs.BOM_UTF32_BE, 'utf-32be', 4),
  50.     (codecs.BOM_UTF16_LE, 'utf-16le', 2),
  51.     (codecs.BOM_UTF16_BE, 'utf-16be', 2)
  52. ]
  53.  
  54. def read_unicode(file_path):
  55.     with io.open(file_path, 'rb') as f:
  56.         data = f.read(4)
  57.     for bom, encoding, seek_to in BOMS:
  58.         if data.startswith(bom):
  59.             break
  60.     else:
  61.         encoding, seek_to = 'utf-8', 0
  62.     with io.open(file_path, 'r', encoding=encoding) as f:
  63.         f.seek(seek_to)
  64.         return f.read()
  65.  
  66. def query_systemlookup(classid):
  67.     try:
  68.         params = {'type': 'clsid', 'search': classid, 's': ''}
  69.         return requests.get('http://www.systemlookup.com/search.php', params=params).text
  70.     except:
  71.         return ""
  72.  
  73. def parse_systemlookup_table(table):
  74.     result = ""
  75.     for row in table[1:]:
  76.         result += "\n  Name:        {}\n  Filename:    {}\n  Description: {}\n  Status:      {}\n".format(row[1], row[2], row[3], row[4])
  77.     return result
  78.  
  79. def get_classid_details(classid):
  80.     result = ""
  81.     parser = HTMLTableParser()
  82.     parser.feed(query_systemlookup(classid))
  83.  
  84.     if len(parser.tables) > 1:
  85.         result = "Class ID {} is listed in SystemLookup: \n{}".format(classid, parse_systemlookup_table(parser.tables[1]))
  86.     else:
  87.         result = "Class ID {} is unknown.\n".format(classid)
  88.     return result
  89.  
  90. def main(argv):
  91.     classids = list()
  92.  
  93.     if (len(argv) != 1) or (not os.path.isfile(argv[0])):
  94.         return
  95.  
  96.     content = read_unicode(argv[0])
  97.     for match in re.finditer(r"(?:(\()|(\{))?\b[A-F0-9]{8}(?:-[A-F0-9]{4}){3}-[A-F0-9]{12}\b(?(1)\))(?(2)\})", content):
  98.         if match.group().upper() not in classids:
  99.             classids.append(match.group().upper())
  100.  
  101.     for classid in classids:
  102.         print(get_classid_details(classid) + "================================================================================\n")
  103.  
  104. if __name__ == "__main__":
  105.    main(sys.argv[1:])
Advertisement
Add Comment
Please, Sign In to add comment