Advertisement
gabalese

cssstylelist.py (HTML safe)

Apr 16th, 2012
69
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.11 KB | None | 0 0
  1. #! /usr/bin/env python3
  2. # file: cssstylelist.py
  3. # Build and return a list of every CSS class ("class" attribute value) used in the HTML files.
  4. # From the CLI, type `./cssstylelist.py > file_with_a_list.txt`
  5. # Feel free to address any complaints to @gabalese
  6.  
  7. import os, glob, sys
  8. try:
  9.     from lxml import etree as ET
  10. except ImportError:
  11.     import xml.etree.ElementTree as ET
  12.     print("Failed to import lxml: running with standard xml.etree instead.")
  13.    
  14. path = "OEBPS/Text" # your mileage may vary: directory (relative to the current working directory) searched for *html files
  15. list = [] # module-level scratch list for collected "class" values; NOTE: this name shadows the builtin `list` for the whole module
  16. new_list = [] # module-level list intended for the de-duplicated class values
  17.  
  def _make_parser():
      """Return a fresh XML parser, tolerant of ill-formed input when possible."""
      try:
          # lxml's parser accepts recover=True, which makes parsing "tolerant"
          # of ill-formedness.
          return ET.XMLParser(recover=True)
      except TypeError:
          # The standard-library xml.etree.ElementTree.XMLParser has no
          # ``recover`` option; fall back to a strict parser so the
          # advertised non-lxml mode actually works.
          return ET.XMLParser()


  def cssList(directory=None):
      """Return the unique ``class`` attribute values found in the HTML files.

      Every file matching ``*html`` directly inside *directory* is parsed as
      XML and the ``class`` attribute of each element is collected.

      Args:
          directory: Folder to scan. Defaults to the module-level ``path``.

      Returns:
          A new list of the distinct ``class`` attribute values, in order of
          first appearance (files are visited in sorted filename order).
          Values are kept verbatim: ``class="a b"`` yields the single entry
          ``"a b"``. Elements without a ``class`` attribute are ignored.

      Raises:
          SystemExit: With status 1 if a file cannot be parsed. The error
              message is written to stderr so it does not end up in a
              redirected class list.
      """
      if directory is None:
          directory = path

      seen = set()
      unique_classes = []
      # Sort so the output order does not depend on filesystem listing order.
      for infile in sorted(glob.glob(os.path.join(directory, '*html'))):
          try:
              # A parser is created per file: a standard-library parser
              # cannot be reused once a previous parse has closed it.
              root = ET.parse(infile, _make_parser()).getroot()
          except Exception:
              # Unlike a bare ``except:``, this lets KeyboardInterrupt and
              # SystemExit propagate untouched.
              print("ERROR: Unable to parse " + infile, file=sys.stderr)
              print("This is likely to happen with ill-formed xhtml files.",
                    file=sys.stderr)
              sys.exit(1)
          if root is None:
              # Nothing usable was recovered from this file.
              continue
          for element in root.iter():
              css_class = element.get("class")
              if css_class is not None and css_class not in seen:
                  seen.add(css_class)
                  unique_classes.append(css_class)

      return unique_classes
  38.  
  39.  
  40.    
  if __name__ == "__main__":
      # Script entry point: emit one class value per line so the output can
      # be redirected into a text file.
      unique_classes = cssList()
      for css_class in unique_classes:
          print(css_class)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement