gabalese

cssstylelist.py (XML)

Apr 14th, 2012
176
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #! /usr/bin/env python3
  2. # file: cssstylelist.py
  3. # Build a list of every CSS class (style) used in the html files and return it.
  4. # From the CLI, type `./cssstylelist.py > file_with_a_list.txt`
  5. # Feel free to address any complaints to @gabalese
  6.  
  7. import os, glob, sys
  8. try:
  9.     from lxml import etree as ET
  10. except ImportError:
  11.     import xml.etree.ElementTree as ET
  12.     print("lxml not installed. Running with xml.etree instead")
  13.  
  # Directory searched for the html files -- your mileage may vary.
  path = "OEBPS/Text"
  # Module-level scratch lists.
  # NOTE(review): the name `list` shadows the builtin for the rest of this
  # module; renaming it requires updating every reference at the same time.
  list, new_list = [], []
  17.  
  def cssList(directory=None):
      """Return the distinct ``class`` attribute values found in html files.

      Every file matching ``*html`` (so both ``.html`` and ``.xhtml``) inside
      *directory* is parsed as XML and each element's ``class`` attribute is
      collected. Values are taken verbatim: an attribute such as
      ``class="a b"`` is reported as the single entry ``"a b"``.

      Args:
          directory: Folder to scan. When omitted (``None``), the
              module-level ``path`` setting is used.

      Returns:
          A new list of unique class values, in order of first appearance.
          Files are visited in sorted name order so the result is
          reproducible. Nothing is accumulated between calls.

      Raises:
          SystemExit: With exit status 1 when a file cannot be parsed.
      """
      if directory is None:
          directory = path  # fall back to the module-level default

      seen = set()   # O(1) membership test instead of scanning the result list
      styles = []    # preserves first-seen order

      # glob returns files in arbitrary order; sort for deterministic output.
      for infile in sorted(glob.glob(os.path.join(directory, '*html'))):
          try:
              root = ET.parse(infile).getroot()
          except Exception:
              # Any parser failure is fatal by design, but a bare ``except``
              # would also report Ctrl-C / SystemExit as a parse error.
              # Diagnostics go to stderr because stdout is normally
              # redirected into the result file.
              print("ERROR: Unable to parse " + infile, file=sys.stderr)
              print("This is likely to happen with ill-formed xhtml files.",
                    file=sys.stderr)
              sys.exit(1)

          for element in root.iter():
              style = element.get("class")
              if style is not None and style not in seen:
                  seen.add(style)
                  styles.append(style)

      return styles
  37.  
  38.  
  39.    
  if __name__ == "__main__":
      # Script entry point: emit one class value per line on stdout.
      styles = cssList()
      for style_name in styles:
          print(style_name)
RAW Paste Data