SHARE
TWEET

cssstylelist.py (XML)

gabalese Apr 14th, 2012 158 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #! /usr/bin/env python3
  2. # file: cssstylelist.py
  3. # Build a list of every style (CSS class) used in the html files and return it
  4. # From the CLI, type `./cssstylelist.py > file_with_a_list.txt`
  5. # Feel free to address any complaints to @gabalese
  6.  
  7. import os, glob, sys
  8. try:
  9.         from lxml import etree as ET
  10. except ImportError:
  11.         import xml.etree.ElementTree as ET
  12.         print("lxml not installed. Running with xml.etree instead")
  13.  
  14. path = "OEBPS/Text" # your mileage may vary
  15. list = []
  16. new_list = []
  17.  
  18. def cssList():
  19.         global list
  20.         global new_list
  21.         for infile in glob.glob(os.path.join(path, '*html')):
  22.                 try:
  23.                         html = ET.parse(infile).getroot()
  24.                 except:
  25.                         print("ERROR: Unable to parse " + infile)
  26.                         print("This is likely to happen with ill-formed xhtml files.")
  27.                         sys.exit(1)
  28.                 for i in html.iter():
  29.                         list.append(i.get("class"))
  30.        
  31.         for i in list:
  32.                 if i not in new_list:
  33.                         if i is not None:
  34.                                 new_list.append(i)
  35.                        
  36.         return new_list
  37.  
  38.  
  39.        
  40. if __name__ == "__main__":
  41.         for item in (cssList()):
  42.                 print(item)
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top