Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #! /usr/bin/env python3
- # file: cssstylelist.py
- # Make a list of every style (CSS class) used in the html files and return it
- # From the CLI, type `./cssstylelist.py > file_with_a_list.txt`
- # Feel free to address any complaints to @gabalese
- import os, glob, sys
- try:
- from lxml import etree as ET
- except ImportError:
- import xml.etree.ElementTree as ET
- print("Failed to import lxml: running with standard xml.etree instead.")
# Directory that is searched for (x)html files; adjust it to your EPUB layout.
path = "OEBPS/Text"

# Module-level lists.
# NOTE: the name `list` shadows the builtin for the rest of this module, so
# `list(...)` cannot be used as a constructor in code below this line.
list, new_list = [], []
def _new_parser():
    """Return a fresh XML parser, tolerant of ill-formed markup when possible."""
    try:
        # lxml accepts recover=True, which makes the parser keep going past
        # well-formedness errors.
        return ET.XMLParser(recover=True)
    except TypeError:
        # The standard-library xml.etree parser rejects the `recover`
        # keyword; fall back to its default (strict) parser.
        return ET.XMLParser()


def cssList(directory=None):
    """Collect every distinct ``class`` attribute value used in the html files.

    Every file in *directory* whose name ends in ``html`` (so both ``.html``
    and ``.xhtml``) is parsed, and the ``class`` attribute of each element is
    recorded exactly as written (a multi-class value such as ``"a b"`` is kept
    as one entry).

    Args:
        directory: Folder to scan. Defaults to the module-level ``path``.

    Returns:
        A new list of the distinct class values, in order of first
        appearance. Files are visited in sorted name order so the result is
        deterministic.

    Raises:
        SystemExit: With exit status 1 if a file cannot be parsed.
    """
    if directory is None:
        directory = path

    found = []
    for infile in sorted(glob.glob(os.path.join(directory, '*html'))):
        try:
            # A fresh parser per file: a parser instance is not guaranteed to
            # be reusable once it has consumed a document.
            root = ET.parse(infile, _new_parser()).getroot()
        except Exception:
            # Diagnostics go to stderr so they never end up inside the list
            # when stdout is redirected to a file.
            print("ERROR: Unable to parse " + infile, file=sys.stderr)
            print("This is likely to happen with ill-formed xhtml files.",
                  file=sys.stderr)
            sys.exit(1)
        found.extend(element.get("class") for element in root.iter())

    # dict.fromkeys() de-duplicates in linear time while keeping first-seen
    # order; elements without a class attribute contributed None.
    return [name for name in dict.fromkeys(found) if name is not None]
# Script entry point: emit each collected style on its own line of stdout.
if __name__ == "__main__":
    for style_name in cssList():
        print(style_name)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement