Untrusted HTML XSS/CSS challenge 3

#!/usr/bin/env python
# Created By Abraham Aranguren <[email protected]> Twitter: @7a_ http://7-a.org
# Requires lxml, installation instructions here: http://lxml.de/installation.html
# Installation in Backtrack 5: /usr/bin/easy_install --allow-hosts=lxml.de,*.python.org lxml
# Tip for Ubuntu courtesy of Mario Heiderich: Python2.7-dev is needed to compile this lib properly
# Clean HTML reference: http://lxml.de/lxmlhtml.html#cleaning-up-html
# Library documentation: http://lxml.de/api/lxml.html.clean.Cleaner-class.html
from lxml.html.clean import Cleaner, clean_html
# Tag whitelist handed to the Cleaner's allow_tags option by HTMLSanitiser.
ALLOWED_TAGS = (
        # document skeleton
        'html', 'body',
        # links, paragraphs and headings
        'a', 'p', 'h1', 'h2', 'h3', 'h4',
        # generic container
        'div',
        # tables
        'table', 'tbody', 'tr', 'td', 'th',
        # inline emphasis
        'strong', 'em', 'sup', 'sub',
        # lists
        'ul', 'ol', 'li',
)

class HTMLSanitiser:
        """Two-pass sanitiser for third-party HTML, built on lxml.html.clean.

        The first pass runs a Cleaner restricted to the ALLOWED_TAGS whitelist;
        the second pass feeds that result through lxml's module-level
        clean_html.
        """

        def __init__(self):
                # Options for the whitelist pass. Only meta removal, safe
                # attribute filtering and the tag whitelist are switched on;
                # every other filter is explicitly disabled for this pass.
                whitelist_options = dict(
                        scripts=False,
                        javascript=False,
                        comments=False,
                        links=False,
                        meta=True,
                        page_structure=False,
                        processing_instructions=False,
                        embedded=False,
                        frames=False,
                        forms=False,
                        annoying_tags=False,
                        # NOTE(review): lxml documents that remove_unknown_tags
                        # has to be off when allow_tags is given - confirm
                        # against the installed lxml version.
                        remove_unknown_tags=False,
                        safe_attrs_only=True,
                        allow_tags=ALLOWED_TAGS,
                )
                self.Cleaner = Cleaner(**whitelist_options)

        def CleanThirdPartyHTML(self, HTML):
                """Sanitise HTML in two passes and return the cleaned result.

                HTML is handed unchanged to the whitelist Cleaner built in
                __init__; whatever that returns is then passed through lxml's
                module-level clean_html, whose return value is returned.
                """
                # 1st pass: apply the tag whitelist.
                whitelisted = self.Cleaner.clean_html(HTML)
                # 2nd pass: get rid of the basics with lxml's clean_html.
                return clean_html(whitelisted)

# For testing as a standalone script:
# The instance stays at module level so code that imports `Sanitiser` from
# this module keeps working.
Sanitiser = HTMLSanitiser()
if __name__ == '__main__':
        # Guarded so that merely importing this module neither requires
        # input.txt to exist nor prints anything.
        with open('input.txt') as input_file:  # avoid shadowing the `file` builtin
                # Single-argument print(...) produces the same output as the
                # Python 2 print statement and is also valid on Python 3.
                print(Sanitiser.CleanThirdPartyHTML(input_file.read()))