Advertisement
7a_

Untrusted HTML XSS/CSS challenge 2

7a_
Jan 26th, 2012
285
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.33 KB | None | 0 0
  1. #!/usr/bin/env python
  2. # Created By Abraham Aranguren <name.surname@gmail.com> Twitter: @7a_ http://7-a.org
  3. # Requires lxml, installation instructions here: http://lxml.de/installation.html
  4. # Installation in Backtrack 5: /usr/bin/easy_install --allow-hosts=lxml.de,*.python.org lxml
  5. # Tip for Ubuntu courtesy of Mario Heiderich: Python2.7-dev is needed to compile this lib properly
  6. # Clean HTML reference: http://lxml.de/lxmlhtml.html#cleaning-up-html
  7. # Library documentation: http://lxml.de/api/lxml.html.clean.Cleaner-class.html
  8. from lxml.html.clean import Cleaner
  9.  
  10. ALLOWED_TAGS = ('html','body', 'a', 'p', 'h1', 'h2', 'h3', 'h4', 'div', 'table', 'tbody', 'tr', 'td', 'th', 'strong', 'em', 'sup', 'sub', 'ul', 'ol', 'li')
  11.  
  12. class HTMLSanitiser:
  13.         def __init__(self):
  14.                 self.Cleaner = Cleaner(javascript = False, comments = False, links = True, meta = True, page_structure = False, processing_instructions = False, embedded = False, frames = False, forms = False, annoying_tags = False, remove_unknown_tags = False, safe_attrs_only = True, allow_tags=ALLOWED_TAGS)
  15.  
  16.         def CleanThirdPartyHTML(self, HTML):
  17.                 return self.Cleaner.clean_html(HTML)
  18.  
  19. # For testing as a standalone script:
  20. Sanitiser = HTMLSanitiser()
  21. with open('input.txt') as file:
  22.         print Sanitiser.CleanThirdPartyHTML(file.read())
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement