Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- ##the problem
- I don't understand why my doc tests are failing. The self.whitelist variable seems like it's being treated as a static variable instead of a class one.
- ##the errors:
- **********************************************************************
- File "my_sanitizer.py", line 87, in __main__.HTMLFilterFactory.add_tags
- Failed example:
- z.white_list == {'p': {}, 'i': {}, 'em': {}, 'strong': {}, 'b': {}, 'u': {}, 'h1': {}}
- Expected:
- True
- Got:
- False
- **********************************************************************
- File "my_sanitizer.py", line 97, in __main__.HTMLFilterFactory.remove_tags
- Failed example:
- y.white_list == {'p': {}, 'i': {}, 'em': {}, 'strong': {}, 'b': {}, 'u': {}, 'h1': {}}
- Expected:
- True
- Got:
- False
- **********************************************************************
- File "my_sanitizer.py", line 100, in __main__.HTMLFilterFactory.remove_tags
- Failed example:
- y.white_list
- Expected:
- {}
- Got:
- {'body': {'id': (<function <lambda> at 0x896b924>,)}}
- **********************************************************************
- 2 items had failures:
- 1 of 3 in __main__.HTMLFilterFactory.add_tags
- 2 of 5 in __main__.HTMLFilterFactory.remove_tags
- ***Test Failed*** 3 failures.
- ##my code
- import BeautifulSoup
- class HTMLFilterFactory:
- """
- This class allows you to dynamically create a custom HTML filter. You create
- a factory and add tags to it using bracket syntax. The value
- you specify is another dictionary that contains valid attribute names as
- keys and filter literals as values.
- The exception is inner_html which is treated as inner_html instead of
- an attribute.
- If you set an attribute's value to an empty string, the attribute will be
- deleted.
- >>> x = HTMLFilterFactory()
- >>> soria_only = lambda text: text == "soria" and "soria" or ""
- >>> x.add_tags("body p")
- >>> x["body"]["id"] = (soria_only,)
- >>> x["p"]["id"] = (soria_only,)
- >>> x["p"]["inner_html"] = (soria_only,)
- >>> x(r'''<BODY ID = "soria" ONLOAD = "alert('XSS')"><p id= 'tundra'>inner html that isn't soria</p></BODY>''')
- u'<body id="soria"><p></p></body>'
- """
- def __init__(self, white_list = {}, tags = ""):
- """You may optionally add a string of tags as per the add_tags method"""
- self.white_list = white_list
- self.add_tags(tags)
- def __getitem__(self, key):
- '''This returns the whitelisted tag by that name'''
- return self.white_list[key]
- def __setitem__(self, key, value):
- '''This assigns the whitelisted key to the value. The value should be
- an empty dictionary or a dictionary with valid attribute names as keys
- that point to sequences of filters you wish to apply to the attribute
- value.
- >>> x = HTMLFilterFactory()
- >>> soria_only = lambda text: text == "soria" and "soria" or ""
- >>> x["body"] = {"id":(soria_only,)}'''
- self.white_list[key] = value
- def __call__(self, text):
- '''This applies the filters to the text and returns the text'''
- soup = BeautifulSoup.BeautifulSoup(text)
- for tag in soup.findAll(True):
- if tag.name not in self.white_list:
- tag.hidden = True
- else:
- for attr, val in tag.attrs:
- if attr in self.white_list[tag.name]:
- for filter in self.white_list[tag.name][attr]:
- tag[attr] = filter(val)
- if tag[attr] == '':
- del(tag[attr])
- else:
- del(tag[attr])
- if self.white_list[tag.name].has_key("inner_html") == True:
- #If this tag has filters for inner_html, use the filters
- #on the inner_html of the tag
- for filter in self.white_list[tag.name]["inner_html"]:
- inner_html = tag.firstText(
- text = True, recursive = False)
- tag.firstText(
- text = True, recursive = False).replaceWith(
- filter(inner_html))
- return unicode(soup)
- def add_tags(self, tags):
- '''This whitelists one or more tags without specifying attributes or
- filters. (It's good if you want to add a lot of empty tags)
- >>> z = HTMLFilterFactory()
- >>> z.add_tags("p i em strong b u h1")
- >>> z.white_list == \
- {'p': {}, 'i': {}, 'em': {}, 'strong': {}, 'b': {}, 'u': {}, 'h1': {}}
- True'''
- for tag in tags.split():
- self.white_list[tag] = {}
- def remove_tags(self, tags):
- '''This removes one or more tags from the whitelist.
- >>> y = HTMLFilterFactory()
- >>> y.add_tags("p i em strong b u h1")
- >>> y.white_list == \
- {'p': {}, 'i': {}, 'em': {}, 'strong': {}, 'b': {}, 'u': {}, 'h1': {}}
- True
- >>> y.remove_tags("p i em strong b u h1")
- >>> y.white_list
- {}'''
- for tag in tags.split():
- del(self.white_list[tag])
- if __name__ == "__main__":
- import doctest
- doctest.testmod()
Add Comment
Please, Sign In to add comment