Guest User

Untitled

a guest
Jul 16th, 2018
76
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.24 KB | None | 0 0
  1. ##the problem
  2. I don't understand why my doctests are failing. The self.white_list attribute seems to be treated as a static (class-level) variable shared by every instance instead of an instance variable.
  3.  
  4. ##the errors:
  5. **********************************************************************
  6. File "my_sanitizer.py", line 87, in __main__.HTMLFilterFactory.add_tags
  7. Failed example:
  8. z.white_list == {'p': {}, 'i': {}, 'em': {}, 'strong': {}, 'b': {}, 'u': {}, 'h1': {}}
  9. Expected:
  10. True
  11. Got:
  12. False
  13. **********************************************************************
  14. File "my_sanitizer.py", line 97, in __main__.HTMLFilterFactory.remove_tags
  15. Failed example:
  16. y.white_list == {'p': {}, 'i': {}, 'em': {}, 'strong': {}, 'b': {}, 'u': {}, 'h1': {}}
  17. Expected:
  18. True
  19. Got:
  20. False
  21. **********************************************************************
  22. File "my_sanitizer.py", line 100, in __main__.HTMLFilterFactory.remove_tags
  23. Failed example:
  24. y.white_list
  25. Expected:
  26. {}
  27. Got:
  28. {'body': {'id': (<function <lambda> at 0x896b924>,)}}
  29. **********************************************************************
  30. 2 items had failures:
  31. 1 of 3 in __main__.HTMLFilterFactory.add_tags
  32. 2 of 5 in __main__.HTMLFilterFactory.remove_tags
  33. ***Test Failed*** 3 failures.
  34.  
  35. ##my code
  36. import BeautifulSoup
  37.  
  38. class HTMLFilterFactory:
  39. """
  40. This class allows you to dynamically create a custom HTML filter. You create
  41. a factory and add tags to it using bracket syntax. The value
  42. you specify is another dictionary that contains valid attribute names as
  43. keys and filter literals as values.
  44.  
  45. The exception is inner_html which is treated as inner_html instead of
  46. an attribute.
  47.  
  48. If you set an attribute's value to an empty string, the attribute will be
  49. deleted.
  50.  
  51. >>> x = HTMLFilterFactory()
  52. >>> soria_only = lambda text: text == "soria" and "soria" or ""
  53. >>> x.add_tags("body p")
  54. >>> x["body"]["id"] = (soria_only,)
  55. >>> x["p"]["id"] = (soria_only,)
  56. >>> x["p"]["inner_html"] = (soria_only,)
  57. >>> x(r'''<BODY ID = "soria" ONLOAD = "alert('XSS')"><p id= 'tundra'>inner html that isn't soria</p></BODY>''')
  58. u'<body id="soria"><p></p></body>'
  59. """
  60. def __init__(self, white_list = {}, tags = ""):
  61. """You may optionally add a string of tags as per the add_tags method"""
  62. self.white_list = white_list
  63. self.add_tags(tags)
  64.  
  65. def __getitem__(self, key):
  66. '''This returns the whitelisted tag by that name'''
  67. return self.white_list[key]
  68.  
  69. def __setitem__(self, key, value):
  70. '''This assigns the whitelisted key to the value. The value should be
  71. an empty dictionary or a dictionary with valid attribute names as keys
  72. that point to sequences of filters you wish to apply to the attribute
  73. value.
  74. >>> x = HTMLFilterFactory()
  75. >>> soria_only = lambda text: text == "soria" and "soria" or ""
  76. >>> x["body"] = {"id":(soria_only,)}'''
  77. self.white_list[key] = value
  78.  
  79. def __call__(self, text):
  80. '''This applies the filters to the text and returns the text'''
  81. soup = BeautifulSoup.BeautifulSoup(text)
  82.  
  83. for tag in soup.findAll(True):
  84. if tag.name not in self.white_list:
  85. tag.hidden = True
  86. else:
  87. for attr, val in tag.attrs:
  88. if attr in self.white_list[tag.name]:
  89. for filter in self.white_list[tag.name][attr]:
  90. tag[attr] = filter(val)
  91. if tag[attr] == '':
  92. del(tag[attr])
  93. else:
  94. del(tag[attr])
  95. if self.white_list[tag.name].has_key("inner_html") == True:
  96. #If this tag has filters for inner_html, use the filters
  97. #on the inner_html of the tag
  98. for filter in self.white_list[tag.name]["inner_html"]:
  99. inner_html = tag.firstText(
  100. text = True, recursive = False)
  101. tag.firstText(
  102. text = True, recursive = False).replaceWith(
  103. filter(inner_html))
  104.  
  105. return unicode(soup)
  106.  
  107. def add_tags(self, tags):
  108. '''This whitelists one or more tags without specifying attributes or
  109. filters. (It's good if you want to add a lot of empty tags)
  110. >>> z = HTMLFilterFactory()
  111. >>> z.add_tags("p i em strong b u h1")
  112. >>> z.white_list == \
  113. {'p': {}, 'i': {}, 'em': {}, 'strong': {}, 'b': {}, 'u': {}, 'h1': {}}
  114. True'''
  115. for tag in tags.split():
  116. self.white_list[tag] = {}
  117.  
  118. def remove_tags(self, tags):
  119. '''This removes one or more tags from the whitelist.
  120. >>> y = HTMLFilterFactory()
  121. >>> y.add_tags("p i em strong b u h1")
  122. >>> y.white_list == \
  123. {'p': {}, 'i': {}, 'em': {}, 'strong': {}, 'b': {}, 'u': {}, 'h1': {}}
  124. True
  125. >>> y.remove_tags("p i em strong b u h1")
  126. >>> y.white_list
  127. {}'''
  128. for tag in tags.split():
  129. del(self.white_list[tag])
  130.  
  131. if __name__ == "__main__":
  132. import doctest
  133. doctest.testmod()
Add Comment
Please, Sign In to add comment