Guest User

Untitled

a guest
Jan 24th, 2018
57
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.02 KB | None | 0 0
  1. require 'open-uri'
  2. require 'cgi'
  3.  
  # Converts HTML into plain text using a fixed sequence of regex passes.
  #
  # The source handed to the constructor may be:
  # * an http(s)/ftp URL  - fetched through open-uri,
  # * a path to an existing regular file - read from disk,
  # * anything else       - treated as the HTML markup itself.
  #
  # NOTE: the class name keeps its historical spelling ("Sasnitizer") so that
  # existing callers continue to work.
  class HTMLSasnitizer
    # Raw HTML the next call to #plain_text will convert.
    attr_accessor :html

    # Elements removed together with everything they contain.
    IGNORE_TAGS = %w[head script frameset].freeze
    # Elements replaced by their content, with nothing added.
    INLINE_TAGS = %w[span strong i u].freeze
    # Elements replaced by their content wrapped in newlines.
    BLOCK_TAGS = %w[p div ul ol].freeze
    # A source counts as remote only if the whole string is one URL-like token.
    REMOTE_SOURCE = %r{\A(?:https?|ftp)://\S+\z}i
    # Any remaining opening, closing or self-closing tag.
    ANY_TAG = /<[^>]*>/

    # @param source [#to_s] URL, local file path, or literal HTML
    # @raise [OpenURI::HTTPError, SystemCallError] when a URL or an existing
    #   file cannot be read; these are not swallowed
    def initialize(source = '')
      @html = load_source(source.to_s)
    end

    # Runs the conversion on the current value of #html.
    #
    # Passes, in order: drop whitespace directly preceding a tag, remove
    # ignored elements, unwrap inline elements, unwrap block elements onto
    # their own lines, strip every remaining tag, decode HTML entities.
    #
    # @return [String] the plain-text rendering
    def plain_text
      # Non-bang gsub always returns a new String, so @html is never mutated
      # by the in-place passes below.
      @plain_text = @html.to_s.gsub(/\s*(<.*?>)/m, '\1')
      handle_ignore_tags
      handle_inline_tags
      handle_block_tags
      handle_all_other_tags

      # Entities are decoded last so that "&lt;b&gt;" is not mistaken for a tag.
      CGI.unescapeHTML(@plain_text)
    end

    private

    # Resolves the constructor argument to HTML text.
    def load_source(source)
      # URI.open / File.read are used instead of Kernel#open, which would run
      # a shell command for input starting with "|" and no longer fetches
      # URLs on Ruby 3+.
      return URI.open(source, &:read) if source.match?(REMOTE_SOURCE)
      return File.read(source) if local_file?(source)

      source
    end

    # True when +path+ names an existing regular file.
    def local_file?(path)
      File.file?(path)
    rescue ArgumentError
      # Raised for strings File cannot treat as a path (e.g. a NUL byte).
      false
    end

    # Builds a case-insensitive, multi-line regex matching one +tag+ element
    # and capturing its content. The \b keeps "u" from matching "<ul>", "i"
    # from matching "<img>", and so on.
    def tag_regex(tag)
      name = Regexp.escape(tag)
      %r{<#{name}\b[^>]*>(.*?)</#{name}\s*>}mi
    end

    # Deletes ignored elements including their content.
    def handle_ignore_tags
      IGNORE_TAGS.each { |tag| @plain_text.gsub!(tag_regex(tag), '') }
    end

    # Replaces inline elements with their content.
    def handle_inline_tags
      INLINE_TAGS.each { |tag| @plain_text.gsub!(tag_regex(tag), '\1') }
    end

    # Replaces block elements with their content surrounded by newlines.
    def handle_block_tags
      BLOCK_TAGS.each { |tag| @plain_text.gsub!(tag_regex(tag), "\n\\1\n") }
    end

    # Strips whatever tags are left (unlisted elements, unmatched leftovers
    # from nested elements), keeping the text between them.
    def handle_all_other_tags
      @plain_text.gsub!(ANY_TAG, '')
    end
  end
Add Comment
Please, Sign In to add comment