Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- require 'open-uri'
- require 'cgi'
# Converts HTML into plain text by stripping markup with regular expressions.
#
# The source may be an http(s)/ftp URL, the path of an existing local file, or
# a string of raw HTML. The class name keeps its original spelling so that
# existing callers continue to work.
#
# @example
#   HTMLSasnitizer.new('<p>1 &lt; 2</p>').plain_text.strip # => "1 < 2"
class HTMLSasnitizer
  # @return [String] the raw HTML that will be converted
  attr_accessor :html

  # Elements removed together with everything inside them.
  IGNORE_TAGS = %w[head script frameset].freeze
  # Elements whose markup is dropped while their content stays in place.
  INLINE_TAGS = %w[span strong i u].freeze
  # Elements whose content is set off by a newline on each side.
  BLOCK_TAGS = %w[p div ul ol].freeze
  # A source is only fetched remotely when the *whole* string is a single URL.
  REMOTE_SOURCE = %r{\A(?:https?|ftp)://\S+\z}i.freeze

  # @param source [String] a URL, a local file path, or raw HTML
  # @raise [OpenURI::HTTPError, SystemCallError] when a URL or file cannot be read
  def initialize(source = '')
    @html = load_source(source)
  end

  # Builds the plain-text rendering of {#html}.
  #
  # Whitespace directly in front of a tag is dropped first (this removes the
  # indentation of nested markup), then the tag groups are processed in order
  # and finally HTML entities are decoded.
  #
  # @return [String] the text with all markup removed
  def plain_text
    @plain_text = @html.gsub(/\s*(<.*?>)/m, '\1')
    handle_ignore_tags
    handle_inline_tags
    handle_block_tags
    handle_all_other_tags
    CGI.unescapeHTML(@plain_text)
  end

  private

  # Resolves +source+ to HTML text without going through Kernel#open, which
  # would execute a subprocess for input that starts with "|".
  def load_source(source)
    text = source.to_s
    return URI.open(text, &:read) if text.match?(REMOTE_SOURCE)
    return File.read(text) if local_file?(text)

    text
  end

  # True when +path+ names an existing regular file. Raw HTML can contain
  # bytes that are illegal in a path, which must simply mean "not a file".
  def local_file?(path)
    File.file?(path)
  rescue ArgumentError, SystemCallError
    false
  end

  # Matches one complete +tag+ element and captures its content. The opening
  # tag must be followed by ">" or whitespace so that "p" does not match
  # "<pre>" and "i" does not match "<img>".
  def tag_regex(tag)
    name = Regexp.escape(tag)
    %r{<#{name}(?:\s[^>]*)?>(.*?)</#{name}\s*>}mi
  end

  def handle_ignore_tags
    IGNORE_TAGS.each { |tag| @plain_text.gsub!(tag_regex(tag), '') }
  end

  def handle_inline_tags
    INLINE_TAGS.each { |tag| @plain_text.gsub!(tag_regex(tag), '\1') }
  end

  def handle_block_tags
    BLOCK_TAGS.each { |tag| @plain_text.gsub!(tag_regex(tag), "\n\\1\n") }
  end

  # Removes comments and every tag that the earlier passes left behind
  # (unknown elements, void elements, unbalanced closing tags).
  def handle_all_other_tags
    @plain_text.gsub!(/<!--.*?-->/m, '')
    @plain_text.gsub!(/<[^>]*>/, '')
  end
end
Add Comment
Please, Sign In to add comment