Guest User

Untitled

a guest
Jun 20th, 2018
84
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.61 KB | None | 0 0
require 'rubygems'
require 'nokogiri'
  3.  
  4. some_html = <<-HTML
  5. <html>
  6. <head>
  7. <title>Title!</title>
  8. </head>
  9. <body>
  10. This is the body!
  11. </body>
  12. </html>
  13. HTML
  14.  
  15. class TextHandler < Nokogiri::XML::SAX::Document
  16. def initialize
  17. @chunks = []
  18. end
  19.  
  20. attr_reader :chunks
  21.  
  22. def cdata_block(string)
  23. characters(string)
  24. end
  25.  
  26. def start_element(name, attrs = [])
  27. if name == "head"
  28. puts name
  29. end
  30. end
  31.  
  32. def characters(string)
  33. @chunks << string.strip if string.strip != ""
  34. end
  35. end
  36. th = TextHandler.new
  37. parser = Nokogiri::HTML::SAX::Parser.new(th)
  38. parser.parse(some_html)
  39. puts th.chunks.inspect
Add Comment
Please, Sign In to add comment