Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- require "rubygems"
- require "open-uri"
- require "nokogiri"
# Counts how often each word occurs in the text of the HTML page at +url+.
#
# The page is fetched through open-uri, parsed with Nokogiri::HTML, and its
# text is gathered with fetch_text. A word is a run of \w characters; words
# are downcased before counting, so the tally is case-insensitive.
#
# url - String URL of the page to fetch.
#
# Returns an Array of [word, count] pairs ordered by count, highest first.
# Words with the same count are ordered alphabetically so the result is
# deterministic.
# Raises whatever open-uri raises when the fetch fails
# (e.g. OpenURI::HTTPError).
def count_word(url)
  # open-uri's patch to Kernel#open was deprecated in Ruby 2.7 and removed in
  # 3.0, and Kernel#open runs a shell command for arguments starting with
  # "|". Prefer URI.open (Ruby >= 2.5); URI.respond_to?(:open) is false on
  # older interpreters because Kernel#open is private there, so those fall
  # back to the original call.
  opener = URI.respond_to?(:open) ? URI.method(:open) : method(:open)

  # The block form closes the underlying IO/Tempfile once parsing is done.
  document = opener.call(url) { |io| Nokogiri::HTML(io) }

  tally = Hash.new(0)
  fetch_text(document).scan(/\w+/) { |word| tally[word.downcase] += 1 }

  # Negated count gives descending order; the word itself breaks ties.
  tally.sort_by { |word, count| [-count, word] }
end
# Recursively gathers the text found beneath a node.
#
# e - a node. A Nokogiri::XML::Text node yields its own text; any other node
#     is expected to respond to #children.
#
# Returns a String: for a text node, its text; otherwise the concatenation of
# the text gathered from each child, with a newline appended after each child.
def fetch_text(e)
  return e.text if e.is_a?(Nokogiri::XML::Text)

  gathered = String.new
  e.children.each do |node|
    gathered << fetch_text(node) << "\n"
  end
  gathered
end
Add Comment
Please, Sign In to add comment