Guest User

Untitled

a guest
Jun 18th, 2018
61
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.33 KB | None | 0 0
  1. require 'twitter'
  2. require 'pp'
  3.  
  # Fetch search results for '#strangeloop', pages 1 through 900, and write
  # the text of every returned tweet to tweet_data.txt using puts.
  # The file is opened in 'w' mode, so any previous contents are replaced.
  File.open('tweet_data.txt', 'w') do |file|
    1.upto(900) do |number|
      results = Twitter::Search.new('#strangeloop').page(number).fetch.results
      results.each { |tweet| file.puts tweet.text }
    end
  end
  # Splits +text+ into lowercase words.
  #
  # Characters are accumulated into the current word until a delimiter
  # (punctuation, bracket, quote, space or line break from the list below)
  # is reached; the accumulated word is then downcased and stored. Runs of
  # consecutive delimiters produce no empty words.
  #
  # @param text [String] the text to tokenize
  # @return [Array<String>] the lowercase words, in order of appearance
  def parsed_words_from(text)
    demarcating_characters = [",", ".", "\"", "'", ";", "\n", "\r", "[", "]", "{", "}", " ", "(", ")", ":"]

    word_list = []
    current_word = ""

    text.each_char do |this_character|
      if demarcating_characters.include?(this_character)
        word_list << current_word.downcase unless current_word.empty?
        current_word = ""
      else
        current_word << this_character
      end
    end

    # Flush the final word: text that does not end in a delimiter (for
    # example the last line of a file without a trailing newline) would
    # otherwise lose its last word.
    word_list << current_word.downcase unless current_word.empty?

    word_list
  end
  26.  
  # Count how often each word of five or more characters occurs in the
  # tweet file, skipping the words in unimportant_list, then print the
  # counts from most to least frequent followed by the number of distinct
  # words counted.
  text_file_containing_tweets = 'tweet_data.txt'

  tweet_corpus = {}

  # NOTE(review): several entries can never match. "this" and "&" are
  # shorter than the five-character minimum checked first, and
  # parsed_words_from treats "'" as a delimiter, so "didn't" and "that's"
  # arrive here already split (e.g. "didn" and "t"). Confirm whether "didn"
  # should be listed instead.
  unimportant_list = %w{their didn't about would really going that's there & this}

  # File.foreach closes the file once iteration finishes, whereas
  # File.open(...).each_line leaves the handle open.
  File.foreach(text_file_containing_tweets) do |tweet_text|
    parsed_words_from(tweet_text).each do |word|
      next if word.length < 5
      next if unimportant_list.include?(word)

      tweet_corpus[word] = tweet_corpus.fetch(word, 0) + 1
    end
  end

  # Highest count first; words with equal counts are ordered alphabetically
  # so the output is the same on every run.
  sorted_pairs = tweet_corpus.sort_by { |word, count| [-count, word] }

  sorted_pairs.each { |word, count| puts "#{word}: #{count}" }

  puts "Total corpus count: #{sorted_pairs.length}"
Add Comment
Please, Sign In to add comment