Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- require 'twitter'
- require 'pp'
# Fetch search result pages 1 through 900 for the '#strangeloop' query and
# append the text of every returned tweet to tweet_data.txt via file.puts.
# The output file is opened in 'w' mode, so any previous contents are replaced.
File.open('tweet_data.txt', 'w') do |file|
  1.upto(900) do |page_number|
    results = Twitter::Search.new('#strangeloop').page(page_number).fetch.results
    results.each { |tweet| file.puts tweet.text }
  end
end
# Split a piece of text into lower-cased words.
#
# A word is any maximal run of characters that contains none of the
# delimiters: comma, period, double quote, single quote, semicolon, newline,
# carriage return, square brackets, curly braces, space, parentheses, colon.
# Other characters (for example '#', '@', '&' or tabs) stay part of the word.
#
# The final word is returned even when the text does not end with a
# delimiter.
#
# @param text [String] the text to tokenize
# @return [Array<String>] the words in order of appearance, lower-cased,
#   never containing empty strings
def parsed_words_from(text)
  # Splitting can yield a leading empty field when the text starts with a
  # delimiter, hence the reject.
  text.split(/[,."';\n\r\[\]{} ():]+/).reject(&:empty?).map(&:downcase)
end
# Count how often each word of five or more characters occurs in the saved
# tweets, then print the words from most to least frequent followed by the
# number of distinct words counted.
text_file_containing_tweets = 'tweet_data.txt'

# Words excluded from the count.
# NOTE(review): parsed_words_from splits on "'" and words shorter than five
# characters are skipped below, so "didn't", "that's", "&" and "this" can
# never match here (the fragment "didn" is still counted) -- confirm the
# intended list.
unimportant_list = %w{their didn't about would really going that's there & this}

# word => number of occurrences; unseen words start at zero.
tweet_corpus = Hash.new(0)

# File.foreach closes the file after the last line, unlike a bare
# File.open(...).each_line which leaves the handle open.
File.foreach(text_file_containing_tweets) do |tweet_text|
  parsed_words_from(tweet_text).each do |word|
    next if word.length < 5
    next if unimportant_list.include?(word)

    tweet_corpus[word] += 1
  end
end

# Highest count first; equal counts are ordered alphabetically so the output
# is the same on every run.
sorted_pairs = tweet_corpus.sort_by { |word, count| [-count, word] }

sorted_pairs.each { |word, count| puts "#{word}: #{count}" }
puts "Total corpus count: #{sorted_pairs.length}"
Add Comment
Please, Sign In to add comment