Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/env ruby

# Two independent frequency tables. Each is created with a default of 0, so
# an entry can be bumped with `+=` before it has ever been assigned.
two_words, three_words = Array.new(2) { Hash.new(0) }
# Adds `count` to `word_hash` for every run of `size` consecutive words.
#
# Each window (an Array of `size` adjacent elements of `words`) is used as the
# hash key. If `words` has fewer than `size` elements nothing is added.
#
# @param words [Array] the words to slide the window over
# @param word_hash [Hash] table to update in place; it must yield a numeric
#   value for unseen keys (e.g. `Hash.new(0)`)
# @param count [Numeric] amount added per window (defaults to 1)
# @param size [Integer] number of words per window (defaults to 2)
# @return [nil]
# @raise [ArgumentError] if `size` is not positive (raised by `each_cons`)
def windowed_words_to_hash(words, word_hash, count: 1, size: 2)
  # each_cons yields a fresh array per window, so keys never alias each other.
  words.each_cons(size) { |window| word_hash[window] += count }
  nil
end
# Adds `num` to the entry of every leading prefix of `query`.
#
# For `%w[Best Buy]` the keys updated are `["Best"]` and `["Best", "Buy"]`.
#
# @param query [Array] ordered words of the query
# @param num [Numeric] amount added to each prefix's entry
# @param hsh [Hash] table to update in place; it must yield a numeric value
#   for unseen keys (e.g. `Hash.new(0)`)
# @return [Array] a new array holding the elements of `query`
def add_count(query, num, hsh)
  query.each_with_object([]) do |word, prefix|
    prefix << word
    # Key with a frozen copy: the accumulator keeps growing, and mutating an
    # array that is already stored as a hash key corrupts the lookup.
    hsh[prefix.dup.freeze] += num
  end
end
# Appends one line per entry of `hsh` to the file at `filename`.
#
# Each line is the key's elements joined by single spaces, a tab, then the
# value. The file is opened in append mode, so existing content is kept.
#
# @param hsh [Hash] entries whose keys respond to `join`
# @param filename [String] path of the file to append to
# @return [Array<String>] the lines that were written, without newlines
def dump_word_hash(hsh, filename)
  # Render everything first so the file is only opened once rows are ready.
  rows = hsh.map do |phrase_words, tally|
    phrase = phrase_words.join(' ')
    [phrase, tally].join("\t")
  end

  File.open(filename, 'a') do |out|
    rows.each { |row| out.puts(row) }
  end
end
# Driver: read the search log, fill the word-window tables, add a manual
# count for one query, then append the two-word table to the 'two_words' file.
#
# Presumably each line of searched_terms.txt is `phrase<TAB>count` -- confirm
# against the real data.
File.foreach('searched_terms.txt') do |line|
  # Double quotes are required here: '\t' in single quotes is a literal
  # backslash followed by "t", which never matches a tab.
  phrase, _count = line.chomp.split("\t")
  next if phrase.nil? # blank line: nothing to tokenize

  words = phrase.split(/\W+/)
  # NOTE(review): the per-line count is parsed but not forwarded, and both
  # calls use the helper's two-argument form -- confirm the intended weighting
  # and window width for three_words.
  windowed_words_to_hash(words, two_words) if words.size >= 2
  windowed_words_to_hash(words, three_words) if words.size >= 3
end

my_query = %w[Best Buy]
add_count(my_query, 3, two_words)
dump_word_hash(two_words, 'two_words')
Add Comment
Please, Sign In to add comment