# Matches a free-text keyword against a list of candidate terms, trying
# progressively looser strategies (exact, substring, word overlap, checksum).
# All public methods are module-level: Normalizer.normalize(keyword, terms).
module Normalizer
  # Synonym table consulted by replace_synonyms. Keys are matched
  # case-sensitively against single words or pairs of adjacent words; several
  # pairs are listed in both directions.
  # NOTE(review): 'editio n' looks like a repair for a split "edition" - confirm.
  @terms = {
    'SE' => 'special edition',
    'se' => 'special edition',
    'special edition' => 'se',
    'Special Edition' => 'se',
    'Limited Edition' => 'le',
    'limited edition' => 'le',
    'HD' => 'heavy duty',
    'WT' => 'work truck',
    'Work Truck' => 'wt',
    'ann' => 'anniversary',
    'editio n' => 'edition',
    'anniversary' => 'ann',
    'technology' => 'tech',
    'tech' => 'technology',
    'limited' => 'ltd',
    'ltd' => 'limited'
  }

  # Replaces words (or pairs of adjacent words) found in the synonym table.
  #
  # @param str [String, nil] whitespace-separated text
  # @return [String, nil] str itself when nil or empty; otherwise the words
  #   re-joined with single spaces after substitution
  def Normalizer.replace_synonyms(str)
    return str if str.nil? || str == ''

    words = str.split(' ')
    produced = [] # replacements already emitted, used to avoid swapping back
    index = 0
    # Explicit index loop: a two-word hit removes the following element, so
    # the array shrinks while it is being walked.
    while index < words.length
      word = words[index]
      pair = "#{word} #{words[index + 1]}"
      if @terms.key?(pair) && !produced.include?(pair)
        words[index] = @terms[pair]
        words.delete_at(index + 1)
        produced << words[index]
      elsif @terms.key?(word) && !produced.include?(@terms[word])
        words[index] = @terms[word]
        produced << @terms[word]
      end
      index += 1
    end
    words.join(' ')
  end

  # Finds the term that best matches keyword.
  #
  # Strategies are tried in order and the first one that yields a result wins:
  # 1. exact matches (after synonym replacement, verbatim, case-insensitive,
  #    and ignoring '/' and '-');
  # 2. terms that contain the keyword (shortest such term);
  # 3. multi-word terms whose first two words both occur in the keyword
  #    (longest such term), refined by word overlap and post_process;
  # 4. the same check against the keyword with spaces removed;
  # 5. multi-word terms whose space-stripped form contains the keyword's
  #    first two words (only when the keyword has at least two words);
  # 6. terms contained in the keyword (longest such term);
  # 7. the term whose String#sum checksum is within 99 of the keyword's;
  # 8. a last-resort pick from terms.
  #
  # @param keyword [String, nil] text to look up
  # @param terms [Array<String>, nil] candidate terms
  # @return [Hash] { :match => String or nil, :match_type => Symbol } where
  #   the type is :empty, :exact_match, :sub_match or :closest_match
  def Normalizer.normalize(keyword, terms)
    if keyword.nil? || keyword == '' || terms.nil? || terms.length == 0
      return { :match => nil, :match_type => :empty }
    end

    keyword_lc = keyword.downcase

    # --- 1. exact matches ---------------------------------------------------
    synonym_form = Normalizer.replace_synonyms(keyword).downcase
    hit = terms.find { |term| term.downcase == synonym_form }
    return { :match => hit, :match_type => :exact_match } if hit

    return { :match => keyword, :match_type => :exact_match } if terms.include?(keyword)

    hit = terms.find { |term| term.downcase == keyword_lc }
    return { :match => hit, :match_type => :exact_match } if hit

    compact_keyword = keyword.gsub('/', '').gsub('-', '').downcase
    hit = terms.find { |term| compact_keyword == term.gsub('/', '').gsub('-', '').downcase }
    return { :match => hit, :match_type => :exact_match } if hit

    # --- 2. terms containing the keyword (as typed, then hyphens as spaces) --
    [keyword_lc, keyword.gsub('-', ' ').downcase].each do |needle|
      containing = terms.select { |term| term.downcase.include?(needle) }
      next if containing.empty?

      shortest = containing.min { |x, y| x.size <=> y.size }
      return { :match => shortest, :match_type => :sub_match }
    end

    # --- 3. first two words of a term both appear in the keyword ------------
    best = longest_multiword_term(terms) { |_term, words|
      keyword_lc.include?(words[0].downcase.gsub('-', '')) &&
        keyword_lc.include?(words[1].downcase.gsub('-', ''))
    }
    if best
      matched = { :match => best, :match_type => :sub_match }
      overlap_term = best_overlap_term(keyword, terms, true)
      # A longer term chosen by word overlap overrides the two-word match.
      if overlap_term != '' && overlap_term.length > best.length
        matched = { :match => overlap_term, :match_type => :sub_match }
      end
      return Normalizer.post_process(keyword, matched, terms)
    end

    # --- 4. same check against the keyword with its spaces removed ----------
    squashed_keyword = keyword.gsub(' ', '').downcase
    best = longest_multiword_term(terms) { |_term, words|
      squashed_keyword.include?(words[0].downcase.gsub('-', '')) &&
        squashed_keyword.include?(words[1].downcase.gsub('-', ''))
    }
    return { :match => best, :match_type => :sub_match } if best

    # --- 5. keyword's first two words appear in a space-stripped term -------
    # Needs two keyword words; with fewer there is no second word to test
    # (indexing it used to raise NoMethodError on nil).
    keyword_words = keyword.split(' ')
    if keyword_words.length > 1
      first_word = keyword_words[0].downcase.gsub('-', '').gsub(' ', '')
      second_word = keyword_words[1].downcase.gsub('-', '').gsub(' ', '')
      best = longest_multiword_term(terms) { |term, _words|
        squashed_term = term.gsub(' ', '').downcase
        squashed_term.include?(first_word) && squashed_term.include?(second_word)
      }
      return { :match => best, :match_type => :sub_match } if best
    end

    # --- 6. terms contained in the keyword (case-sensitive, hyphen = space) -
    spaced_keyword = keyword.gsub('-', ' ')
    contained = terms.select { |term| spaced_keyword.include?(term.gsub('-', ' ')) }
    unless contained.empty?
      longest = contained.max { |x, y| x.size <=> y.size }
      overlap_term = best_overlap_term(keyword, terms, false)
      # NOTE(review): this is a lexicographic String comparison, kept as in the
      # original; a length comparison may have been intended - confirm.
      if overlap_term != '' && longest < overlap_term
        return { :match => overlap_term, :match_type => :sub_match }
      end
      return { :match => longest, :match_type => :sub_match }
    end

    # Same containment test, case-insensitive and without hyphen rewriting.
    contained = terms.select { |term| keyword_lc.include?(term.downcase) }
    unless contained.empty?
      longest = contained.max { |x, y| x.size <=> y.size }
      return { :match => longest, :match_type => :sub_match }
    end

    # --- 7. closest String#sum checksum, only when the gap is below 99 ------
    keyword_sum = keyword.sum
    closest = ''
    smallest_gap = 99
    terms.each do |term|
      gap = (term.sum - keyword_sum).abs
      if gap < smallest_gap
        closest = term
        smallest_gap = gap
      end
    end
    return { :match => closest, :match_type => :closest_match } if closest != ''

    # --- 8. last resort ------------------------------------------------------
    # NOTE(review): the comparator ignores its second argument, exactly as the
    # original did, so which term comes back is not a true minimum - confirm.
    return { :match => terms.min { |x, _y| x.sum <=> keyword_sum }, :match_type => :sub_match }
  end

  # Upgrades a match to :exact_match when the matched term is one of terms and
  # the checksum of the lower-cased keyword (hyphens removed) equals the
  # checksum of the lower-cased term (hyphens turned into spaces).
  #
  # @param keyword [String] the text that was looked up
  # @param match [Hash] { :match => String, :match_type => Symbol }
  # @param terms [Array<String>] candidate terms
  # @return [Hash] an :exact_match hash for the same term, or match unchanged
  def Normalizer.post_process(keyword, match, terms)
    term = match[:match]
    same_checksum = keyword.downcase.gsub("-", '').sum == term.downcase.gsub("-", ' ').sum
    return { :match => term, :match_type => :exact_match } if terms.include?(term) && same_checksum

    match
  end

  # Internal: returns the longest term that has at least two words and for
  # which the block (given the term and its words) is truthy; nil when none
  # qualifies. On equal lengths the earliest term is kept.
  def Normalizer.longest_multiword_term(terms)
    longest = nil
    terms.each do |term|
      words = term.split(' ')
      next if words.length < 2
      next unless yield(term, words)

      longest = term if longest.nil? || longest.length < term.length
    end
    longest
  end

  # Internal: picks a term by how many synonym-replaced keyword words it
  # shares. Returns '' when no term is selected.
  #
  # hits counts keyword words (duplicates included) present in the term;
  # distinct_hits is the size of the set intersection. accept_ties selects
  # the comparison the caller originally used: true means
  # distinct_hits >= hits, false means distinct_hits > hits.
  # NOTE(review): the two call sites differed only in that operator; both
  # variants are preserved because the intended one cannot be determined here.
  def Normalizer.best_overlap_term(keyword, terms, accept_ties)
    keyword_words = Normalizer.replace_synonyms(keyword.downcase.gsub("-", '')).split(' ')
    best_term = ''
    best_count = 0
    terms.each do |term|
      spaced_words = term.downcase.gsub("-", ' ').split(' ')
      hits = keyword_words.count { |word| spaced_words.include?(word) }
      distinct_hits = (keyword_words & spaced_words).length
      overlap_wins = accept_ties ? distinct_hits >= hits : distinct_hits > hits
      if overlap_wins
        # The stored count uses the term with hyphens removed, not spaced.
        best_count = (keyword_words & term.gsub("-", '').downcase.split(' ')).length
        best_term = term
      elsif hits > best_count
        best_count = hits
        best_term = term
      end
    end
    best_term
  end

  private_class_method :longest_multiword_term, :best_overlap_term
end