Don't like ads? PRO users don't see any ads ;-)
Guest

Untitled

By: a guest on Jul 4th, 2012  |  syntax: None  |  size: 7.44 KB  |  hits: 9  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  # Fuzzy-matches a free-text keyword against a list of candidate terms.
  #
  # The public entry points are Normalizer.replace_synonyms,
  # Normalizer.normalize and Normalizer.post_process. Results of
  # normalize/post_process are hashes of the form
  # {:match => <term or nil>, :match_type => <symbol>} where the type is
  # one of :empty, :exact_match, :sub_match or :closest_match.
  module Normalizer
    # Abbreviation <-> long-form table used by replace_synonyms. Lookups are
    # case-sensitive, so several spellings are listed explicitly.
    # NOTE(review): 'editio n' looks like a deliberate repair of a known data
    # typo rather than a mistake in this table - confirm before removing.
    SYNONYMS = {
      'SE' => 'special edition',
      'se' => 'special edition',
      'special edition' => 'se',
      'Special Edition' => 'se',
      'Limited Edition' => 'le',
      'limited edition' => 'le',
      'HD' => 'heavy duty',
      'WT' => 'work truck',
      'Work Truck' => 'wt',
      'ann' => 'anniversary',
      'editio n' => 'edition',
      'anniversary' => 'ann',
      'technology' => 'tech',
      'tech' => 'technology',
      'limited' => 'ltd',
      'ltd' => 'limited'
    }.freeze

    # Kept so that any code reading the module-level @terms still finds it.
    @terms = SYNONYMS

    # A term whose String#sum checksum is closer than this to the keyword's
    # checksum is reported as :closest_match.
    CHECKSUM_TOLERANCE = 99

    # Replaces known one- and two-word phrases in +str+ with their synonym.
    #
    # A two-word phrase takes precedence over its first word alone. A
    # replacement is skipped when the text it would consume/produce has
    # already been produced earlier in the same string, which stops
    # 'se' -> 'special edition' -> 'se' ping-ponging.
    #
    # @param str [String, nil]
    # @return [String, nil] +str+ itself when it is nil or empty, otherwise
    #   the words re-joined with single spaces
    def self.replace_synonyms(str)
      return str if str.nil? || str == ''

      words = str.split(' ')
      output = []
      emitted = []
      skip_next = false

      # Walk (word, following word) pairs; the trailing nil gives the last
      # word a partner so it is still visited.
      (words + [nil]).each_cons(2) do |word, following|
        if skip_next
          # This word was consumed as the second half of a two-word phrase.
          skip_next = false
          next
        end

        pair = "#{word} #{following}"
        if SYNONYMS.key?(pair) && !emitted.include?(pair)
          output << SYNONYMS[pair]
          emitted << SYNONYMS[pair]
          skip_next = true
        elsif SYNONYMS.key?(word) && !emitted.include?(SYNONYMS[word])
          output << SYNONYMS[word]
          emitted << SYNONYMS[word]
        else
          output << word
        end
      end

      output.join(' ')
    end

    # Finds the term that best matches +keyword+, trying progressively
    # looser heuristics and returning at the first one that succeeds:
    #
    # 1. equality (after synonym replacement, verbatim, case-insensitive,
    #    or ignoring '/' and '-')                         -> :exact_match
    # 2. shortest term that contains the keyword          -> :sub_match
    # 3. longest multi-word term whose first two words occur in the keyword,
    #    possibly replaced by a longer term sharing more words; the result
    #    goes through post_process
    # 4. as 3, but with the keyword's spaces removed      -> :sub_match
    # 5. longest multi-word term that contains the keyword's first two words
    #                                                     -> :sub_match
    # 6. longest term contained in the keyword (case-sensitive, then
    #    case-insensitive)                                -> :sub_match
    # 7. term with the closest String#sum checksum        -> :closest_match
    #    when within CHECKSUM_TOLERANCE, otherwise :sub_match
    #
    # @param keyword [String, nil]
    # @param terms [Array<String>, nil]
    # @return [Hash] {:match => String or nil, :match_type => Symbol};
    #   :match is nil only for the :empty result
    def self.normalize(keyword, terms)
      if keyword.nil? || keyword == '' || terms.nil? || terms.empty?
        return {:match => nil, :match_type => :empty}
      end

      exact = find_exact(keyword, terms)
      return {:match => exact, :match_type => :exact_match} if exact

      containing = shortest_term_containing(keyword, terms)
      return sub_match(containing) if containing

      leading = longest_term_led_by(keyword.downcase, terms)
      if leading
        overlap = best_overlap_term(keyword, terms)
        chosen = (overlap && overlap.length > leading.length) ? overlap : leading
        return post_process(keyword, sub_match(chosen), terms)
      end

      squashed = longest_term_led_by(keyword.gsub(' ', '').downcase, terms)
      return sub_match(squashed) if squashed

      reversed = longest_term_holding_keyword_words(keyword, terms)
      return sub_match(reversed) if reversed

      # Case-sensitive containment with hyphens treated as spaces.
      spaced_keyword = keyword.gsub('-', ' ')
      contained = terms.select { |term| spaced_keyword.include?(term.gsub('-', ' ')) }.max_by(&:size)
      if contained
        overlap = best_overlap_term(keyword, terms)
        # Prefer the word-overlap winner only when it is the longer term,
        # mirroring the length rule used for the leading-words heuristic.
        return sub_match(overlap) if overlap && overlap.length > contained.length
        return sub_match(contained)
      end

      lowered = keyword.downcase
      inside = terms.select { |term| lowered.include?(term.downcase) }.max_by(&:size)
      return sub_match(inside) if inside

      closest_by_checksum(keyword, terms)
    end

    # Upgrades +match+ to :exact_match when its term is one of +terms+ and
    # has the same String#sum checksum as the keyword; otherwise returns
    # +match+ unchanged. Because a checksum ignores character order, the
    # same words in a different order count as exact.
    # NOTE(review): hyphens are removed from the keyword but turned into
    # spaces in the term, so a hyphenated term never compares equal to its
    # hyphenated keyword here - behaviour kept as found, confirm intent.
    #
    # @param keyword [String]
    # @param match [Hash] {:match => String, :match_type => Symbol}
    # @param terms [Array<String>]
    # @return [Hash]
    def self.post_process(keyword, match, terms)
      term = match[:match]
      if terms.include?(term) && keyword.downcase.gsub('-', '').sum == term.downcase.gsub('-', ' ').sum
        return {:match => term, :match_type => :exact_match}
      end

      match
    end

    # Wraps +term+ in a :sub_match result hash.
    def self.sub_match(term)
      {:match => term, :match_type => :sub_match}
    end

    # Returns the first term equal to the keyword under increasingly lenient
    # comparisons, or nil when none is equal.
    def self.find_exact(keyword, terms)
      synonymized = replace_synonyms(keyword).downcase
      hit = terms.find { |term| term.downcase == synonymized }
      return hit if hit
      return keyword if terms.include?(keyword)

      lowered = keyword.downcase
      hit = terms.find { |term| term.downcase == lowered }
      return hit if hit

      stripped = strip_separators(keyword)
      terms.find { |term| strip_separators(term) == stripped }
    end

    # Downcases +text+ and removes every '/' and '-'.
    def self.strip_separators(text)
      text.gsub(%r{[/-]}, '').downcase
    end

    # Returns the shortest term that contains the keyword (case-insensitive),
    # first as typed and then with hyphens turned into spaces; nil if none.
    def self.shortest_term_containing(keyword, terms)
      [keyword.downcase, keyword.gsub('-', ' ').downcase].each do |needle|
        hits = terms.select { |term| term.downcase.include?(needle) }
        return hits.min_by(&:size) unless hits.empty?
      end
      nil
    end

    # Returns the longest multi-word term whose first two words (downcased,
    # hyphens removed) both occur in +haystack+, or nil if there is none.
    def self.longest_term_led_by(haystack, terms)
      candidates = terms.select do |term|
        first, second = term.split(' ')
        second &&
          haystack.include?(first.downcase.gsub('-', '')) &&
          haystack.include?(second.downcase.gsub('-', ''))
      end
      candidates.max_by(&:length)
    end

    # Returns the longest multi-word term that, with its spaces removed,
    # contains the keyword's first two words (or its only word when the
    # keyword has just one). Returns nil when nothing qualifies.
    def self.longest_term_holding_keyword_words(keyword, terms)
      needles = keyword.split(' ').first(2).map { |word| word.downcase.gsub('-', '') }
      return nil if needles.empty?

      candidates = terms.select do |term|
        next false unless term.split(' ').length > 1

        squashed = term.gsub(' ', '').downcase
        needles.all? { |needle| squashed.include?(needle) }
      end
      candidates.max_by(&:length)
    end

    # Returns the term sharing the most whole words with the keyword (after
    # synonym replacement; earliest term wins ties), or nil when no term
    # shares any word.
    def self.best_overlap_term(keyword, terms)
      keyword_words = replace_synonyms(keyword.downcase.gsub('-', '')).split(' ')
      best = nil
      best_score = 0
      terms.each do |term|
        term_words = term.downcase.gsub('-', ' ').split(' ')
        score = keyword_words.count { |word| term_words.include?(word) }
        if score > best_score
          best = term
          best_score = score
        end
      end
      best
    end

    # Last-resort match: the term whose String#sum checksum is numerically
    # closest to the keyword's. Expects +terms+ to be non-empty.
    def self.closest_by_checksum(keyword, terms)
      target = keyword.sum
      closest = terms.min_by { |term| (term.sum - target).abs }
      within_tolerance = (closest.sum - target).abs < CHECKSUM_TOLERANCE
      {:match => closest, :match_type => within_tolerance ? :closest_match : :sub_match}
    end

    private_class_method :sub_match, :find_exact, :strip_separators,
                         :shortest_term_containing, :longest_term_led_by,
                         :longest_term_holding_keyword_words,
                         :best_overlap_term, :closest_by_checksum
  end