Guest User

Untitled

a guest
Jan 22nd, 2018
69
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 7.85 KB | None | 0 0
  # Fuzzy-matches a free-form keyword against a list of candidate terms.
  #
  # The entry point is Normalizer.normalize, which tries a fixed sequence of
  # matching strategies, from strictest to loosest, and returns the first hit.
  module Normalizer
    # Synonym / abbreviation table used by replace_synonyms. Keys are matched
    # case-sensitively, both as single words and as two-word phrases.
    @terms = {'SE' => 'special edition', 'se' => 'special edition', 'special edition' => 'se', 'Special Edition' => 'se', 'Limited Edition' => 'le', 'limited edition' => 'le', 'HD' => 'heavy duty', 'WT' => 'work truck', 'Work Truck' => 'wt', 'ann' => 'anniversary', 'editio n' => 'edition', 'anniversary' => 'ann', 'technology' => 'tech', 'tech' => 'technology', 'limited' => 'ltd', 'ltd' => 'limited'}

    # A term whose String#sum checksum differs from the keyword's by less than
    # this amount is reported as :closest_match by the last-resort stage.
    CHECKSUM_TOLERANCE = 99
    private_constant :CHECKSUM_TOLERANCE

    # Replaces words and two-word phrases of +str+ with their entry in the
    # synonym table.
    #
    # A two-word phrase is skipped when it equals the output of an earlier
    # replacement; a single word is skipped when its replacement has already
    # been produced once.
    #
    # @param str [String, nil] whitespace-separated words
    # @return [String, nil] +str+ itself when nil or empty, otherwise the words
    #   re-joined with single spaces
    def self.replace_synonyms(str)
      return str if str.nil? || str == ''

      words = str.split(' ')
      replaced_words = []
      # Explicit index loop: a phrase replacement removes the following word,
      # so the array shrinks while it is being walked.
      index = 0
      while index < words.length
        word = words[index]
        double_word = "#{word} #{words[index + 1]}"
        if @terms.key?(double_word) && !replaced_words.include?(double_word)
          words[index] = @terms[double_word]
          words.delete_at(index + 1)
          replaced_words << words[index]
        elsif @terms.key?(word) && !replaced_words.include?(@terms[word])
          words[index] = @terms[word]
          replaced_words << @terms[word]
        end
        index += 1
      end

      words.join(' ')
    end

    # Finds the term that best matches +keyword+.
    #
    # Strategies, in order (first hit wins):
    # 1. exact: case-insensitive equality after synonym replacement, literal
    #    membership, case-insensitive equality, equality ignoring '-' and '/';
    # 2. shortest term containing the keyword (as is, then with '-' as ' ');
    # 3. longest term whose first two words both occur in the keyword, unless
    #    the term sharing the most words with the keyword is longer;
    # 4. the same two-word test against the keyword with spaces removed;
    # 5. longest term that contains the keyword's first two words;
    # 6. longest term contained in the keyword (case-sensitive, '-' as ' '),
    #    unless the term sharing the most words with the keyword is longer;
    # 7. longest term contained in the keyword, case-insensitively;
    # 8. the term whose String#sum checksum is closest to the keyword's.
    #
    # @param keyword [String, nil] text to look up
    # @param terms [Array<String>, nil] candidate terms
    # @return [Hash] +:match+ (a term, or nil) and +:match_type+, one of
    #   +:empty+, +:exact_match+, +:sub_match+ or +:closest_match+
    def self.normalize(keyword, terms)
      return {:match => nil, :match_type => :empty} if keyword.nil? || keyword == '' || terms.nil? || terms.length == 0

      downcased_keyword = keyword.downcase
      keyword_after_synonyms = replace_synonyms(downcased_keyword)

      # --- exact matches -------------------------------------------------
      found = terms.find { |term| term.downcase == keyword_after_synonyms }
      return exact_result(found) if found
      return exact_result(keyword) if terms.include?(keyword)

      found = terms.find { |term| term.downcase == downcased_keyword }
      return exact_result(found) if found

      bare_keyword = strip_separators(downcased_keyword)
      found = terms.find { |term| strip_separators(term).downcase == bare_keyword }
      return exact_result(found) if found

      # --- keyword contained in a term: prefer the shortest term ----------
      found = terms.select { |term| term.downcase.include?(downcased_keyword) }.min_by(&:size)
      return sub_result(found) if found

      spaced_keyword = downcased_keyword.gsub('-', ' ')
      found = terms.select { |term| term.downcase.include?(spaced_keyword) }.min_by(&:size)
      return sub_result(found) if found

      # --- first two words of a term both occur in the keyword ------------
      keyword_words = keyword_after_synonyms.split(' ')
      prefix_term = longest_term(terms) do |term|
        head, second = leading_words(term)
        term.include?(' ') && second && downcased_keyword.include?(head) && downcased_keyword.include?(second)
      end
      if prefix_term
        overlap_term = best_word_overlap(keyword_words, terms)
        chosen = overlap_term && overlap_term.length > prefix_term.length ? overlap_term : prefix_term
        return post_process(keyword, sub_result(chosen), terms)
      end

      # --- same test against the keyword with its spaces removed ----------
      squeezed_keyword = downcased_keyword.gsub(' ', '')
      found = longest_term(terms) do |term|
        head, second = leading_words(term)
        second && squeezed_keyword.include?(head) && squeezed_keyword.include?(second)
      end
      return sub_result(found) if found

      # --- first two keyword words both occur in a space-stripped term ----
      first_keyword, second_keyword = downcased_keyword.split(' ').first(2).map { |word| word.gsub('-', '') }
      # A single-word keyword has no second word to test, so skip the stage.
      if second_keyword
        found = longest_term(terms) do |term|
          squeezed_term = term.gsub(' ', '').downcase
          term.include?(' ') && squeezed_term.include?(first_keyword) && squeezed_term.include?(second_keyword)
        end
        return sub_result(found) if found
      end

      # --- term contained in the keyword (case-sensitive) -----------------
      keyword_without_dashes = keyword.gsub('-', ' ')
      contained_term = longest_term(terms) { |term| keyword_without_dashes.include?(term.gsub('-', ' ')) }
      if contained_term
        overlap_term = best_word_overlap(keyword_words, terms)
        return sub_result(overlap_term) if overlap_term && contained_term.length < overlap_term.length

        return sub_result(contained_term)
      end

      # --- term contained in the keyword (case-insensitive) ---------------
      found = longest_term(terms) { |term| downcased_keyword.include?(term.downcase) }
      return sub_result(found) if found

      # --- last resort: nearest String#sum checksum -----------------------
      keyword_sum = keyword.sum
      closest = terms.min_by { |term| (term.sum - keyword_sum).abs }
      if (closest.sum - keyword_sum).abs < CHECKSUM_TOLERANCE
        {:match => closest, :match_type => :closest_match}
      else
        # Reported as :sub_match although it is only a checksum pick; the
        # label is kept so existing callers see the same match types.
        {:match => closest, :match_type => :sub_match}
      end
    end

    # Upgrades +match+ to an :exact_match when its term is in +terms+ and the
    # String#sum checksums of keyword and term agree; otherwise returns
    # +match+ unchanged.
    #
    # NOTE(review): dashes are removed from the keyword but turned into spaces
    # in the term before summing; confirm this asymmetry is intended.
    #
    # @param keyword [String]
    # @param match [Hash] a result hash with a +:match+ term
    # @param terms [Array<String>]
    # @return [Hash] a result hash
    def self.post_process(keyword, match, terms)
      term = match[:match]
      same_checksum = keyword.downcase.gsub("-", '').sum == term.downcase.gsub("-", ' ').sum
      return {:match => term, :match_type => :exact_match} if terms.include?(term) && same_checksum

      match
    end

    # Builds an :exact_match result hash for +term+.
    def self.exact_result(term)
      {:match => term, :match_type => :exact_match}
    end

    # Builds a :sub_match result hash for +term+.
    def self.sub_result(term)
      {:match => term, :match_type => :sub_match}
    end

    # Removes every '-' and '/' from +str+.
    def self.strip_separators(str)
      str.gsub('-', '').gsub('/', '')
    end

    # Returns up to the first two whitespace-separated words of +term+,
    # downcased and with dashes removed.
    def self.leading_words(term)
      term.split(' ').first(2).map { |word| word.downcase.gsub('-', '') }
    end

    # Returns the longest term for which the block is truthy, or nil.
    def self.longest_term(terms)
      terms.select { |term| yield(term) }.max_by(&:length)
    end

    # Returns the term sharing the most words with +keyword_words+ (dashes in
    # a term count as word breaks), or nil when no term shares any word.
    def self.best_word_overlap(keyword_words, terms)
      best_term = nil
      best_count = 0
      terms.each do |term|
        term_words = term.downcase.gsub('-', ' ').split(' ')
        count = keyword_words.count { |word| term_words.include?(word) }
        next unless count > best_count

        best_count = count
        best_term = term
      end
      best_term
    end

    private_class_method :exact_result, :sub_result, :strip_separators,
                         :leading_words, :longest_term, :best_word_overlap
  end
Add Comment
Please, Sign In to add comment