Guest User

Untitled

a guest
Sep 2nd, 2015
69
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.19 KB | None | 0 0
  # encoding: utf-8

  module MonitoringStats
    module Templates
      # Turns message templates containing placeholder sequences
      # (%d, %w, %d+, %w+, %d{m,n}, %w{m,n}) into regular expressions, and
      # normalizes raw patterns/texts by stripping punctuation.
      #
      # Relies on StringTools.collapse_spaces, which is defined outside this
      # file (presumably squeezes/trims whitespace -- confirm against its source).
      class RegexpCompiler

        # Placeholder regexp => proc returning the regexp source that replaces it.
        #
        # Order matters: both the alternation built from these keys and the
        # lookup in #replace_special_seqs pick the first key that matches, so
        # the longer forms ("%d+", "%d{m,n}") must precede the bare "%d"/"%w".
        #
        # The procs are run with instance_exec on a compiler instance, which
        # is why they can call the private #limits_for.
        MAPPINGS = {
          /%d\+/i              => proc { "(\\d+ )*\\d+" },
          /%w\+/i              => proc { "(\\S+ )*\\S+" },
          /%d\{(\d+),(\d+)\}/i => proc { |match| "(\\d+\\b ?)#{limits_for(match)}" },
          /%w\{(\d+),(\d+)\}/i => proc { |match| "(\\S+\\b ?)#{limits_for(match)}" },
          /%d/i                => proc { "\\d+" },
          /%w/i                => proc { "\\S+" }
        }

        # Any single character that is not a Cyrillic letter (а-я, ё), a Latin
        # letter, a digit or whitespace (case-insensitive).
        PUNCTUATION = /([^а-яёa-z0-9\s])+?/i

        # Returns a case-insensitive regexp matching any placeholder sequence
        # listed in MAPPINGS.
        def substitutions_regexp
          alternation_of(MAPPINGS.keys)
        end

        # Builds one case-insensitive regexp alternating over all templates.
        #
        # templates - a single template or a collection of templates; each must
        #             respond to #id and #text. The argument is passed through
        #             Kernel#Array.
        #
        # Each alternative is wrapped in a named group "id<template.id>".
        def compile(templates)
          alternatives = Array(templates).map do |template|
            "(?<id#{template.id}>" + compile_template(template) + ")"
          end
          Regexp.new(alternatives.join("|"), Regexp::IGNORECASE)
        end

        # Removes punctuation from a raw pattern while keeping placeholder
        # sequences verbatim, then passes the result to
        # StringTools.collapse_spaces. The argument itself is not modified.
        def prepare_pattern(pattern)
          without_punctuation = replace_special_seqs(pattern) { |match, _| match }
          StringTools.collapse_spaces(without_punctuation)
        end

        # Removes every PUNCTUATION match from str and passes the result to
        # StringTools.collapse_spaces.
        def prepare_text(str)
          StringTools.collapse_spaces(cut_punctuation(str))
        end

        # Returns the regexp source (a String) for one template, anchored with
        # \A and \z.
        #
        # Literal template text is not regexp-escaped: #compile_special_seqs
        # drops every PUNCTUATION character, so only letters, digits and
        # whitespace from the literal text reach the result.
        def compile_template(template)
          pattern  = StringTools.collapse_spaces(template.text)
          pattern  = compile_special_seqs(pattern)
          compiled = StringTools.collapse_spaces(pattern)
          "\\A" + compiled + "\\z"
        end

        private

        # Replaces every placeholder in pattern with the regexp source produced
        # by its MAPPINGS proc; punctuation is removed.
        def compile_special_seqs(pattern)
          replace_special_seqs(pattern) do |match, replacement|
            # instance_exec so the proc's call to limits_for resolves on self.
            instance_exec(match, &replacement)
          end
        end

        # Scans pattern for placeholders and punctuation.
        #
        # Yields (matched_text, mapping_proc) for each placeholder and
        # substitutes the block's result; punctuation (a match with no entry in
        # MAPPINGS) is replaced with an empty string.
        #
        # Returns a new UTF-8 tagged String.
        def replace_special_seqs(pattern)
          # force_encoding mutates its receiver and raises on frozen strings,
          # so work on a copy instead of re-tagging the caller's object.
          utf8_pattern = pattern.dup.force_encoding('utf-8')

          utf8_pattern.gsub(special_seqs_regexp) do |match|
            _, replacement = MAPPINGS.find { |key, _| match =~ key }
            replacement ? yield(match, replacement) : ""
          end
        end

        # Memoized case-insensitive regexp matching any placeholder or a
        # punctuation character. Placeholders come first in the alternation so
        # "%" is consumed as part of a placeholder before PUNCTUATION can
        # claim it.
        def special_seqs_regexp
          @_special_seqs_regexp ||= alternation_of(MAPPINGS.keys + [PUNCTUATION])
        end

        # Joins the sources of the given regexps into a single case-insensitive
        # alternation, wrapping each source in its own group.
        def alternation_of(regexps)
          source = regexps.map { |regexp| "(#{regexp.source})" }.join("|")
          Regexp.new(source, Regexp::IGNORECASE)
        end

        # Extracts "m,n" from a placeholder such as "%d{m,n}" and returns the
        # regexp quantifier "{m,n}". Missing numbers become 0 via nil.to_i.
        def limits_for(str)
          _, low, high = /(\d+),(\d+)/.match(str).to_a
          "{#{low.to_i},#{high.to_i}}"
        end

        # Returns str with every PUNCTUATION match removed.
        def cut_punctuation(str)
          str.gsub(PUNCTUATION, "")
        end

      end
    end
  end
Add Comment
Please, Sign In to add comment