Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# encoding: utf-8
module MonitoringStats
  module Templates
    # Turns message templates into regular expressions.
    #
    # A template is plain text that may contain placeholders:
    #
    #   %d       one run of digits
    #   %w       one run of non-whitespace characters
    #   %d+ %w+  one or more such runs separated by single spaces
    #   %d{a,b}  between a and b digit runs
    #   %w{a,b}  between a and b non-whitespace runs
    #
    # Every character that is not a Latin/Cyrillic letter, a digit or
    # whitespace is treated as punctuation and dropped, both from templates
    # and from the texts they are matched against.
    class RegexpCompiler
      # Placeholder regexp => proc producing the regexp source it expands to.
      #
      # Order matters: the first key that matches wins, so the longer forms
      # ("%d+", "%d{1,3}") must stay ahead of the bare "%d" / "%w".
      # The procs are run with +instance_exec+, which is what lets them call
      # the private +limits_for+ helper.
      MAPPINGS = {
        /%d\+/i => proc { "(\\d+ )*\\d+" },
        /%w\+/i => proc { "(\\S+ )*\\S+" },
        /%d\{(\d+),(\d+)\}/i => proc { |match| "(\\d+\\b ?)#{limits_for(match)}" },
        /%w\{(\d+),(\d+)\}/i => proc { |match| "(\\S+\\b ?)#{limits_for(match)}" },
        /%d/i => proc { "\\d+" },
        /%w/i => proc { "\\S+" }
      }

      # A single character that is neither a Cyrillic/Latin letter, a digit
      # nor whitespace.
      PUNCTUATION = /([^а-яёa-z0-9\s])+?/i

      # Returns a case-insensitive regexp matching any placeholder listed in
      # MAPPINGS (punctuation is not included).
      def substitutions_regexp
        union_regexp(MAPPINGS.keys)
      end

      # Builds one case-insensitive regexp out of one or many templates.
      #
      # Each template becomes an alternative wrapped in a named group
      # "id<template id>", so the caller can tell which template matched.
      # Templates must respond to +id+ and +text+.
      # NOTE(review): ids are interpolated into the group name as is;
      # presumably they are integers - confirm against the template model.
      #
      # With no templates at all the source is empty, i.e. the resulting
      # regexp matches any string.
      def compile(templates)
        alternatives = Array(templates).map do |template|
          "(?<id#{template.id}>" + compile_template(template) + ")"
        end
        Regexp.new(alternatives.join("|"), Regexp::IGNORECASE)
      end

      # Normalizes a raw template pattern without compiling it: punctuation
      # is removed, placeholders are kept verbatim, and the result is passed
      # through StringTools.collapse_spaces.
      # The given string is not modified.
      def prepare_pattern(pattern)
        pattern = replace_special_seqs(pattern) { |match, _| match }
        StringTools.collapse_spaces(pattern)
      end

      # Normalizes a text before matching: punctuation is removed and the
      # result is passed through StringTools.collapse_spaces.
      def prepare_text(str)
        StringTools.collapse_spaces(cut_punctuation(str))
      end

      # Returns the regexp source (a String) for a single template, anchored
      # to the whole string with \A ... \z.
      #
      # StringTools.collapse_spaces is applied before and after placeholder
      # expansion, since dropping punctuation can leave adjacent spaces
      # behind (presumably the helper squeezes whitespace runs - it is
      # defined outside this file).
      def compile_template(template)
        pattern = StringTools.collapse_spaces(template.text)
        pattern = compile_special_seqs(pattern)
        compiled = StringTools.collapse_spaces(pattern)
        "\\A" + compiled + "\\z"
      end

      private

      # Expands every placeholder in +pattern+ into its regexp source and
      # drops punctuation.
      def compile_special_seqs(pattern)
        replace_special_seqs(pattern) do |match, replacement|
          instance_exec(match, &replacement)
        end
      end

      # Walks over every placeholder / punctuation occurrence in +pattern+.
      #
      # Placeholders are replaced with whatever the block returns for
      # (matched text, mapping proc); punctuation has no mapping and is
      # replaced with an empty string. Returns a new String.
      def replace_special_seqs(pattern)
        # String#force_encoding works in place, so operate on a copy:
        # otherwise the caller's string gets silently re-tagged and a
        # frozen string makes the call raise.
        utf8_pattern = pattern.dup.force_encoding('utf-8')

        utf8_pattern.gsub(special_seqs_regexp) do |match|
          _, replacement = MAPPINGS.find { |key, _| match =~ key }
          replacement ? yield(match, replacement) : ""
        end
      end

      # Memoized regexp matching any placeholder or a punctuation character.
      # Placeholders come first in the alternation so that "%d+" is taken as
      # a whole instead of being eaten character by character as punctuation.
      def special_seqs_regexp
        @_special_seqs_regexp ||= union_regexp(MAPPINGS.keys + [PUNCTUATION])
      end

      # Joins the sources of +regexps+ into one case-insensitive alternation,
      # each alternative in its own capturing group.
      def union_regexp(regexps)
        source = regexps.map { |regexp| "(#{regexp.source})" }.join("|")
        Regexp.new(source, Regexp::IGNORECASE)
      end

      # Extracts the "low,high" bounds from a placeholder such as "%d{2,5}"
      # and returns them as a regexp quantifier, e.g. "{2,5}".
      def limits_for(str)
        _, low, high = /(\d+),(\d+)/.match(str).to_a
        "{#{low.to_i},#{high.to_i}}"
      end

      # Returns a copy of +str+ with all punctuation characters removed.
      def cut_punctuation(str)
        str.gsub(PUNCTUATION, "")
      end
    end
  end
end
Add Comment
Please, Sign In to add comment