Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import re
- import sys
def parse_patterns(input_file, displayable_patterns, regex_patterns):
    """Read the pattern section of the input and compile every pattern.

    The first line read from ``input_file`` must hold a positive integer N.
    The following N lines are comma-separated patterns in which a field that
    is exactly ``*`` acts as a wildcard.

    Args:
        input_file: Text file object positioned at the pattern-count line.
        displayable_patterns: List that receives each raw pattern string.
        regex_patterns: List that receives the compiled regex of each
            pattern, at the same index as in ``displayable_patterns``. Each
            regex has one capture group per pattern field.

    Raises:
        SystemExit: If the count line is not an integer or is not positive.
    """
    try:
        num_patterns = int(input_file.readline())
    except ValueError:
        print("Pattern count input is not an int.")
        # Non-zero status: invalid input must not be reported as success.
        sys.exit(1)
    if num_patterns <= 0:
        print("Pattern count input must be positive int. Try again.")
        sys.exit(1)

    # range() instead of xrange() keeps this runnable on Python 2 and 3.
    for _ in range(num_patterns):
        # Drop the line terminator only; also tolerate CRLF input files.
        pattern = input_file.readline().rstrip("\r\n")
        displayable_patterns.append(pattern)
        sections = [
            "([^/]*)" if section == "*" else "(" + re.escape(section) + ")"
            for section in pattern.split(",")
        ]
        # Anchor at the end: re.match() only anchors at the start, so without
        # \Z the pattern "a,b" would also accept the path "a/bc".
        regex_patterns.append(re.compile("/".join(sections) + r"\Z"))
def parse_paths(input_file, paths):
    """Read the path section of the input into ``paths``.

    The first line read from ``input_file`` must hold a positive integer M.
    The following M lines are appended to ``paths`` with their line
    terminator removed.

    Args:
        input_file: Text file object positioned at the path-count line.
        paths: List that receives each path string.

    Raises:
        SystemExit: If the count line is not an integer or is not positive.
    """
    try:
        num_paths = int(input_file.readline())
    except ValueError:
        print("Path count input is not an int.")
        # Non-zero status: invalid input must not be reported as success.
        sys.exit(1)
    if num_paths <= 0:
        print("Path count input must be positive int. Try again.")
        sys.exit(1)

    # range() instead of xrange() keeps this runnable on Python 2 and 3.
    for _ in range(num_paths):
        # Drop the line terminator only; also tolerate CRLF input files.
        paths.append(input_file.readline().rstrip("\r\n"))
def match_path(displayable_patterns, regex_patterns, no_wildcard_patterns, wildcard_patterns, path):
    """Collect the patterns that match ``path``.

    A pattern is considered only when its number of capture groups equals the
    number of ``/``-separated fields in ``path``, and it matches only when
    its regex consumes the entire path.

    Args:
        displayable_patterns: Raw pattern strings, index-aligned with
            ``regex_patterns``.
        regex_patterns: Compiled regexes with one capture group per field.
        no_wildcard_patterns: List that receives a matching pattern that
            contains no ``*``; scanning stops after the first such match.
        wildcard_patterns: List that receives every matching pattern that
            contains ``*``, in input order.
        path: Path to match, without leading or trailing ``/``.
    """
    field_count = len(path.split("/"))
    for idx, pattern in enumerate(regex_patterns):
        # If the number of fields differs, it cannot be a match.
        if pattern.groups != field_count:
            continue
        matched = pattern.match(path)
        # re.match() only anchors at the start; require the match to reach the
        # end of the path so "a,b" does not accept "a/bc".
        if matched is None or matched.end() != len(path):
            continue
        displayable = displayable_patterns[idx]
        if "*" not in displayable:
            # Exact match: nothing can beat it, so stop looking.
            no_wildcard_patterns.append(displayable)
            break
        wildcard_patterns.append(displayable)
def choose_best_pattern(patterns):
    """Return the index of the most specific pattern in ``patterns``.

    Patterns are comma-separated field lists in which a field that is exactly
    ``*`` is a wildcard. The preferred pattern is the one whose leftmost
    wildcard sits furthest to the right; ties are broken by the next wildcard
    position, and so on. If the patterns differ in wildcard count, fewer
    wildcards wins first. When several patterns are equally good, the first
    one is returned.

    Args:
        patterns: Non-empty list of pattern strings.

    Returns:
        Index into ``patterns`` of the best pattern.

    Raises:
        ValueError: If ``patterns`` is empty.
    """
    if not patterns:
        raise ValueError("patterns must not be empty")

    def specificity(pattern):
        # Wildcard field positions, left to right. Lists compare
        # lexicographically, which applies the "leftmost wildcard further
        # right wins, then compare the remainder" rule in one comparison.
        positions = [
            index
            for index, section in enumerate(pattern.split(","))
            if section == "*"
        ]
        return (-len(positions), positions)

    best_index = 0
    best_key = specificity(patterns[0])
    for index in range(1, len(patterns)):
        key = specificity(patterns[index])
        # Strict comparison keeps the earliest pattern on ties.
        if key > best_key:
            best_index, best_key = index, key
    return best_index
def find_leftmost_wildcard_index(pattern):
    """Return the position of the first ``*`` field in ``pattern``.

    ``pattern`` is split on commas; only a field that is exactly ``*`` counts
    as a wildcard. Returns ``None`` when there is no such field.
    """
    fields = pattern.split(",")
    wildcard_positions = (pos for pos, field in enumerate(fields) if field == "*")
    return next(wildcard_positions, None)
def pattern_match():
    """Match every path in the input file against the patterns and write results.

    Reads the input file named by ``sys.argv[1]`` (pattern section followed by
    path section) and writes one line per path to the file named by
    ``sys.argv[2]``: the best matching pattern, or ``NO MATCH``.

    Selection order for a path:
      1. a matching pattern without any wildcard;
      2. otherwise, among the matching patterns with the fewest wildcards, the
         single candidate, or the one picked by ``choose_best_pattern``.
    """
    regex_patterns = []
    displayable_patterns = []
    paths = []

    # Context managers guarantee both files are closed even when parsing
    # exits early or an exception is raised part-way through.
    with open(sys.argv[1], 'r') as input_file, open(sys.argv[2], 'w') as output_file:
        parse_patterns(input_file, displayable_patterns, regex_patterns)
        parse_paths(input_file, paths)

        for path in paths:
            no_wildcard_patterns = []
            wildcard_patterns = []
            # Leading/trailing slashes are ignored when matching.
            match_path(displayable_patterns, regex_patterns, no_wildcard_patterns,
                       wildcard_patterns, path.strip("/"))

            if no_wildcard_patterns:
                output_file.write(no_wildcard_patterns[0] + "\n")
            elif wildcard_patterns:
                # Patterns with fewer wildcards are treated as stronger matches;
                # patterns without wildcards were handled above.
                min_wildcards = min(pattern.count("*") for pattern in wildcard_patterns)
                candidates = [
                    pattern for pattern in wildcard_patterns
                    if pattern.count("*") == min_wildcards
                ]
                if len(candidates) == 1:
                    output_file.write(candidates[0] + "\n")
                else:
                    output_file.write(candidates[choose_best_pattern(candidates)] + "\n")
            else:
                output_file.write("NO MATCH\n")
# Run only when executed as a script, so importing this module does not
# read sys.argv or touch any files.
if __name__ == "__main__":
    pattern_match()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement