Advertisement
Guest User

patterns

a guest
Nov 23rd, 2014
172
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.84 KB | None | 0 0
  1. import re
  2. import sys
  3.  
  4.  
  5. def parse_patterns(input_file, displayable_patterns, regex_patterns):
  6.     # not sure if these checks are needed, but just in case
  7.     try:
  8.         num_patterns = int(input_file.readline())
  9.     except ValueError:
  10.         print("Pattern count input is not an int.")
  11.         sys.exit(0)
  12.     else:
  13.         if num_patterns <= 0:
  14.             print("Pattern count input must be positive int. Try again.")
  15.             sys.exit(0)
  16.     for x in xrange(num_patterns):
  17.         pattern = input_file.readline().strip("\n")
  18.         displayable_patterns.append(pattern)
  19.         pattern_sections = ["(.*)" if section == "*" else "(" + re.escape(section) + ")" for section in pattern.split(',')]
  20.         regex_patterns.append(re.compile("/".join(pattern_sections)))
  21.  
  22.  
  23. def parse_paths(input_file, paths):
  24.     try:
  25.         num_paths = int(input_file.readline())
  26.     except ValueError:
  27.         print("Path count input is not an int.")
  28.         sys.exit(0)
  29.     else:
  30.         if num_paths <= 0:
  31.             print("Path count input must be positive int. Try again.")
  32.             sys.exit(0)
  33.     for x in xrange(num_paths):
  34.         paths.append(input_file.readline().strip("\n"))
  35.  
  36.  
  37. def match_path(displayable_patterns, regex_patterns, no_wildcard_patterns, wildcard_patterns, path):
  38.     for idx, pattern in enumerate(regex_patterns):
  39.         # if the number of fields are not equal, assume not a match
  40.         if pattern.groups != len(path.split("/")):
  41.             continue
  42.         elif pattern.match(path):
  43.             # separate matched patterns into lists differentiated by if a wildcard exists, if no wildcard then exact match and break
  44.             if displayable_patterns[idx].count("*") == 0:
  45.                 no_wildcard_patterns.append(displayable_patterns[idx])
  46.                 break
  47.             else:
  48.                 wildcard_patterns.append(displayable_patterns[idx])
  49.  
  50.  
  51. def choose_best_pattern(patterns):
  52.     previous_leftmost_wildcard_index = 0
  53.     temp_patterns = []
  54.     for index, pattern in enumerate(patterns):
  55.         current_leftmost_wildcard_index = find_leftmost_wildcard_index(pattern)
  56.         if current_leftmost_wildcard_index == previous_leftmost_wildcard_index:
  57.             temp_patterns.append(pattern[2:])
  58.         elif current_leftmost_wildcard_index > previous_leftmost_wildcard_index:
  59.             return index
  60.     return choose_best_pattern(temp_patterns)
  61.  
  62.  
  63. def find_leftmost_wildcard_index(pattern):
  64.     for index, section in enumerate(pattern.split(",")):
  65.         if section == "*":
  66.             return index
  67.  
  68.  
  69. def pattern_match():
  70.     input_file = open(sys.argv[1], 'r')
  71.     output_file = open(sys.argv[2], 'w')
  72.     regex_patterns = []
  73.     displayable_patterns = []
  74.     paths = []
  75.  
  76.     parse_patterns(input_file, displayable_patterns, regex_patterns)
  77.     parse_paths(input_file, paths)
  78.     input_file.close()
  79.  
  80.     for path in paths:
  81.         no_wildcard_patterns = []
  82.         wildcard_patterns = []
  83.         match_path(displayable_patterns, regex_patterns, no_wildcard_patterns, wildcard_patterns, path.strip("/"))
  84.         if no_wildcard_patterns:
  85.             output_file.write(no_wildcard_patterns[0] + "\n")
  86.         elif len(wildcard_patterns) > 0:
  87.             # assumption is patterns with less wildcards will match stronger. patterns without wildcards are filtered out already
  88.             min_wildcards = min([pattern.count("*") for pattern in wildcard_patterns])
  89.             wildcard_patterns = [pattern for pattern in wildcard_patterns if pattern.count("*") == min_wildcards]
  90.  
  91.             if len(wildcard_patterns) == 1:
  92.                 output_file.write(wildcard_patterns[0] + "\n")
  93.             else:
  94.                 output_file.write(wildcard_patterns[choose_best_pattern(wildcard_patterns)] + "\n")
  95.         else:
  96.             output_file.write("NO MATCH\n")
  97.     output_file.close()
  98.  
  99. pattern_match()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement