# patterns

import re
import sys


def parse_patterns(input_file, displayable_patterns, regex_patterns):
    """Read the pattern section of the input and fill both output lists.

    The first line read must hold a positive integer N; the next N lines are
    comma-separated patterns in which a field that is exactly "*" is a
    wildcard.

    Args:
        input_file: Open text file object positioned at the pattern count.
        displayable_patterns: List that receives each raw pattern string.
        regex_patterns: List that receives one compiled regex per pattern, at
            the same index as its raw string. Every field becomes one capture
            group, so ``regex.groups`` equals the number of fields.

    Exits the process (status 0, message on stdout) when the count line is
    not an integer or is not positive.
    """
    # not sure if these checks are needed, but just in case
    try:
        num_patterns = int(input_file.readline())
    except ValueError:
        print("Pattern count input is not an int.")
        sys.exit(0)
    if num_patterns <= 0:
        print("Pattern count input must be positive int. Try again.")
        sys.exit(0)

    # range() exists on both Python 2 and Python 3; xrange() is Python 2 only.
    for _ in range(num_patterns):
        pattern = input_file.readline().strip("\n")
        displayable_patterns.append(pattern)
        pattern_sections = [
            "(.*)" if section == "*" else "(" + re.escape(section) + ")"
            for section in pattern.split(",")
        ]
        # Anchor the end of the expression: re.match() only anchors the start,
        # so without \Z a literal last field such as "b" would also accept a
        # longer path field such as "bc".
        regex_patterns.append(re.compile("/".join(pattern_sections) + r"\Z"))


def parse_paths(input_file, paths):
    """Read the path section of the input and append each path to *paths*.

    The first line read must hold a positive integer N; the next N lines are
    appended to *paths* with their newline characters stripped.

    Args:
        input_file: Open text file object positioned at the path count.
        paths: List that receives the path strings in input order.

    Exits the process (status 0, message on stdout) when the count line is
    not an integer or is not positive.
    """
    try:
        num_paths = int(input_file.readline())
    except ValueError:
        print("Path count input is not an int.")
        sys.exit(0)
    if num_paths <= 0:
        print("Path count input must be positive int. Try again.")
        sys.exit(0)

    # range() exists on both Python 2 and Python 3; xrange() is Python 2 only.
    for _ in range(num_paths):
        paths.append(input_file.readline().strip("\n"))


def match_path(displayable_patterns, regex_patterns, no_wildcard_patterns, wildcard_patterns, path):
    """Collect every pattern that matches *path*, split by wildcard use.

    Args:
        displayable_patterns: Raw pattern strings, index-aligned with
            *regex_patterns*.
        regex_patterns: Compiled regexes with one capture group per field.
        no_wildcard_patterns: List that receives a matching pattern that
            contains no "*" character. Scanning stops at the first one.
        wildcard_patterns: List that receives each matching pattern that
            contains a "*" character.
        path: Slash-separated path to test.
    """
    field_count = len(path.split("/"))
    for idx, pattern in enumerate(regex_patterns):
        # if the number of fields are not equal, assume not a match
        if pattern.groups != field_count:
            continue
        matched = pattern.match(path)
        # re.match() only anchors the start of the string, so also require the
        # match to consume the whole path; otherwise a literal last field "b"
        # would accept the path field "bc".
        if matched is None or matched.end() != len(path):
            continue
        display = displayable_patterns[idx]
        if "*" not in display:
            # no wildcard means an exact match, so nothing can beat it
            no_wildcard_patterns.append(display)
            break
        wildcard_patterns.append(display)


def choose_best_pattern(patterns):
    """Return the index of the pattern whose wildcards sit furthest right.

    Each pattern is a comma-separated string in which a field that is exactly
    "*" is a wildcard. The pattern whose leftmost wildcard is in the
    rightmost field wins; when leftmost wildcards tie, the next wildcard is
    compared the same way, and so on. When patterns tie completely, the
    earliest one wins.

    Only wildcard positions are compared, so this is meant for candidates
    that already contain the same number of wildcards.

    Args:
        patterns: Non-empty list of pattern strings.

    Returns:
        Index into *patterns* of the preferred pattern.

    Raises:
        ValueError: If *patterns* is empty.
    """
    if not patterns:
        raise ValueError("patterns must not be empty")

    best_index = 0
    best_positions = None
    for index, pattern in enumerate(patterns):
        positions = [
            position
            for position, section in enumerate(pattern.split(","))
            if section == "*"
        ]
        # Lists compare lexicographically, which is exactly "compare the
        # leftmost wildcard first, then the next one on a tie". The strict
        # comparison keeps the earliest pattern when positions are identical.
        if best_positions is None or positions > best_positions:
            best_index = index
            best_positions = positions
    return best_index


def find_leftmost_wildcard_index(pattern):
    """Return the zero-based position of the first wildcard field.

    *pattern* is split on commas and only a field that is exactly "*" counts
    as a wildcard. Returns None when no field is a wildcard.
    """
    fields = pattern.split(",")
    try:
        return fields.index("*")
    except ValueError:
        return None


def pattern_match():
    """Match every path in the input file and write the best pattern for each.

    Reads the input file named by ``sys.argv[1]`` (a pattern section followed
    by a path section) and writes one line per path to the file named by
    ``sys.argv[2]``: the best-matching pattern, or "NO MATCH".
    """
    # "with" guarantees both files are closed even when a parser calls
    # sys.exit() or any step raises.
    with open(sys.argv[1], 'r') as input_file, open(sys.argv[2], 'w') as output_file:
        regex_patterns = []
        displayable_patterns = []
        paths = []

        parse_patterns(input_file, displayable_patterns, regex_patterns)
        parse_paths(input_file, paths)

        for path in paths:
            no_wildcard_patterns = []
            wildcard_patterns = []
            # leading/trailing slashes are not part of any field
            match_path(displayable_patterns, regex_patterns, no_wildcard_patterns, wildcard_patterns, path.strip("/"))

            if no_wildcard_patterns:
                output_file.write(no_wildcard_patterns[0] + "\n")
                continue
            if not wildcard_patterns:
                output_file.write("NO MATCH\n")
                continue

            # assumption is patterns with less wildcards will match stronger.
            # patterns without wildcards are filtered out already
            wildcard_counts = [pattern.count("*") for pattern in wildcard_patterns]
            min_wildcards = min(wildcard_counts)
            candidates = [
                pattern
                for pattern, count in zip(wildcard_patterns, wildcard_counts)
                if count == min_wildcards
            ]

            if len(candidates) == 1:
                output_file.write(candidates[0] + "\n")
            else:
                # several patterns tie on wildcard count; break the tie by
                # wildcard position
                output_file.write(candidates[choose_best_pattern(candidates)] + "\n")

# Script entry point. There is no __main__ guard, so this call also runs
# whenever the module is imported.
pattern_match()