# Untitled

def doMatching(file, origPattern):
    """Print where ``origPattern`` occurs in ``file``, exactly or approximately.

    The whole file is read into memory once. If the phrase occurs verbatim,
    only the exact-match offsets are reported. Otherwise the text is searched
    for near misses that differ from the phrase by a single edit:

    * one phrase character replaced by zero, one or two arbitrary characters
      (covers an omitted character, a substituted character, or an inserted
      one), tried at every position of the phrase;
    * two adjacent phrase characters swapped.

    The phrase is always treated as literal text, never as a regular
    expression. Because ``.`` does not match a newline, the wildcard part of
    an approximate match never spans a line break.

    Args:
        file: An object with a ``read()`` method returning the text to search
            (for example an open text file).
        origPattern: The literal phrase to look for.

    Returns:
        None. Results are written to standard output.
    """
    # Imported locally so the function works even if the module header does
    # not import ``re``.
    import re

    # The file can only be read once, so keep its full contents as a string.
    entireFile = file.read()
    patternLength = len(origPattern)

    # Exact matches: escape the phrase so regex metacharacters are literal.
    exactStarts = sorted(
        {m.start() for m in re.finditer(re.escape(origPattern), entireFile)}
    )
    if exactStarts:
        print("at: " + str(exactStarts) + " there are EXACT match(es) of the phrase '" + origPattern + "'")
        return

    # Build one regex per single-edit variant. Each literal fragment is
    # escaped separately so the wildcard is the only active regex syntax.
    wildcard = '.{0,2}'
    candidates = [
        re.escape(origPattern[:i]) + wildcard + re.escape(origPattern[i + 1:])
        for i in range(patternLength)
    ]
    # Variants with two adjacent characters swapped.
    for i in range(patternLength - 1):
        swapped = (origPattern[:i] + origPattern[i + 1] + origPattern[i]
                   + origPattern[i + 2:])
        candidates.append(re.escape(swapped))

    # Map each match offset to the text actually matched there, so the report
    # shows the real near-miss instead of a guessed-length slice.
    found = {}
    for candidate in candidates:
        for m in re.finditer(candidate, entireFile):
            text = m.group(0)
            if not text:
                # A zero-length hit (possible for one-character phrases)
                # carries no information.
                continue
            start = m.start()
            best = found.get(start)
            # When several variants hit the same offset keep the one whose
            # length is closest to the phrase; ties keep the first found.
            if best is None or (abs(len(text) - patternLength)
                                < abs(len(best) - patternLength)):
                found[start] = text

    if not found:
        print("There were no matches to your phrase")
        return

    print("\n\n----There are " + str(len(found)) + " approximate matches----")
    for start in sorted(found):
        print("A Match at: " + str(start) + " yields '" + found[start] + "'")