Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def _approximatePatterns(phrase):
    """Yield regex patterns for near-misses of *phrase*.

    Two families of patterns are produced, in this order:

    * for every position ``i``, the phrase with character ``i`` replaced by
      ``.{0,2}`` -- this covers that character being dropped (0 chars),
      substituted (1 char) or accompanied by an inserted char (2 chars);
    * for every adjacent pair, the phrase with those two characters swapped.

    The literal parts are escaped piece by piece so that regex
    metacharacters in the phrase are matched literally and the wildcard is
    never spliced into the middle of an escape sequence.
    """
    for i in range(len(phrase)):
        yield re.escape(phrase[:i]) + '.{0,2}' + re.escape(phrase[i + 1:])
    for i in range(len(phrase) - 1):
        if phrase[i] == phrase[i + 1]:
            # Swapping identical characters gives the phrase itself, which
            # the exact search has already ruled out.
            continue
        swapped = phrase[:i] + phrase[i + 1] + phrase[i] + phrase[i + 2:]
        yield re.escape(swapped)


def doMatching(file, origPattern):
    """Search a file for a phrase and print exact or approximate matches.

    The whole file is read into memory first (it can only be read once).
    If the phrase occurs literally, the sorted start offsets of all
    occurrences are printed and the approximate search is skipped.
    Otherwise the text is searched for near-misses (one character dropped,
    substituted or inserted, or two adjacent characters swapped) and every
    hit is printed together with the text that actually matched.

    Args:
        file: An object with a ``read()`` method returning the text to
            search.
        origPattern: The phrase to look for. It is treated as literal text,
            not as a regular expression.

    Returns:
        None. All results are written to standard output.

    Note:
        ``.`` does not match a newline, so an approximate match never spans
        a line break at the wildcard position. When the wildcard replaces
        the first character, the reported offset can lie up to two
        characters before the recognisable part of the phrase.
    """
    entireFile = file.read()

    # Exact search on the escaped phrase.
    exactStarts = sorted(
        m.start() for m in re.finditer(re.escape(origPattern), entireFile)
    )
    if exactStarts:
        print("at: " + str(exactStarts) + " there are EXACT match(es) of the phrase \'" + origPattern + "\'")
        return

    # Map each start offset to the text matched there. Keeping the matched
    # text per hit means the printed snippet has the right length for that
    # particular hit; the first pattern to claim an offset wins.
    found = {}
    for pattern in _approximatePatterns(origPattern):
        for m in re.finditer(pattern, entireFile):
            found.setdefault(m.start(), m.group(0))

    if not found:
        print("There were no matches to your phrase")
        return

    print("\n\n----There are " + str(len(found)) + " approximate matches----")
    for start in sorted(found):
        print("A Match at: " + str(start) + " yields \'" + found[start] + "\'")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement