Advertisement
Guest User

Untitled

a guest
Apr 10th, 2016
86
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.67 KB | None | 0 0
  1. def doMatching(file, origPattern):
  2.     re.escape(origPattern)
  3.     # if we make a find using the search for a missing character this will be used for printing the string with more characters
  4.     missing = 0
  5.     foundViaMissing = False
  6.     # if we make a find using the search for a missing character this will be used for printing the string with less characters
  7.     extra = 0
  8.     foundViaExtra = False
  9.     # make the found indices a set so we don't reference any duplicates
  10.     indices = set()
  11.     # we take the entire file and make it to a string since we can only read from it once
  12.     entireFile = file.read()
  13.     # check for an exact match
  14.     iter = re.finditer(origPattern, entireFile)
  15.     indices.update([m.start(0) for m in iter])
  16.     # if we find an exact match then return the indices along with the phrase being matched
  17.     if indices.__len__() != 0:
  18.         print("at: " + str(list(indices)) + " there are EXACT match(es) of the phrase \'" + origPattern + "\'")
  19.     else:
  20.         # now we will do all 1 character differences from before the phrase to the last character in the phrase ie.: [0:lastChar]
  21.         # this includes inserting any possible single item at the position, or having the position omitted altogether
  22.         for i in range(len(origPattern) - 1):
  23.             pattern = origPattern[0:i] + '.{0,2}' + origPattern[i + 1:]
  24.             iterator = re.finditer(pattern, entireFile)
  25.             indices.update([m.start(0) for m in iterator])
  26.         # finish up with the last char and the space after the last char
  27.         for i in range(len(origPattern) - 1, len(origPattern) + 1):
  28.             pattern = origPattern[:i] + '.{0,2}'
  29.             iterator = re.finditer(pattern, entireFile)
  30.             indices.update([m.start(0) for m in iterator])
  31.         # this will be used to determine if our pattern is used to find a phrase with a missing/extra character
  32.         dif = difflib.Differ().compare(entireFile[list(indices)[0]:list(indices)[0] + len(origPattern)],origPattern)
  33.         for elem in dif:
  34.             print(elem)
  35.             if elem.__contains__('-'):
  36.                 # when we output the string we found we have to account for our input being short by 1 char
  37.                 missing = 1
  38.                 break
  39.             elif elem.__contains__('+'):
  40.                 # when we output the string we found we have to account for our input being longer by 1 char
  41.                 extra = -1
  42.                 break
  43.         # This deals with two adjacent characters being swapped
  44.         for i in range(len(origPattern) - 1):
  45.             t = list(origPattern)
  46.             t[i], t[i + 1] = t[i + 1], t[i]
  47.             pattern = ''.join(t)
  48.             iterator = re.finditer(pattern, entireFile)
  49.             indices.update([m.start(0) for m in iterator])
  50.         # print everything we've found
  51.         if indices.__len__() != 0:
  52.             print("\n\n----There are " + str(len(indices)) + " approximate matches----")
  53.             for index in range(len(indices)):
  54.                 # print(i)
  55.                 print("A Match at: " + str(list(indices)[index]) + " yields \'" + entireFile[
  56.                                                                                   list(indices)[
  57.                                                                                       index]:
  58.                                                                                   list(indices)[
  59.                                                                                       index] + len(
  60.                                                                                       origPattern) + missing + extra] + "\'")
  61.         else:
  62.             print("There were no matches to your phrase")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement