makispaiktis

DCP31 - Edit changes

Sep 28th, 2020
1,051
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. '''
  2. This problem was asked by Google.
  3. The edit distance between two strings refers to the minimum number of character insertions, deletions, and substitutions required to change one string
  4. to the other. For example, the edit distance between “kitten” and “sitting” is three: substitute the “k” for “s”, substitute the “e” for “i”, and append a “g”.
  5. Given two strings, compute the edit distance between them.
  6. '''
  7.  
  8. def indexForMaximization(message1, message2):
  9.     small = ""
  10.     big = ""
  11.     if len(message1) >= len(message2):
  12.         big = message1
  13.         small = message2
  14.     else:
  15.         big = message2
  16.         small = message1
  17.     # Example: "believe" and "unbelievable"
  18.     # "unbelie", "nbeliev", "BELIEVA", "elievab", "lievabl", "ievable"
  19.     # I will try to find the index in the big message, that maximizes the hits
  20.     N = len(big)
  21.     n = len(small)
  22.     maxHits = 0
  23.     index = -1
  24.     for i in range(0, N-n+1):
  25.         hits = 0
  26.         for j in range(n):
  27.             if big[i+j] == small[j]:
  28.                 hits += 1
  29.         if hits > maxHits:
  30.             maxHits = hits
  31.             index = i + j
  32.     # Now, j has the value of last iteration and last hit
  33.     # So, I have to remove the length of small message
  34.     rightIndex = index - n + 1
  35.     return big, small, N, n, rightIndex
  36.  
  37. def edit(message1, message2):
  38.     big, small, N, n, rightIndex = indexForMaximization(message1, message2)
  39.     # Now, I have the index in big message, that maximizes the hits
  40.     # I know that if I begin from index I have many hits ----> If I dont make hits, I have to change letters
  41.     # 1. SUBSTITUTIONS
  42.     substitutions = 0
  43.     subPrints = list()
  44.     for i in range(n):
  45.         if big[rightIndex+i] != small[i]:
  46.             substitutions += 1
  47.             subPrint = str(big[rightIndex+i]) + " ----> " + str(small[i])
  48.             subPrints.append(subPrint)
  49.  
  50.     # 2a. INSERTIONS
  51.     insertionsLeft = rightIndex
  52.     insLeftPrints = list()
  53.     for i in range(insertionsLeft):
  54.         insertionLeft = str("Insertion of " + str(big[i]))
  55.         insLeftPrints.append(insertionLeft)
  56.  
  57.     # 2b. INSERTIONS
  58.     insertionsRight = N - (rightIndex + n)
  59.     insRightPrints = list()
  60.     for i in range(N - insertionsRight, N):
  61.         insertionRight = str("Insertion of " + str(big[i]))
  62.         insRightPrints.append(insertionRight)
  63.  
  64.     return substitutions, subPrints, insertionsLeft, insLeftPrints, insertionsRight, insRightPrints
  65.  
  66. def prettyPrint(message1, message2):
  67.     big, small, N, n, rightIndex = indexForMaximization(message1, message2)
  68.     subs, subPrints, insLeft, insLeftPrints, insRight, insRightPrints = edit(message1, message2)
  69.     changes = subs + insLeft + insRight
  70.     print("******************************************************************")
  71.     print("Comparing '" + str(message1) + "' and '" + str(message2) + "':")
  72.     print("Changes = " + str(changes) + " <----> " + str(insLeft) + " insertionsLeft, " + str(subs) + " substitutions, " + str(insRight) + " insertionsRight")
  73.     print("******************************************************************")
  74.     print("    1. InsertionsLeft = " + str(insLeft))
  75.     for i in range(len(insLeftPrints)):
  76.         print("    " + str(insLeftPrints[i]))
  77.     print()
  78.     print("    2. Substitutions = " + str(subs))
  79.     for i in range(len(subPrints)):
  80.         print("    " + str(subPrints[i]))
  81.     print()
  82.     print("    3. InsertionsRight = " + str(insRight))
  83.     for i in range(len(insRightPrints)):
  84.         print("    " + str(insRightPrints[i]))
  85.     print()
  86.  
  87. # MAIN FUNCTION
  88. prettyPrint("believe", "unbelievable")
  89. prettyPrint("kite", "able")
  90. prettyPrint("suffer", "unsuffer")
  91. prettyPrint("sales", "sure")
RAW Paste Data