# DCP31 - Edit changes

Sep 28th, 2020
1,051
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
1. '''
3. The edit distance between two strings refers to the minimum number of character insertions, deletions, and substitutions required to change one string
4. to the other. For example, the edit distance between “kitten” and “sitting” is three: substitute the “k” for “s”, substitute the “e” for “i”, and append a “g”.
5. Given two strings, compute the edit distance between them.
6. '''
7.
8. def indexForMaximization(message1, message2):
9.     small = ""
10.     big = ""
11.     if len(message1) >= len(message2):
12.         big = message1
13.         small = message2
14.     else:
15.         big = message2
16.         small = message1
17.     # Example: "believe" and "unbelievable"
18.     # "unbelie", "nbeliev", "BELIEVA", "elievab", "lievabl", "ievable"
19.     # I will try to find the index in the big message, that maximizes the hits
20.     N = len(big)
21.     n = len(small)
22.     maxHits = 0
23.     index = -1
24.     for i in range(0, N-n+1):
25.         hits = 0
26.         for j in range(n):
27.             if big[i+j] == small[j]:
28.                 hits += 1
29.         if hits > maxHits:
30.             maxHits = hits
31.             index = i + j
32.     # Now, j has the value of last iteration and last hit
33.     # So, I have to remove the length of small message
34.     rightIndex = index - n + 1
35.     return big, small, N, n, rightIndex
36.
37. def edit(message1, message2):
38.     big, small, N, n, rightIndex = indexForMaximization(message1, message2)
39.     # Now, I have the index in big message, that maximizes the hits
40.     # I know that if I begin from index I have many hits ----> If I dont make hits, I have to change letters
41.     # 1. SUBSTITUTIONS
42.     substitutions = 0
43.     subPrints = list()
44.     for i in range(n):
45.         if big[rightIndex+i] != small[i]:
46.             substitutions += 1
47.             subPrint = str(big[rightIndex+i]) + " ----> " + str(small[i])
48.             subPrints.append(subPrint)
49.
50.     # 2a. INSERTIONS
51.     insertionsLeft = rightIndex
52.     insLeftPrints = list()
53.     for i in range(insertionsLeft):
54.         insertionLeft = str("Insertion of " + str(big[i]))
55.         insLeftPrints.append(insertionLeft)
56.
57.     # 2b. INSERTIONS
58.     insertionsRight = N - (rightIndex + n)
59.     insRightPrints = list()
60.     for i in range(N - insertionsRight, N):
61.         insertionRight = str("Insertion of " + str(big[i]))
62.         insRightPrints.append(insertionRight)
63.
64.     return substitutions, subPrints, insertionsLeft, insLeftPrints, insertionsRight, insRightPrints
65.
66. def prettyPrint(message1, message2):
67.     big, small, N, n, rightIndex = indexForMaximization(message1, message2)
68.     subs, subPrints, insLeft, insLeftPrints, insRight, insRightPrints = edit(message1, message2)
69.     changes = subs + insLeft + insRight
70.     print("******************************************************************")
71.     print("Comparing '" + str(message1) + "' and '" + str(message2) + "':")
72.     print("Changes = " + str(changes) + " <----> " + str(insLeft) + " insertionsLeft, " + str(subs) + " substitutions, " + str(insRight) + " insertionsRight")
73.     print("******************************************************************")
74.     print("    1. InsertionsLeft = " + str(insLeft))
75.     for i in range(len(insLeftPrints)):
76.         print("    " + str(insLeftPrints[i]))
77.     print()
78.     print("    2. Substitutions = " + str(subs))
79.     for i in range(len(subPrints)):
80.         print("    " + str(subPrints[i]))
81.     print()
82.     print("    3. InsertionsRight = " + str(insRight))
83.     for i in range(len(insRightPrints)):
84.         print("    " + str(insRightPrints[i]))
85.     print()
86.
87. # MAIN FUNCTION
88. prettyPrint("believe", "unbelievable")
89. prettyPrint("kite", "able")
90. prettyPrint("suffer", "unsuffer")
91. prettyPrint("sales", "sure")
RAW Paste Data