Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # Homework 8: String edit distance
- import os
- import sys
- # Calculate the cost of an 'operation' given 2 characters:
- def op_cost(a, b):
- vowels = "aeiou"
- if a == b:
- # identity
- return 0.0
- elif a in vowels and b in vowels:
- # v to v
- return 0.5
- elif a not in vowels and b not in vowels:
- # c to c
- return 0.6
- elif (a in vowels) != (b in vowels):
- # c to v
- return 1.2
- # insertion/deletion
- elif a == '' or b == '':
- return 2.0
- def sed(string1, string2):
- # make a dictionary with all possible combinations of characters from string1 and string2
- list1 = range(len(string1)+1)
- list2 = range(len(string2)+1)
- # character indexes will actually start at 1, not 0
- indices = []
- d = dict()
- for i in list1:
- for j in list2:
- d[i,j] = 0.0 # initialize first
- for i in list1:
- d[i,0] = i
- for j in list2:
- d[0,j] = j
- for j in list2[1:]:
- for i in list1[1:]:
- if string1[i-1] == string2[j-1]:
- d[i,j] = d[i-1,j-1]
- indices.append((i-1,j-1))
- else:
- d[i,j] = min(d[i-1, j] + 2.0,
- d[i, j-1] + 2.0,
- d[i-1,j-1] + op_cost(string1[i-1], string2[j-1]))
- # indices.append((i,j))
- if d[i,j] == d[i-1, j] + 2.0:
- indices.append((i-1,j))
- elif d[i,j] == d[i, j-1] + 2.0:
- indices.append((i,j-1))
- elif d[i,j] == d[i-1,j-1] + op_cost(string1[i-1], string2[j-1]):
- indices.append((i-1,j-1))
- print indices
- print len(indices)
- print len(d.keys())
- print "\n"
- print d
- print d[4,3]
- print d[3,3]
- print d[8,5]
- print d[13,9] # should be 11.0
- print d[12,8] # should be 10.4
- print d[1,1]
- print d[2,2]
- print d[3,3]
- print d[4,3]
- print d[5,3]
- return d, d[len(string1), len(string2)], indices
- def print_sed(d, string1, string2, indices):
- str1_list = list(string1)
- str2_list = list(string2)
- list1 = range(len(string1)+1)
- list2 = range(len(string2)+1)
- sys.stdout.write(" ")
- for char in str1_list:
- printstr = char.ljust(5)
- sys.stdout.write(printstr)
- sys.stdout.write("\n")
- for i in list2[1:]:
- printstr1 = str2_list[i-1].ljust(5)
- sys.stdout.write(printstr1)
- for j in list1[1:]:
- printstr2 = str(d[j,i]).ljust(5)
- sys.stdout.write(printstr2)
- sys.stdout.write("\n")
- string1 = sys.argv[1]
- string2 = sys.argv[2]
- sed = sed(string1, string2)
- print_sed(sed[0], string1, string2, sed[2])
- print "String edit distance for %s and %s: %.1f" % (string1, string2, sed[1])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement