Advertisement
Guest User

Untitled

a guest
May 30th, 2015
202
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.43 KB | None | 0 0
  1. # Homework 8: String edit distance
  2.  
  3. import os
  4. import sys
  5.  
  6. # Calculate the cost of an 'operation' given 2 characters:
  7. def op_cost(a, b):
  8.     vowels = "aeiou"
  9.     if a == b:
  10.         # identity
  11.         return 0.0
  12.     elif a in vowels and b in vowels:
  13.         # v to v
  14.         return 0.5
  15.     elif a not in vowels and b not in vowels:
  16.         # c to c
  17.         return 0.6
  18.     elif (a in vowels) != (b in vowels):
  19.         # c to v
  20.         return 1.2
  21.     # insertion/deletion
  22.     elif a == '' or b == '':
  23.         return 2.0
  24.  
  25. def sed(string1, string2):
  26.     # make a dictionary with all possible combinations of characters from string1 and string2
  27.     list1 = range(len(string1)+1)
  28.     list2 = range(len(string2)+1)
  29.     # character indexes will actually start at 1, not 0
  30.     indices = []
  31.     d = dict()
  32.     for i in list1:
  33.         for j in list2:
  34.             d[i,j] = 0.0 # initialize first
  35.  
  36.     for i in list1:
  37.         d[i,0] = i
  38.     for j in list2:
  39.         d[0,j] = j
  40.    
  41.     for j in list2[1:]:
  42.         for i in list1[1:]:
  43.             if string1[i-1] == string2[j-1]:
  44.                 d[i,j] = d[i-1,j-1]
  45.                 indices.append((i-1,j-1))
  46.             else:
  47.                 d[i,j] = min(d[i-1, j] + 2.0,
  48.                              d[i, j-1] + 2.0,
  49.                              d[i-1,j-1] + op_cost(string1[i-1], string2[j-1]))
  50.                 # indices.append((i,j))
  51.                 if d[i,j] == d[i-1, j] + 2.0:
  52.                     indices.append((i-1,j))
  53.                 elif d[i,j] == d[i, j-1] + 2.0:
  54.                     indices.append((i,j-1))
  55.                 elif d[i,j] == d[i-1,j-1] + op_cost(string1[i-1], string2[j-1]):
  56.                     indices.append((i-1,j-1))
  57.     print indices
  58.     print len(indices)
  59.     print len(d.keys())
  60.     print "\n"
  61.     print d
  62.     print d[4,3]
  63.     print d[3,3]
  64.     print d[8,5]
  65.     print d[13,9] # should be 11.0
  66.     print d[12,8] # should be 10.4
  67.     print d[1,1]
  68.     print d[2,2]
  69.     print d[3,3]
  70.     print d[4,3]
  71.     print d[5,3]
  72.     return d, d[len(string1), len(string2)], indices
  73.  
  74. def print_sed(d, string1, string2, indices):
  75.     str1_list = list(string1)
  76.     str2_list = list(string2)
  77.     list1 = range(len(string1)+1)
  78.     list2 = range(len(string2)+1)
  79.  
  80.     sys.stdout.write("     ")
  81.     for char in str1_list:
  82.         printstr = char.ljust(5)
  83.         sys.stdout.write(printstr)
  84.  
  85.     sys.stdout.write("\n")
  86.    
  87.     for i in list2[1:]:
  88.         printstr1 = str2_list[i-1].ljust(5)
  89.         sys.stdout.write(printstr1)
  90.         for j in list1[1:]:
  91.             printstr2 = str(d[j,i]).ljust(5)
  92.             sys.stdout.write(printstr2)
  93.         sys.stdout.write("\n")
  94.  
  95. string1 = sys.argv[1]
  96. string2 = sys.argv[2]
  97.  
  98. sed = sed(string1, string2)
  99. print_sed(sed[0], string1, string2, sed[2])
  100. print "String edit distance for %s and %s: %.1f" % (string1, string2, sed[1])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement