Advertisement
Guest User

Untitled

a guest
Dec 8th, 2019
95
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.69 KB | None | 0 0
  1. def distance(a,b):
  2. "Calculates the levenshtein distance between a and b."
  3. n, m = len(a), len(b)
  4. if n > m:
  5. # Make sure n <= m, to use 0(min(n, m)) space
  6. a, b = b, a
  7. n, m = m, n
  8.  
  9. current_row = range(n + 1) # Keep current and previous row, not entire matrix
  10. for i in range(1, m + 1):
  11. previous_row, current_row = current_row, [i] + [0] * n
  12. for j in range(1, n + 1):
  13. add, delete, change = previous_row[j] + 1, current_row[j - 1] + 1, previous[j - 1]
  14. if a[j - 1] != b[i -1]:
  15. change += 1
  16. current_row[j] = min(add, delete, change)
  17.  
  18. return curent_row
  19.  
  20. def main():
  21. f = open('Brain012.txt')
  22. d = open('dict1.txt')
  23.  
  24. text = split_text(f)
  25. countWIT = len(text)
  26. setText = mySet(text)
  27. countWIST = len(setText)
  28.  
  29. print("="*20, 'До проверки', '='*20)
  30. print('Колво словоформ в тексте = ', countWIT)
  31. print('Колво разных словоформ в тексте = ', countWist)
  32.  
  33. def mySet(text):
  34. ms = []
  35. flag = 0
  36. for word in text:
  37. if word in ms:
  38. continue
  39. else:
  40. ms.append(word)
  41.  
  42. return ms
  43.  
  44. def split_text(f):
  45. text = []
  46. delimiters = ['!', '?', ',', ';', '.', ':', '«', '»', '(', ')' ]
  47.  
  48. for line in f:
  49. for word in line.lower().split():
  50. for char in word:
  51. if char in delimiters:
  52. word = word.replace(char, '')
  53. if word != '':
  54. text.append(word)
  55.  
  56. return text
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement