Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def distance(a, b):
    """Return the Levenshtein (edit) distance between sequences a and b.

    The distance is the minimum number of single-element insertions,
    deletions and substitutions needed to turn one sequence into the other.

    Args:
        a: First sequence (anything supporting len() and indexing, e.g. str).
        b: Second sequence of the same kind.

    Returns:
        The edit distance as a non-negative int.
    """
    n, m = len(a), len(b)
    if n > m:
        # Make `a` the shorter sequence so each row has min(n, m) + 1 cells.
        a, b = b, a
        n, m = m, n

    # Only the previous and the current row of the DP matrix are kept.
    current_row = list(range(n + 1))
    for i in range(1, m + 1):
        previous_row, current_row = current_row, [i] + [0] * n
        for j in range(1, n + 1):
            add = previous_row[j] + 1
            delete = current_row[j - 1] + 1
            change = previous_row[j - 1]
            if a[j - 1] != b[i - 1]:
                change += 1
            current_row[j] = min(add, delete, change)
    # The last cell of the final row holds the distance for the full inputs.
    return current_row[n]
def main():
    """Print word-form statistics for the text stored in 'Brain012.txt'.

    Reads the text file, splits it into word forms with split_text(), and
    prints the total number of word forms and the number of distinct ones.
    """
    # 'dict1.txt' is opened together with the text (so a missing dictionary
    # still fails up front) but nothing is read from it in this function.
    # Both files are closed automatically when the `with` block exits.
    with open('Brain012.txt') as f, open('dict1.txt'):
        text = split_text(f)

    countWIT = len(text)
    setText = mySet(text)
    countWIST = len(setText)

    print("="*20, 'До проверки', '='*20)
    print('Колво словоформ в тексте = ', countWIT)
    print('Колво разных словоформ в тексте = ', countWIST)
def mySet(text):
    """Return the distinct items of `text` in order of first appearance.

    Args:
        text: Iterable of hashable items (word strings in this script).

    Returns:
        A new list containing each item once, ordered by first occurrence.
    """
    # dict preserves insertion order, so this de-duplicates in a single
    # O(n) pass instead of scanning the result list for every item.
    return list(dict.fromkeys(text))
def split_text(f):
    """Split the lines of `f` into lowercase words with punctuation removed.

    Each line is lowercased and split on whitespace; the delimiter
    characters listed below are deleted from every word, and words that
    become empty are dropped.

    Args:
        f: Iterable of text lines (e.g. an open text file).

    Returns:
        A list of the cleaned words in reading order.
    """
    delimiters = ['!', '?', ',', ';', '.', ':', '«', '»', '(', ')']
    # One translation table, built once, deletes every delimiter in a single
    # pass per word instead of one str.replace() call per character.
    strip_table = str.maketrans('', '', ''.join(delimiters))

    text = []
    for line in f:
        for word in line.lower().split():
            word = word.translate(strip_table)
            if word != '':
                text.append(word)
    return text
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement