Advertisement
Guest User

Untitled

a guest
Apr 8th, 2020
169
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.02 KB | None | 0 0
  1. def make_normal(corpus):
  2.     def remove_dog(corpus):
  3.         for i in range(len(corpus)):
  4.             split = corpus[i].split()
  5.             for j in range(len(split)):
  6.                 if split[j][0] == '@':
  7.                     split.remove(split[j])
  8.                     break
  9.             corpus[i] = ' '.join(split)
  10.    
  11.     def remove_dots(corpus):
  12.         for i in range(len(corpus)):
  13.             corpus[i] = corpus[i].lower()
  14.             corpus[i] = corpus[i].replace(',', '').replace('.', '').replace('-', '').replace(';', '').replace('"', '').replace("'", '').replace('!', '')
  15.            
  16.     def correct_words(corpus):
  17.         lemma = MorphAnalyzer()
  18.         for i in range(len(corpus)):
  19.             split = corpus[i].split()
  20.             for j in range(len(split)):
  21.                 split[j] = lemma.parse(split[j])[0].normal_form
  22.             corpus[i] = ' '.join(split)      
  23.    
  24.     remove_dog(corpus)
  25.     remove_dots(corpus)
  26.     correct_words(corpus)
  27.  
  28.     return 'Done!'
  29.  
  # Normalize both collections in place; the 'Done!' string returned by
  # make_normal is not captured.
  # NOTE(review): `corpus` and `test` are defined outside this excerpt --
  # presumably mutable lists of strings; confirm against the caller.
  30. make_normal(corpus)
  31. make_normal(test)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement