Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Characters deleted from every document by make_normal (after lowercasing).
_PUNCTUATION_TABLE = str.maketrans('', '', ',.-;"\'!')


def _strip_mentions(text):
    """Return *text* without whitespace-separated tokens that start with '@'."""
    return ' '.join(tok for tok in text.split() if not tok.startswith('@'))


def _strip_punctuation(text):
    """Return *text* lowercased with the characters , . - ; " ' ! deleted."""
    return text.lower().translate(_PUNCTUATION_TABLE)


def make_normal(corpus, analyzer=None):
    """Normalize every document of *corpus* in place.

    Each document goes through three steps:

    1. every whitespace-separated token starting with '@' is dropped
       (all of them, not only the first one in the document);
    2. the text is lowercased and the characters , . - ; " ' ! are deleted;
    3. every remaining token is replaced by
       ``analyzer.parse(token)[0].normal_form``.

    The tokens are re-joined with single spaces, so any run of whitespace
    in the input collapses to one space.

    Args:
        corpus: Mutable, integer-indexable sequence of strings (e.g. a
            list). Its items are overwritten with the normalized text.
        analyzer: Optional object exposing ``parse(word)`` that returns a
            sequence whose first item has a ``normal_form`` attribute.
            When omitted, a new ``MorphAnalyzer()`` is created, exactly as
            before. NOTE(review): presumably this is pymorphy's analyzer;
            if constructing it is costly, pass one shared instance when
            normalizing several corpora.

    Returns:
        The literal string 'Done!'.
    """
    if analyzer is None:
        analyzer = MorphAnalyzer()

    for i, document in enumerate(corpus):
        # Mentions are removed before punctuation so that only tokens whose
        # *raw* first character is '@' count as mentions.
        cleaned = _strip_punctuation(_strip_mentions(document))
        # split() without arguments skips tokens that became empty once
        # their punctuation was deleted.
        corpus[i] = ' '.join(
            analyzer.parse(token)[0].normal_form for token in cleaned.split()
        )
    return 'Done!'
# Run the normalization over both datasets; the value returned by
# make_normal is discarded.
for dataset in (corpus, test):
    make_normal(dataset)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement