Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def pretreatment(fileName):
    """Read a text file and blank out common punctuation on every line.

    Each of the characters ``. , ; ( ) ? ! :`` is replaced by a single
    space. Every other character, including each line's trailing newline,
    is left untouched.

    Args:
        fileName: Path of the text file to read. The file is opened in text
            mode with the platform's default encoding.

    Returns:
        A list with one cleaned string per line of the file, in file order.
        An empty file yields an empty list.
    """
    # One translation table handles all eight characters in a single pass
    # per line instead of eight chained ``str.replace`` calls.
    punctuation_to_space = str.maketrans(dict.fromkeys(".,;()?!:", " "))
    with open(fileName, 'r') as file:
        return [line.translate(punctuation_to_space) for line in file]
def lexical_analysis(corpus):
    """Tokenize the cleaned lines of a text file into one flat token list.

    Args:
        corpus: Path of the text file to analyse. It is passed unchanged to
            ``pretreatment``, which opens and reads the file.

    Returns:
        A single list containing the tokens of every line, in file order.
    """
    # Imported here because no top-level ``import nltk`` is visible in this
    # file; a local import keeps the function self-contained either way.
    import nltk

    lexical_corpus = pretreatment(corpus)

    # ``pretreatment`` returns a list of lines, whereas ``nltk.word_tokenize``
    # works on a single string, so tokenize line by line and flatten.
    # NOTE(review): word_tokenize needs NLTK's tokenizer data to be
    # installed -- confirm the runtime environment provides it.
    tokens = []
    for line in lexical_corpus:
        tokens.extend(nltk.word_tokenize(line))
    return tokens
# Demo run: show the cleaned lines of the sample file, then its tokens.
print(pretreatment("blabla.txt"))
# ``lexical_analysis`` forwards its argument to ``pretreatment``, so it takes
# the file name; the previously used name ``corpus`` was never defined at
# module level and raised NameError.
print(lexical_analysis("blabla.txt"))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement