Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Tokenize a text file and print the Snowball-stemmed form of every token.

Usage:
    python script.py FILE
"""
import argparse

# NOTE(review): pos_tag is imported but never used below; kept so any
# external copy/paste consumers relying on it are unaffected.
from nltk import pos_tag, word_tokenize
from nltk.stem import snowball

parser = argparse.ArgumentParser(description="Print the Snowball-stemmed tokens of a text file.")
parser.add_argument("file", help="path to the UTF-8 text file to tokenize and stem")
options = parser.parse_args()

# Specify the encoding explicitly so decoding does not depend on the
# platform's default locale (the original relied on the implicit default).
with open(options.file, "r", encoding="utf-8") as f:
    gum_text = f.read()

# word_tokenize returns a flat list of token strings for the whole text.
gum_docs = word_tokenize(gum_text)

# English Snowball (Porter2) stemmer; stem each token individually.
my_stemmer = snowball.SnowballStemmer("english")
stemmed = [my_stemmer.stem(token) for token in gum_docs]
print(stemmed)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement