Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Tokenize and POS-tag each line of a text file with NLTK.

Usage: script.py FILE

Reads FILE, treats each newline-separated line as one document, runs NLTK
tokenization and part-of-speech tagging over every document, and prints the
first (token, tag) pair of the first document.
"""
import argparse

from nltk import pos_tag, word_tokenize
from nltk.corpus import wordnet  # noqa: F401 -- unused here; kept in case of later use
from nltk.stem import snowball  # noqa: F401 -- unused here; kept in case of later use

parser = argparse.ArgumentParser(description="POS-tag each line of a text file.")
parser.add_argument("file", help="path to the input text file")
options = parser.parse_args()

# Explicit encoding avoids platform-dependent default codecs.
with open(options.file, "r", encoding="utf-8") as f:
    gum_text = f.read()

# One "document" per line of the input file.
gum_docs = gum_text.split("\n")

# Each entry is the list of (token, POS-tag) pairs for one document.
frequencies = [pos_tag(word_tokenize(document)) for document in gum_docs]

# Guard against an empty first document (e.g. an empty input file), which
# would otherwise raise IndexError.
if frequencies and frequencies[0]:
    print(frequencies[0][0])
else:
    print("No tokens found in the first document.")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement