Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Report the most frequent adjectives in a text file.
#
# Usage: python <script> FILENAME
# Prints the ten most common adjectives with their counts, then plots the
# frequency distribution of the 100 most common ones.
import argparse


def adjectives(tagged):
    """Return the words whose part-of-speech tag starts with ``"J"``.

    Args:
        tagged: Iterable of ``(word, tag)`` pairs, e.g. the output of
            ``nltk.pos_tag``.

    Returns:
        A list of the matching words in input order; duplicates are kept so
        the result can be fed straight into a frequency count.
    """
    # str.startswith is safe on an empty tag, unlike indexing tag[0].
    return [word for word, tag in tagged if tag.startswith("J")]


def main():
    """Parse the command line, tag the file's text and report adjectives."""
    # Imported here so this module (and `adjectives`) can be imported
    # without nltk being loaded.
    import nltk

    parser = argparse.ArgumentParser()
    parser.add_argument("filename")
    options = parser.parse_args()

    with open(options.filename, 'r') as f:
        text = f.read()

    tokenized = nltk.word_tokenize(text)
    tagged = nltk.pos_tag(tokenized)

    adj_dist = nltk.FreqDist(adjectives(tagged))
    print(adj_dist.most_common(10))
    # plot() draws the chart itself; its return value is not meant to be
    # printed, so call it for its side effect only.
    adj_dist.plot(100)


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement