Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import nltk, csv, time
- import nltk.data, nltk.tag
- from nltk.tag.perceptron import PerceptronTagger
- from nltk import word_tokenize
# Single perceptron tagger instance, created once when the module is loaded
# and shared by every call to pos_tag2().
tagger = PerceptronTagger()
# Not referenced anywhere in the visible code; kept as a module-level name.
tagset = None


def pos_tag2(tokens):
    """Tag a tokenized sentence with the module-level perceptron ``tagger``.

    Args:
        tokens: the sentence as a list of token strings.

    Returns:
        Whatever ``tagger.tag(tokens)`` returns (for NLTK taggers, a list of
        ``(token, tag)`` pairs in the same order as ``tokens``).
    """
    return tagger.tag(tokens)
# Hand-written overrides: a word listed here always receives the given tag,
# whatever the statistical tagger would have chosen for it.
models = {'select': 'VB'}


class SentenceTaggerBackoff(nltk.tag.SequentialBackoffTagger):
    """Adapt a whole-sentence tagging function for use as a ``backoff``.

    ``UnigramTagger(backoff=...)`` needs a ``SequentialBackoffTagger``: it
    reads the backoff's ``_taggers`` chain and calls ``choose_tag`` one token
    at a time.  A plain function such as ``pos_tag2`` (or a bare
    ``PerceptronTagger``) offers neither, so it cannot be passed directly.
    This adapter tags the full sentence with the wrapped function and hands
    back the tag at the requested position.

    Args:
        tag_sentence: callable taking a list of tokens and returning a list
            of ``(token, tag)`` pairs of the same length.
        backoff: optional further ``SequentialBackoffTagger`` to chain after
            this one.
    """

    def __init__(self, tag_sentence, backoff=None):
        # Explicit base-class call keeps this working on Python 2 and 3.
        nltk.tag.SequentialBackoffTagger.__init__(self, backoff)
        self._tag_sentence = tag_sentence
        self._cached_tokens = None
        self._cached_tags = None

    def choose_tag(self, tokens, index, history):
        """Return the wrapped function's tag for ``tokens[index]``."""
        # choose_tag is invoked once per token that the earlier taggers could
        # not resolve; remember the last sentence so it is tagged only once.
        snapshot = tuple(tokens)
        if snapshot != self._cached_tokens:
            self._cached_tags = [tag for _, tag in self._tag_sentence(tokens)]
            self._cached_tokens = snapshot
        return self._cached_tags[index]


# Look each word up in ``models`` first; any word that is not listed there
# falls through to the perceptron tagger via pos_tag2.
tagger2 = nltk.tag.UnigramTagger(
    model=models,
    backoff=SentenceTaggerBackoff(pos_tag2),
)

p = ['I', 'just', 'drank', 'some', 'select', 'coffee', '.']
q = tagger2.tag(p)
# print(q) with a single argument prints the same text on Python 2 and 3.
print(q)
# Expected shape of the output: 'select' is tagged 'VB' (from ``models``) and
# every other token carries the tag pos_tag2 assigned to it, instead of the
# former placeholder tag 'derp'.
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement