Advertisement
Guest User

ch05

a guest
May 17th, 2014
1,261
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.46 KB | None | 0 0
  1.     import nltk
  2.     from nltk.corpus import brown
  3.     brown_tagged_sents = brown.tagged_sents(categories='news')
  4.     brown_sents = brown.sents(categories='news')
  5.    
  6.     size = int(len(brown_tagged_sents) * 0.9)
  7.    
  8.     train_sents = brown_tagged_sents[:size]
  9.     test_sents = brown_tagged_sents[size:]
  10.    
  11.     t0 = nltk.DefaultTagger('NN')
  12.     t1 = nltk.UnigramTagger(train_sents, backoff=t0)
  13.     t2 = nltk.BigramTagger(train_sents, backoff=t1)
  14.     t2.evaluate(test_sents)
  15.    
  16.     from cPickle import dump
  17.     output = open('t2.pkl', 'wb')
  18.     dump(t2, output, -1)
  19.     output.close()
  20.    
  21.     from cPickle import load
  22.     input = open('t2.pkl', 'rb')
  23.     tagger = load(input)
  24.     input.close()
  25.    
  26.     text = """The board's action shows what free enterprise
  27.        is up against in our complex maze of regulatory laws ."""
  28.     tokens = text.split()
  29.     tagger.tag(tokens)
  30.    
  31.     cfd = nltk.ConditionalFreqDist(
  32.                ((x[1], y[1], z[0]), z[1])
  33.                for sent in brown_tagged_sents
  34.                for x, y, z in nltk.trigrams(sent))
  35.     ambiguous_contexts = [c for c in cfd.conditions() if len(cfd[c]) > 1]
  36.     sum(cfd[c].N() for c in ambiguous_contexts) / cfd.N()
  37.    
  38.     test_tags = [tag for sent in brown.sents(categories='editorial')
  39.                      for (word, tag) in t2.tag(sent)]
  40.     gold_tags = [tag for (word, tag) in brown.tagged_words(categories='editorial')]
  41.     print nltk.ConfusionMatrix(gold_tags, test_tags)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement