Guest User

Chapter 6, exercise 5

a guest
Mar 4th, 2012
368
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.22 KB | None | 0 0
  1. ## -*- coding: utf-8 -*-
  2. import nltk
  3. import random
  4.  
  5. def gender_features(word):
  6.     return {'last_letter': word[-1],
  7.             'last_two_letters': word[-2:],
  8.             'last_three_letters': word[-3:]}
  9.  
  10. maleNames = [unicode(name, 'latin-1')
  11.              for name in nltk.corpus.names.words('male.txt')]
  12. femaleNames = [unicode(name, 'latin-1')
  13.                for name in nltk.corpus.names.words('female.txt')]
  14.  
  15. names = [(name, 'male') for name in maleNames] + \
  16.         [(name, 'female') for name in femaleNames]
  17. random.shuffle(names)
  18.  
  19. featuresets = [(gender_features(n), g) for (n,g) in names]
  20. train_set, test_set = featuresets[500:], featuresets[:500]
  21.  
  22. naiveBayesClassifier = nltk.NaiveBayesClassifier.train(train_set)
  23. decisionTreeClassifier = nltk.DecisionTreeClassifier.train(train_set)
  24. maxEntClassifier = nltk.MaxentClassifier.train(train_set)
  25.  
  26. print 'Accuracy with the naive Bayes classifier : {0:.2f}%'.format(nltk.classify.accuracy(naiveBayesClassifier, test_set))
  27. print 'Accuracy with the decision tree classifier : {0:.2f}%'.format(nltk.classify.accuracy(decisionTreeClassifier, test_set))
  28. print 'Accuracy with the maximum entropy classifier : {0:.2f}%'.format(nltk.classify.accuracy(maxEntClassifier, test_set))
Advertisement
Add Comment
Please, Sign In to add comment