Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- ## -*- coding: utf-8 -*-
- import nltk
- import random
def gender_features(word):
    """Build the suffix features used to classify a first name by gender.

    Args:
        word: The name to describe.

    Returns:
        A dict with the keys ``last_letter``, ``last_two_letters`` and
        ``last_three_letters`` holding the final one, two and three
        characters of ``word``. A name shorter than the requested suffix
        yields the whole name; an empty name yields empty strings.
    """
    return {
        # Slice rather than index so an empty name gives '' instead of
        # raising IndexError, matching how the longer suffixes behave.
        'last_letter': word[-1:],
        'last_two_letters': word[-2:],
        'last_three_letters': word[-3:],
    }
# Number of shuffled names held out for evaluation; the rest train the models.
TEST_SET_SIZE = 500


def _to_text(name):
    """Return ``name`` as text, decoding Latin-1 byte strings when needed."""
    # The corpus reader may hand back byte strings or already-decoded text
    # depending on the Python/NLTK versions in use; only bytes need decoding.
    if isinstance(name, bytes):
        return name.decode('latin-1')
    return name


# Load both name lists from the NLTK "names" corpus as text.
maleNames = [_to_text(name)
             for name in nltk.corpus.names.words('male.txt')]
femaleNames = [_to_text(name)
               for name in nltk.corpus.names.words('female.txt')]

# Label every name, then shuffle so the train/test split mixes both genders.
names = [(name, 'male') for name in maleNames] + \
        [(name, 'female') for name in femaleNames]
random.shuffle(names)

# Turn each labelled name into a (feature dict, label) pair and hold out the
# first TEST_SET_SIZE pairs for evaluation.
featuresets = [(gender_features(n), g) for (n, g) in names]
train_set, test_set = featuresets[TEST_SET_SIZE:], featuresets[:TEST_SET_SIZE]

# Train the three classifiers on the same training data.
naiveBayesClassifier = nltk.NaiveBayesClassifier.train(train_set)
decisionTreeClassifier = nltk.DecisionTreeClassifier.train(train_set)
maxEntClassifier = nltk.MaxentClassifier.train(train_set)

# nltk.classify.accuracy returns a fraction in [0, 1]; scale it by 100 so the
# figure printed in front of the '%' sign really is a percentage.
for template, classifier in (
        ('Accuracy with the naive Bayes classifier : {0:.2f}%',
         naiveBayesClassifier),
        ('Accuracy with the decision tree classifier : {0:.2f}%',
         decisionTreeClassifier),
        ('Accuracy with the maximum entropy classifier : {0:.2f}%',
         maxEntClassifier),
):
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(template.format(100 * nltk.classify.accuracy(classifier, test_set)))
Advertisement
Add Comment
Please, Sign In to add comment