Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
## -*- coding: utf-8 -*-
import random

import nltk

# Load the 'training' portion of NLTK's ppattach corpus.
wholeTrainingCorpus = nltk.corpus.ppattach.attachments('training')

# Keep only the instances labelled 'N' (noun attachment, going by the
# variable name).
nounAttachmentCorpus = [inst for inst in wholeTrainingCorpus
                        if inst.attachment == 'N']

# Shuffle in place; the train/test split further down is purely positional.
# No seed is set, so the split differs from run to run.
random.shuffle(nounAttachmentCorpus)
def get_features(inst):
    """Build the feature dictionary for a single instance.

    Only the ``noun1`` attribute of ``inst`` is currently used.

    Args:
        inst: Object exposing a ``noun1`` attribute.

    Returns:
        dict: A new dictionary of the form ``{'noun1': inst.noun1}``.
    """
    features = {}
    features['noun1'] = inst.noun1
    # Additional candidate features, currently disabled:
    # features['noun2'] = inst.noun2
    # features['verb'] = inst.verb
    return features
# Pair each instance's features with its preposition, which is the label
# the classifiers are trained to predict.
featureSets = [(get_features(inst), inst.prep)
               for inst in nounAttachmentCorpus]

# Floor division keeps the cut-off a valid (integer) slice index on both
# Python 2 and Python 3.
cutOff = len(featureSets) // 4
# NOTE(review): the first quarter is used for training and the remaining
# three quarters for testing -- confirm this ratio is intended.
trainSet, testSet = featureSets[:cutOff], featureSets[cutOff:]

naiveBayesClassifier = nltk.NaiveBayesClassifier.train(trainSet)
decisionTreeClassifier = nltk.DecisionTreeClassifier.train(trainSet)

# nltk.classify.accuracy returns a fraction in [0, 1]; the '%' presentation
# type multiplies by 100 and appends the percent sign, so 0.35 prints as
# '35.00%' rather than the misleading '0.35%'.
# Each print takes a single pre-formatted string so the output is the same
# under the Python 2 print statement and the Python 3 print function.
print('Accuracy with the naive Bayes classifier : {0:.2%}'.format(
    nltk.classify.accuracy(naiveBayesClassifier, testSet)))
print('team {0} researchers'.format(
    naiveBayesClassifier.classify({'noun1': 'team'})))
print('')
print('Accuracy with the decision tree classifier : {0:.2%}'.format(
    nltk.classify.accuracy(decisionTreeClassifier, testSet)))
print('team {0} researchers'.format(
    decisionTreeClassifier.classify({'noun1': 'team'})))
Advertisement
Add Comment
Please, Sign In to add comment