Advertisement
Guest User

Untitled

a guest
Feb 9th, 2016
57
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.87 KB | None | 0 0
  1. import nltk
  2. import random
  3. from nltk.corpus import movie_reviews
  4.  
  5. documents = [(list(movie_reviews.words(fileid)), category)
  6. for category in movie_reviews.categories()
  7. for fileid in movie_reviews.fileids(category)]
  8.  
  9. random.shuffle(documents)
  10.  
  11. all_words = []
  12. for w in movie_reviews.words():
  13. all_words.append(w.lower())
  14. all_words = nltk.FreqDist(all_words)
  15. word_features = list(all_words.keys())[:3000]
  16.  
  17. def find_features(document):
  18. words = set(document)
  19. features = {}
  20. for w in word_features:
  21. features[w] = (w in words)
  22.  
  23. return features
  24.  
  25. featuresets = [(find_features(rev), category) for (rev, category) in documents]
  26.  
  27.  
  28. training_set = featuresets[:1900]
  29. testing_set = featuresets[1900:]
  30.  
  31. classifier = nltk.NaiveBayesClassifier.train(training_set)
  32. print("Classifier accuracy percent:",(nltk.classify.accuracy(classifier, testing_set))*100)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement