Advertisement
Guest User

Amazon reviews classification

a guest
Nov 15th, 2018
96
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.09 KB | None | 0 0
  1. import nltk
  2. import random
  3.  
  4. from nltk.sentiment.util import *
  5. from pymongo import MongoClient
  6.  
  7. random.seed(2213)
  8.  
  9. # SENTIMENT ANALYSIS - AMAZON REVIEWS CLASSIFICATION
  10.  
  11. # Retrieve review documents from the MongoDB instance at localhost:27018.
  12. # Returns: a list of (word list, category) tuples, one per review: [(words1, cat1), ...]
  13. def getReviewData():
  14.     client = MongoClient('localhost', 27018)
  15.     client.test.authenticate('studenti', '_____', mechanism='SCRAM-SHA-1')  # NOTE(review): Database.authenticate() was removed in PyMongo 4 - confirm the installed version, or pass credentials to MongoClient instead
  16.    
  17.     ...  # body elided in this listing; it must build the `reviews` list returned below
  18.    
  19.     return reviews
  20.  
  21. # Load and shuffle the reviews, build the feature sets, then create the train and test sets
  22. reviews = getReviewData()
  23. random.shuffle(reviews)  # in-place shuffle; the module seeds random with 2213 earlier in the file
  24. sentimAnalyzer = nltk.sentiment.SentimentAnalyzer()
  25. allWordsNeg = sentimAnalyzer.all_words([mark_negation(doc) for doc in reviews])  # words gathered after mark_negation is applied to every review
  26. unigramFeats = sentimAnalyzer.unigram_word_feats(allWordsNeg, min_freq=4)  # presumably keeps only words seen at least min_freq times - confirm against the NLTK docs
  27. sentimAnalyzer.add_feat_extractor(extract_unigram_feats, unigrams=unigramFeats)
  28. featureSets = sentimAnalyzer.apply_features(reviews)
  29. trainSet, testSet = ...  # placeholder in this listing: replace with an actual split of featureSets before running
  30.  
  31. # Train a Naive Bayes classifier on trainSet, then evaluate it on testSet
  32. classifier = sentimAnalyzer.train(nltk.classify.NaiveBayesClassifier.train, trainSet)
  33. for key,value in sorted(sentimAnalyzer.evaluate(testSet).items()):  # print each evaluate() result as "key: value", sorted by key
  34.     print('{0}: {1}'.format(key, value))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement