Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import nltk
- import random
- from nltk.sentiment.util import *
- from pymongo import MongoClient
- random.seed(2213)  # Fixed seed so the later random.shuffle(reviews) gives the same order on every run.
- # SENTIMENT ANALYSIS - AMAZON REVIEWS CLASSIFICATION
- # Retrieves document (review) data from mongodb
- # Returns: [ (list of words review1, cat review1), ...]
- def getReviewData():
- client = MongoClient('localhost', 27018)
- client.test.authenticate('studenti', '_____', mechanism='SCRAM-SHA-1')
- ...
- return reviews
- # Create train and test sets
- reviews = getReviewData()
- random.shuffle(reviews)
- sentimAnalyzer = nltk.sentiment.SentimentAnalyzer()
- allWordsNeg = sentimAnalyzer.all_words([mark_negation(doc) for doc in reviews])
- unigramFeats = sentimAnalyzer.unigram_word_feats(allWordsNeg, min_freq=4)
- sentimAnalyzer.add_feat_extractor(extract_unigram_feats, unigrams=unigramFeats)
- featureSets = sentimAnalyzer.apply_features(reviews)
- trainSet, testSet = ...
- # Train and classify
- classifier = sentimAnalyzer.train(nltk.classify.NaiveBayesClassifier.train, trainSet)
- for key,value in sorted(sentimAnalyzer.evaluate(testSet).items()):
- print('{0}: {1}'.format(key, value))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement