Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import nltk
- import sys
# Shorthand for writing to standard output without the trailing newline that
# print() appends. The bound method is captured once, at import time, so a
# later reassignment of sys.stdout is not picked up by this alias.
write = sys.stdout.write
class Analyzer():
    """Implements sentiment analysis.

    Scores a piece of text by counting how many of its tokens appear in a
    positive word list versus a negative word list.
    """

    def __init__(self, positives, negatives):
        """Initialize Analyzer.

        Args:
            positives: Path to a text file of positive words, one per line.
            negatives: Path to a text file of negative words, one per line.

        Lines that start with ';' or with a space are skipped, as are blank
        lines. The remaining words are stripped of surrounding whitespace
        and lower-cased, then stored as sets in ``self.positives`` and
        ``self.negatives``.

        Raises:
            OSError: If either file cannot be opened.
        """
        self.positives = self._load_words(positives)
        self.negatives = self._load_words(negatives)

    @staticmethod
    def _load_words(path):
        """Return the set of usable words found in the file at *path*."""
        words = set()
        with open(path) as infile:
            for line in infile:
                # Skip comment/header lines (';') and indented lines.
                if line.startswith((';', ' ')):
                    continue
                # Drop the trailing newline; without this no token could
                # ever compare equal to a stored word.
                word = line.strip()
                if word:
                    # analyze() lower-cases tokens, so normalise the same way.
                    words.add(word.lower())
        return words

    def analyze(self, text):
        """Analyze text for sentiment, returning its score.

        Each token found in the positive list adds 1, each token found in
        the negative list subtracts 1, and any other token contributes 0.

        Args:
            text: The string to score.

        Returns:
            The integer sum of the per-token scores (0 for empty text).
        """
        # TweetTokenizer must be instantiated; tokenize() is an instance
        # method and fails when called on the class itself.
        tokenizer = nltk.tokenize.TweetTokenizer()

        score = 0
        for token in tokenizer.tokenize(text):
            word = token.lower()
            if word in self.positives:
                score += 1
            elif word in self.negatives:
                score -= 1
        return score
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement