Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Scrape tweets for a keyword, score each with VADER sentiment, and plot.

Reads Twitter OAuth credentials from ``twitter_credentials.json``, pulls up
to a user-specified number of tweets matching the entered keyword(s) near
San Francisco, writes ``<keyword>.csv`` with tweet text, compound sentiment
score, location, and user, then shows a score histogram and prints the mean
and median score.
"""
import tweepy
import csv
import json
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import string
from matplotlib import pyplot as plt
import numpy as np
import math
import statistics as stats

# Credentials are kept outside the source file; the JSON must contain
# CONSUMER_KEY, CONSUMER_SECRET, ACCESS_KEY, ACCESS_SECRET.
with open('twitter_credentials.json') as cred_data:
    info = json.load(cred_data)
consumer_key = info['CONSUMER_KEY']
consumer_secret = info['CONSUMER_SECRET']
access_key = info['ACCESS_KEY']
access_secret = info['ACCESS_SECRET']

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
api = tweepy.API(auth)

maximum_number_of_tweets_to_be_extracted = \
    int(input('Enter the number of tweets that you want to extract - '))
# Comma-separated keywords become an OR query for the Twitter search API.
hashtag = input('Enter the keyword(s) you want to scrape - ').replace(", ", " OR ")

lines = [["Tweet", "Score", "Location", "User"]]
analyzer = SentimentIntensityAnalyzer()
scores = []

# str.translate needs a translation table, not the raw punctuation string
# (the original passed string.punctuation directly, which maps codepoints
# < 32 to arbitrary punctuation and leaves everything else untouched).
# This table deletes all ASCII punctuation in one pass.
_strip_punct = str.maketrans('', '', string.punctuation)

for status in tweepy.Cursor(api.search, q=hashtag,
                            geocode="37.752721,-122.327076,30mi",
                            rpp=100).items(maximum_number_of_tweets_to_be_extracted):
    loc = status.user.location
    user = status.user.name
    # status.text is already str in Python 3 — the old
    # str(text.encode("utf-8")).strip("'b") round-trip left literal
    # backslash escapes in the output and is removed.
    tweet = status.text.translate(_strip_punct)
    score = analyzer.polarity_scores(tweet)["compound"]
    scores.append(score)
    lines.append([tweet, score, loc, user])

# newline='' is required by the csv module to avoid blank rows on Windows;
# the context manager closes the file, so no explicit close() is needed.
with open(hashtag + '.csv', 'w', newline='', encoding='utf-8') as csvFile:
    writer = csv.writer(csvFile)
    writer.writerows(lines)

plt.xlim([-1, 1])  # VADER compound scores are bounded to [-1, 1]
plt.hist(scores, bins=20, alpha=0.5)
plt.title('Sentiment Analysis Score Distribution for keyword(s) '
          + hashtag.replace(" OR ", ", "))
plt.xlabel('Score Interval')
plt.ylabel('Count')

if scores:
    print("Average Score: " + str(stats.mean(scores)))
    print("Median Score: " + str(stats.median(scores)))
else:
    # statistics.mean/median raise StatisticsError on empty input, which
    # happened whenever the search returned no tweets.
    print("No tweets found; no statistics to report.")
plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement