Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Scrape tweets for a keyword, score each with VADER sentiment, and plot.

Reads Twitter OAuth credentials from ``twitter_credentials.json``, pulls up
to a user-specified number of tweets matching the entered keyword(s) near
San Francisco, writes ``<keyword>.csv`` with tweet text, compound sentiment
score, location, and user, then shows a score histogram and prints the mean
and median score.
"""
import tweepy
import csv
import json
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import string
from matplotlib import pyplot as plt
import numpy as np
import math
import statistics as stats

# Credentials are kept outside the source file; the JSON must contain
# CONSUMER_KEY, CONSUMER_SECRET, ACCESS_KEY, ACCESS_SECRET.
with open('twitter_credentials.json') as cred_data:
    info = json.load(cred_data)
consumer_key = info['CONSUMER_KEY']
consumer_secret = info['CONSUMER_SECRET']
access_key = info['ACCESS_KEY']
access_secret = info['ACCESS_SECRET']

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
api = tweepy.API(auth)

maximum_number_of_tweets_to_be_extracted = \
    int(input('Enter the number of tweets that you want to extract - '))
# Comma-separated keywords become an OR query for the Twitter search API.
hashtag = input('Enter the keyword(s) you want to scrape - ').replace(", ", " OR ")

lines = [["Tweet", "Score", "Location", "User"]]
analyzer = SentimentIntensityAnalyzer()
scores = []

# str.translate needs a translation table, not the raw punctuation string
# (the original passed string.punctuation directly, which maps codepoints
# < 32 to arbitrary punctuation and leaves everything else untouched).
# This table deletes all ASCII punctuation in one pass.
_strip_punct = str.maketrans('', '', string.punctuation)

for status in tweepy.Cursor(api.search, q=hashtag,
                            geocode="37.752721,-122.327076,30mi",
                            rpp=100).items(maximum_number_of_tweets_to_be_extracted):
    loc = status.user.location
    user = status.user.name
    # status.text is already str in Python 3 — the old
    # str(text.encode("utf-8")).strip("'b") round-trip left literal
    # backslash escapes in the output and is removed.
    tweet = status.text.translate(_strip_punct)
    score = analyzer.polarity_scores(tweet)["compound"]
    scores.append(score)
    lines.append([tweet, score, loc, user])

# newline='' is required by the csv module to avoid blank rows on Windows;
# the context manager closes the file, so no explicit close() is needed.
with open(hashtag + '.csv', 'w', newline='', encoding='utf-8') as csvFile:
    writer = csv.writer(csvFile)
    writer.writerows(lines)

plt.xlim([-1, 1])  # VADER compound scores are bounded to [-1, 1]
plt.hist(scores, bins=20, alpha=0.5)
plt.title('Sentiment Analysis Score Distribution for keyword(s) '
          + hashtag.replace(" OR ", ", "))
plt.xlabel('Score Interval')
plt.ylabel('Count')

if scores:
    print("Average Score: " + str(stats.mean(scores)))
    print("Median Score: " + str(stats.median(scores)))
else:
    # statistics.mean/median raise StatisticsError on empty input, which
    # happened whenever the search returned no tweets.
    print("No tweets found; no statistics to report.")
plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement