Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#Import of required libraries for this project
import json
import re
import time

import openpyxl
import pandas
import requests
- #Defining the start and the end of the first day that we want to analyze
- start = 1501545600
- end = 1501631999
- #Initialization of every list
- dates = []
- comments = []
- scores = []
- positive_sentiment = []
- negative_sentiment = []
- positive_slash_negative = []
- sentiment_score = []
- #Import of positive and negative pools of words
- p = open("positive.txt",'r')
- n = open("negative.txt",'r')
- x = p.read().split("\n")
- y = n.read().split("\n")
- i = 1
- number_of_days = 365
- while i < number_of_days:
- #URL creation
- url = 'https://api.pushshift.io/reddit/submission/search/?after='+str(start)+'&before='+str(end)+'&sort_type=score&sort=desc&subreddit=Bitcoin&category=best&size=30'
- r = requests.get(url)
- #Data export from the Response
- responseData = r.json()
- data = responseData['data']
- #Reseting all the counters and scores to 0 for the new posts
- counter = 0
- positive_score = 0
- negative_score = 0
- total_score = 0
- total_comments = 0
- for post in data:
- #Data export from the post
- post_id = post['id']
- title = post['title']
- comment = post['num_comments']
- score = post['score']
- if 'selftext' not in post:
- text = ""
- else:
- text = post['selftext']
- total_score = total_score + score
- total_comments = total_comments + comment
- positive_counter=0
- negative_counter=0
- #TITLE ANALYSYS
- title = re.sub('[,.!?;:]',' ',title)
- words_title = re.findall(r"[\w']+", title)
- for word in words_title:
- for positive_word in x:
- if positive_word == word.lower():
- positive_counter = positive_counter + 1
- break
- for negative_word in y:
- if negative_word == word.lower():
- negative_counter = negative_counter + 1
- break
- #IF TEXT AVAILABLE TEXT ANALYSYS
- if text != "":
- text = re.sub('[,.!?;:]',' ',text)
- words_text = re.findall(r"[\w']+", text)
- positive_counter=0
- negative_counter=0
- for word in words_text:
- for positive_word in x:
- if positive_word == word.lower():
- positive_counter = positive_counter + 1
- break
- for negative_word in y:
- if negative_word == word.lower():
- negative_counter = negative_counter + 1
- break
- if text == "":
- counter = counter + 1
- positive_score = positive_score + positive_counter
- negative_score = negative_score + negative_counter
- #Date transformation from timestamp to datetime
- date = time.strftime('%Y-%m-%d', time.localtime(start))
- #Adding all the elements to their lists
- dates.append(date)
- comments.append(str(total_comments))
- scores.append(str(total_score))
- positive_sentiment.append(str(positive_score))
- negative_sentiment.append(str(negative_score))
- positive_slash_negative.append(str(positive_score/negative_score))
- sentiment_score.append(str(positive_score-negative_score))
- #Writing out to the console that the data for this date have been finished
- day = {"date":date,"positive score":positive_score,"negative score:":negative_score}
- print(day)
- #Changing the iterator for the next day
- i=i+1
- start+=86399
- end+=86399
- #Data export to datasheet
- new_dataframe = pandas.DataFrame(
- {
- "Date": dates,
- "Num of comments on 30 posts": comments,
- "Score on 30 posts": scores,
- "Positive Sentiment": positive_sentiment,
- "Negative Sentiment": negative_sentiment,
- "P/N": positive_slash_negative,
- "Sentiment Score": sentiment_score
- }
- )
- writer = pandas.ExcelWriter('output.xlsx')
- new_dataframe.to_excel(writer,'Sheet1')
- writer.save()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement