Advertisement
TeamFocus-Matija

Skripta

Sep 21st, 2018
92
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.31 KB | None | 0 0
  # Libraries required by this project (standard library first, then third-party).
import json
import re
import time

import openpyxl
import pandas
import requests

  # Daily sentiment scoring of the top r/Bitcoin submissions fetched from Pushshift.
#
# For every day in the analysed range the script downloads the highest-scored
# submissions, counts lexicon hits (positive.txt / negative.txt) in each post,
# prints a per-day summary and finally exports all days to output.xlsx.

# Unix timestamp of the start of the first analysed day (2017-08-01 00:00:00 UTC).
FIRST_DAY_START = 1501545600
# A full day; the original stepped by 86399, which made consecutive windows
# overlap and drift one second earlier per day.
SECONDS_PER_DAY = 86400
NUMBER_OF_DAYS = 365
POSTS_PER_DAY = 30
REQUEST_TIMEOUT_SECONDS = 30

POSITIVE_WORDS_FILE = "positive.txt"
NEGATIVE_WORDS_FILE = "negative.txt"
OUTPUT_FILE = "output.xlsx"
SEARCH_URL = "https://api.pushshift.io/reddit/submission/search/"

# A token is a run of word characters and apostrophes, so punctuation such as
# ",.!?;:" already acts as a separator and needs no separate substitution pass.
WORD_PATTERN = re.compile(r"[\w']+")


def load_word_set(path):
    """Return the non-empty, whitespace-stripped lines of *path* as a set.

    The file is closed as soon as it has been read. A set gives O(1)
    membership tests instead of scanning the whole lexicon for every word.
    """
    with open(path, "r") as handle:
        return {line.strip() for line in handle if line.strip()}


def count_sentiment(text, positive_words, negative_words):
    """Count lexicon hits in *text*.

    Each token is lower-cased before lookup. A token that appears in both
    lexicons is counted once on each side.

    Returns:
        A ``(positive_hits, negative_hits)`` tuple.
    """
    positive_hits = 0
    negative_hits = 0
    for token in WORD_PATTERN.findall(text):
        word = token.lower()
        if word in positive_words:
            positive_hits += 1
        if word in negative_words:
            negative_hits += 1
    return positive_hits, negative_hits


def score_post(post, positive_words, negative_words):
    """Return ``(positive_hits, negative_hits)`` for one submission dict.

    When the post has a non-empty ``selftext`` only that text is scored;
    otherwise the ``title`` is scored. This keeps the original script's
    effective behaviour (it reset the title counts whenever a body existed)
    without doing the discarded title pass.
    """
    body = post.get("selftext") or ""
    return count_sentiment(body or post["title"], positive_words, negative_words)


def day_windows(first_start, number_of_days):
    """Yield ``(start, end)`` Unix timestamps for consecutive full days.

    ``end`` is the last second of the day (``start + 86399``) and the next
    window starts exactly one second later, so windows neither overlap nor
    drift. Exactly *number_of_days* windows are produced.
    """
    for offset in range(number_of_days):
        start = first_start + offset * SECONDS_PER_DAY
        yield start, start + SECONDS_PER_DAY - 1


def format_date(timestamp):
    """Format a Unix timestamp as ``YYYY-MM-DD`` in UTC.

    UTC is used because the day windows are aligned to UTC midnight; local
    time would label them with the previous day west of Greenwich.
    """
    return time.strftime('%Y-%m-%d', time.gmtime(timestamp))


def format_ratio(positive, negative):
    """Return ``positive / negative`` as a string, or ``"N/A"`` if undefined.

    A day without any negative hit (or without posts at all) would otherwise
    abort the whole run with ``ZeroDivisionError``.
    """
    if negative == 0:
        return "N/A"
    return str(positive / negative)


def fetch_top_posts(start, end):
    """Download the top-scored r/Bitcoin submissions created in ``(start, end)``.

    Raises:
        requests.HTTPError: if the API answers with an error status.
        requests.Timeout: if no answer arrives within the timeout.
    """
    params = {
        "after": start,
        "before": end,
        "sort_type": "score",
        "sort": "desc",
        "subreddit": "Bitcoin",
        "category": "best",
        "size": POSTS_PER_DAY,
    }
    response = requests.get(SEARCH_URL, params=params, timeout=REQUEST_TIMEOUT_SECONDS)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    response.raise_for_status()
    return response.json()["data"]


def main():
    """Run the analysis for every day and export the result to Excel."""
    positive_words = load_word_set(POSITIVE_WORDS_FILE)
    negative_words = load_word_set(NEGATIVE_WORDS_FILE)

    # One entry per analysed day; values are stored as strings, as before.
    dates = []
    comments = []
    scores = []
    positive_sentiment = []
    negative_sentiment = []
    positive_slash_negative = []
    sentiment_score = []

    for start, end in day_windows(FIRST_DAY_START, NUMBER_OF_DAYS):
        posts = fetch_top_posts(start, end)

        total_score = 0
        total_comments = 0
        positive_score = 0
        negative_score = 0
        for post in posts:
            total_score += post['score']
            total_comments += post['num_comments']
            positive_hits, negative_hits = score_post(
                post, positive_words, negative_words
            )
            positive_score += positive_hits
            negative_score += negative_hits

        date = format_date(start)

        dates.append(date)
        comments.append(str(total_comments))
        scores.append(str(total_score))
        positive_sentiment.append(str(positive_score))
        negative_sentiment.append(str(negative_score))
        positive_slash_negative.append(format_ratio(positive_score, negative_score))
        sentiment_score.append(str(positive_score - negative_score))

        # Progress report on the console for the day that has just been finished.
        day = {"date":date,"positive score":positive_score,"negative score:":negative_score}
        print(day)

    new_dataframe = pandas.DataFrame(
        {
            "Date": dates,
            "Num of comments on 30 posts": comments,
            "Score on 30 posts": scores,
            "Positive Sentiment": positive_sentiment,
            "Negative Sentiment": negative_sentiment,
            "P/N": positive_slash_negative,
            "Sentiment Score": sentiment_score,
        }
    )
    # The context manager saves and closes the workbook; ExcelWriter.save()
    # no longer exists in pandas 2.x.
    with pandas.ExcelWriter(OUTPUT_FILE) as writer:
        new_dataframe.to_excel(writer, sheet_name='Sheet1')


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement