Advertisement
Guest User

Untitled

a guest
Apr 24th, 2019
71
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.84 KB | None | 0 0
  1. import psaw
  2. import ujson as json
  3. import urllib3
  4.  
  5. http = urllib3.PoolManager()
  6.  
  7. api = psaw.PushshiftAPI()
  8.  
  9. lastdate = 1286870618 #epoch time of r/sn first post
  10.  
  11. def fetchdata(lastdate):
  12. fields =('selftext',
  13. 'author',
  14. 'id',
  15. 'permalink',
  16. 'created_utc',
  17. 'title',
  18. 'subreddit_id')
  19.  
  20. subrsubs = http.request('GET', 'https://api.pushshift.io/reddit/search/submission',
  21. fields = {'subreddit': 'suicidenotes',
  22. 'sort': 'asc',
  23. 'limit': '50',
  24. 'after': str(lastdate)})
  25. parsed = json.loads(subrsubs.data.decode('utf-8'))['data']
  26. if parsed.__len__() == 0:
  27. return False
  28. suidict = {'data': [], 'nonselfposts': []}
  29. for i in parsed:
  30. if 'selftext' not in i:
  31. suidict['nonselfposts'].append(i)
  32. else:
  33. suidict['data'].append({})
  34. for key in fields:
  35. suidict['data'][suidict['data'].__len__()-1][key] = i[key]
  36. suidict['firstdate'] = suidict['data'][0]['created_utc']
  37. suidict['lastdate'] = suidict['data'][suidict['data'].__len__()-1]['created_utc']
  38. suidict['sub_id'] = suidict['data'][0]['subreddit_id']
  39. return suidict
  40.  
  41. fetch = True
  42.  
  43. while fetch == True:
  44. dataout = fetchdata(lastdate)
  45. if dataout == False:
  46. fetch = False
  47. else:
  48. lastdate = dataout['lastdate']
  49. fname = "_".join([dataout['sub_id'],
  50. str(dataout['firstdate']),
  51. str(dataout['lastdate'])])
  52. with open(fname + ".json", 'w') as outfile:
  53. json.dump(dataout, outfile)
  54.  
  55.  
  56. # with open('sui.json', 'w') as outfile:
  57. # ujson.dump(suinotes, outfile)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement