Guest User

Untitled

a guest
Apr 3rd, 2018
123
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.74 KB | None | 0 0
  1. #Import Python Reddit API Wrapper
  2. import praw
  3. #Import time to get the latest time
  4. import time
  5. #datetime to do unix/readable translations
  6. import datetime
  7. #We use this for making files and folders
  8. import os
  9.  
  10. #main code
  11. def main():
  12.  
  13. #Connect API to my APP
  14. reddit = praw.Reddit(client_id='', client_secret='', password='', user_agent='', username='')
  15. #ask what sub to archive
  16. subname = input('Input the subreddit to archive: ')
  17. #Set subreddit to what the user input
  18. subreddit = reddit.subreddit(subname)
  19.  
  20. #Archive an entire subreddit
  21. archive(subreddit,subname)
  22.  
  23. def archive(subreddit,subname):
  24. #make a folder named after the subreddit we're archiving if it doesn't currently exist
  25. if not os.path.exists(subname):
  26. os.makedirs(subname)
  27. #get the start date
  28. date1 = input('Input the start date YYYY/MM/DD: ')
  29. #make it unix
  30. date1 = time.mktime(datetime.datetime.strptime(date1, "%Y/%m/%d").timetuple())
  31. #adds 1 day to the first date - "Get all links from the first day"
  32. date2 = date1 + 86400
  33. #enters the directory we just made using the cd class made below
  34. #outside of the 'with' it goes back to the folder where the script is held
  35. with cd(subname):
  36. #loop until the current datetime
  37. while date2 < time.time():
  38. #for each submission between the two dates, process them
  39. for submission in subreddit.submissions(date1,date2):
  40. process_submission(submission)
  41. #add a day onto it
  42. date1+=86400
  43. date2+=86400
  44.  
  45. #What to do with each submission
  46. def process_submission(submission):
  47. title = submission.title
  48. #strip invalid characters from submissions title
  49. title = title.translate({ord(i):None for i in '/><?:|*"'})
  50. #translate doesn't work for backslashes lol
  51. title = title.replace("\\","")
  52. #strip whitespaces newlines etc
  53. title = title.strip()
  54. #Max length is 255 chars in Windows
  55. title=title[:240]
  56. #check if it's already archived
  57. if not os.path.exists(title):
  58. #print the title to console
  59. print(title)
  60. #make a file <post name>.txt if it doesn't already exist
  61. file = open(title+".txt","w", encoding='utf-8')
  62. #some metadata
  63. file.write("ID: "+submission.id+"\n")
  64. #make a readable date and write to the file
  65. readabledate = (datetime.datetime.fromtimestamp(int(submission.created)).strftime('%Y-%m-%d %H:%M:%S'))
  66. file.write("date: "+str(readabledate)+"\n")
  67. file.write("author: "+submission.author.name+"\n")
  68. file.write("url: "+submission.url+"\n")
  69. #write the selftext
  70. if submission.selftext != "":
  71. file.write("\n---------------------------------------\n\n")
  72. file.write(submission.selftext)
  73. file.write("\n\n---------------------------------------\n\n")
  74. submission.comments.replace_more(limit=None)
  75. for comment in submission.comments.list():
  76. readablecommentdate = (datetime.datetime.fromtimestamp(int(comment.created)).strftime('%Y-%m-%d %H:%M:%S'))
  77. file.write(comment.id+" // ")
  78. #if the author deleted their account it fails so
  79. if comment.author != None:
  80. file.write(comment.author.name)
  81. file.write(" // "+readablecommentdate+"\n")
  82. file.write(comment.body+"\n\n")
  83. file.close()
  84.  
  85.  
  86. #so we can change directories easily
  87. class cd:
  88. #Context manager for changing the current working directory
  89. def __init__(self, newPath):
  90. self.newPath = os.path.expanduser(newPath)
  91.  
  92. def __enter__(self):
  93. self.savedPath = os.getcwd()
  94. os.chdir(self.newPath)
  95.  
  96. def __exit__(self, etype, value, traceback):
  97. os.chdir(self.savedPath)
  98.  
  99. #Script entry point: main() is called unconditionally when this file is executed (and also if it is imported)
  100. main()
Add Comment
Please, Sign In to add comment