Advertisement
Guest User

Untitled

a guest
Aug 14th, 2017
73
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.02 KB | None | 0 0
  1. import praw
  2. import sqlite3
  3. import sys
  4. import time
  5. from itertools import chain
  6. time_start = time.time()
  7.  
  8. r = praw.Reddit(client_id='',
  9. client_secret='',
  10. user_agent='',
  11. password='',
  12. username='')
  13.  
  14. user = r.redditor(sys.argv[1])
  15.  
  16. print('Generating database...')
  17. # Personally I would create one database and include
  18. # the author and then select * from comments where author=?
  19. # but I kind of assume you like this functionality
  20.  
  21. # initializing database
  22. conn = sqlite3.connect('archive.db')
  23. c = conn.cursor()
  24. #creates table for user if it doesnt already exist
  25. c.execute('CREATE TABLE IF NOT EXISTS comments('
  26. 'permalink TEXT,'
  27. 'subreddit TEXT,'
  28. 'author TEXT,'
  29. 'comment TEXT,'
  30. 'score INTEGER,'
  31. 'timestamp INTEGER,'
  32. 'controversiality INTEGER,'
  33. 'edited TEXT,'
  34. 'score_hidden TEXT,'
  35. 'gilded INTEGER,'
  36. 'distinguished TEXT,'
  37. 'author_flair_css_class TEXT,'
  38. 'author_flair_text TEXT,'
  39. 'comment_id TEXT PRIMARY KEY)')
  40.  
  41. def get_comments():
  42. for comment in chain(user.comments.new(limit=1000),
  43. user.comments.top(limit=1000)):
  44. permalink = 'reddit.com/r/{}/comments/{}//{}'.format(
  45. comment.subreddit,
  46. comment.submission,
  47. comment)
  48. yield (
  49. permalink,
  50. comment.subreddit.display_name,
  51. comment.author.name,
  52. comment.body,
  53. comment.score,
  54. comment.created_utc,
  55. comment.controversiality,
  56. comment.edited,
  57. comment.score_hidden,
  58. comment.gilded,
  59. comment.distinguished,
  60. comment.author_flair_css_class,
  61. comment.author_flair_text,
  62. comment.id
  63. )
  64.  
  65. # Probably best to just get rid of the number of new comments here, generators are more efficient
  66. print('Starting archival...')
  67. #adding score, permalink, timestamp, etc, to the database
  68. c.executemany('INSERT OR REPLACE INTO comments('
  69. 'permalink,'
  70. 'subreddit,'
  71. 'author,'
  72. 'comment,'
  73. 'score,'
  74. 'timestamp,'
  75. 'controversiality,'
  76. 'edited,'
  77. 'score_hidden,'
  78. 'gilded,'
  79. 'distinguished,'
  80. 'author_flair_css_class,'
  81. 'author_flair_text,'
  82. 'comment_id)'
  83. 'VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
  84. get_comments())
  85.  
  86. conn.commit()
  87.  
  88. #just lets you know how long the program ran for
  89. seconds = time.time()-time_start
  90. m,s = divmod(seconds,60)
  91. h,m = divmod(m, 60)
  92. print('Finished archiving /u/{} in {:f} hours, {:02f} minutes, and {:02f} seconds'.format(
  93. user, h, m, s))
  94.  
  95.  
  96. c.close()
  97. conn.close() 
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement