Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import praw
- import sqlite3
- import sys
- import time
- from itertools import chain
# Wall-clock start; read again at the end of the script to report run time.
time_start = time.time()

# The Reddit username to archive is the script's one required argument.
# Exit with a usage message instead of an IndexError traceback if it is
# missing.
if len(sys.argv) < 2:
    sys.exit('usage: {} <reddit_username>'.format(sys.argv[0]))

# NOTE: the credentials below are blank placeholders and must be filled in
# before the script can authenticate.
r = praw.Reddit(client_id='',
                client_secret='',
                user_agent='',
                password='',
                username='')
user = r.redditor(sys.argv[1])

print('Generating database...')

# Every archived user shares the single `comments` table in archive.db; the
# rows of one user can be pulled out with
#     SELECT * FROM comments WHERE author = ?
conn = sqlite3.connect('archive.db')
c = conn.cursor()

# Create the table on first run only. comment_id is the primary key, so each
# comment occupies at most one row.
c.execute('CREATE TABLE IF NOT EXISTS comments('
          'permalink TEXT,'
          'subreddit TEXT,'
          'author TEXT,'
          'comment TEXT,'
          'score INTEGER,'
          'timestamp INTEGER,'
          'controversiality INTEGER,'
          'edited TEXT,'
          'score_hidden TEXT,'
          'gilded INTEGER,'
          'distinguished TEXT,'
          'author_flair_css_class TEXT,'
          'author_flair_text TEXT,'
          'comment_id TEXT PRIMARY KEY)')
def get_comments(redditor=None, limit=1000):
    """Yield one database row per distinct comment of a redditor.

    The redditor's "new" and "top" comment listings are walked back to back.
    A comment that shows up in both listings is yielded only once (the first
    time it is seen), so the caller never writes the same row twice.

    Args:
        redditor: Object exposing ``comments.new(limit=...)`` and
            ``comments.top(limit=...)``. Defaults to the module-level
            ``user`` when omitted.
        limit: Maximum number of comments requested from each listing.

    Yields:
        A 14-tuple in the column order of the ``comments`` table:
        (permalink, subreddit, author, comment, score, timestamp,
        controversiality, edited, score_hidden, gilded, distinguished,
        author_flair_css_class, author_flair_text, comment_id).
    """
    if redditor is None:
        redditor = user

    seen_ids = set()
    listings = chain(redditor.comments.new(limit=limit),
                     redditor.comments.top(limit=limit))
    for comment in listings:
        # The two listings overlap; skip anything already emitted.
        if comment.id in seen_ids:
            continue
        seen_ids.add(comment.id)

        # Explicit attributes rather than str() of the API objects, so the
        # link does not depend on how those objects render themselves.
        permalink = 'reddit.com/r/{}/comments/{}//{}'.format(
            comment.subreddit.display_name,
            comment.submission.id,
            comment.id)
        yield (
            permalink,
            comment.subreddit.display_name,
            comment.author.name,
            comment.body,
            comment.score,
            comment.created_utc,
            comment.controversiality,
            comment.edited,
            comment.score_hidden,
            comment.gilded,
            comment.distinguished,
            comment.author_flair_css_class,
            comment.author_flair_text,
            comment.id,
        )
# Rows are streamed straight from the generator into executemany, so the
# comments are never collected into an intermediate list (and no running
# count of new comments is kept).
print('Starting archival...')

try:
    # Write permalink, score, timestamp, etc. for every comment. OR REPLACE
    # refreshes the row of a comment that was archived on an earlier run.
    c.executemany('INSERT OR REPLACE INTO comments('
                  'permalink,'
                  'subreddit,'
                  'author,'
                  'comment,'
                  'score,'
                  'timestamp,'
                  'controversiality,'
                  'edited,'
                  'score_hidden,'
                  'gilded,'
                  'distinguished,'
                  'author_flair_css_class,'
                  'author_flair_text,'
                  'comment_id)'
                  'VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
                  get_comments())
    conn.commit()
finally:
    # Release the cursor and connection even when fetching or inserting
    # fails part-way through.
    c.close()
    conn.close()

# Report how long the whole run took. Whole seconds are used so the parts
# print as integers; the previous '{:f}' / '{:02f}' specs printed six
# decimal places for each of them.
seconds = int(time.time() - time_start)
m, s = divmod(seconds, 60)
h, m = divmod(m, 60)
print('Finished archiving /u/{} in {:d} hours, {:02d} minutes, and {:02d} seconds'.format(
    user, h, m, s))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement