Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import sys
- import dataset
- import praw
# Value passed as ``limit=`` to both the submission and the comment listing
# requests in main().
# NOTE(review): None presumably asks PRAW for as many items as the API will
# return -- confirm against the PRAW listing documentation.
LIMIT = None
def normalize_submission(submission):
    """Flatten an object's attributes into a dict of plain column values.

    The instance ``__dict__`` of *submission* is walked and each attribute is
    handled as follows:

    * ``author`` / ``subreddit`` are converted with ``str()``.
    * ``id`` is parsed as a base-36 string and stored as an ``int``.
    * names starting with ``_`` are dropped.
    * ``dict`` and ``list`` values are dropped.
    * everything else is copied unchanged.

    Returns:
        A new ``dict`` mapping attribute name to the normalized value.

    Raises:
        ValueError: if ``id`` is not a valid base-36 string.
    """
    d = {}
    for k, v in submission.__dict__.items():
        if k in ('author', 'subreddit'):
            # for Redditor and Subreddit instance
            # NOTE(review): a missing author (None) becomes the string 'None'.
            v = str(v)
        elif k == 'id':
            v = int(v, 36)
        elif k.startswith('_'):
            continue
        elif isinstance(v, (dict, list)):
            continue
        d[k] = v
    return d
def normalize_comment(comment):
    """Flatten *comment* into a dict of column values.

    Comments are normalized with exactly the same rules as submissions, so
    this simply delegates to ``normalize_submission``.
    """
    return normalize_submission(comment)
def normalize_subreddit(subreddit):
    """Flatten a subreddit mapping into a dict of plain column values.

    *subreddit* must provide ``.items()`` (it is iterated as a mapping).
    Each entry is handled as follows:

    * ``banner_size`` / ``icon_size`` sequences are joined into a
      comma-separated string (e.g. ``[1280, 384]`` -> ``'1280,384'``);
      ``None`` is kept as ``None``.
    * ``id`` is parsed as a base-36 string and stored as an ``int``.
    * keys starting with ``_`` are dropped.
    * ``dict`` and ``list`` values are dropped.
    * everything else is copied unchanged.

    Returns:
        A new ``dict`` mapping key to the normalized value.

    Raises:
        ValueError: if ``id`` is not a valid base-36 string.
    """
    d = {}
    for k, v in subreddit.items():
        if k in ('banner_size', 'icon_size'):
            # A size may be absent (None); joining None would raise
            # TypeError, so only join when there is something to iterate.
            if v is not None:
                v = ','.join(str(e) for e in v)
        elif k == 'id':
            v = int(v, 36)
        elif k.startswith('_'):
            continue
        elif isinstance(v, (dict, list)):
            continue
        d[k] = v
    return d
def normalize_redditor(redditor):
    """Flatten a redditor object's attributes into a dict of column values.

    The instance ``__dict__`` of *redditor* is walked and each attribute is
    handled as follows:

    * names starting with ``_`` are dropped.
    * ``id`` is parsed as a base-36 string and stored as an ``int``.
    * ``subreddit`` is dropped (main() stores it in its own table).
    * everything else is copied unchanged.

    Unlike the submission/subreddit normalizers, other ``dict`` / ``list``
    values are NOT filtered out here.

    Returns:
        A new ``dict`` mapping attribute name to the normalized value.

    Raises:
        ValueError: if ``id`` is not a valid base-36 string.
    """
    d = {}
    for k, v in redditor.__dict__.items():
        if k.startswith('_'):
            continue
        elif k == 'id':
            v = int(v, 36)
        elif k == 'subreddit':
            continue
        d[k] = v
    return d
def main():
    """Scrape one subreddit into the SQLite database ``reddit.db``.

    Command line: ``PROG PROFILE SUBREDDIT`` where PROFILE is handed to
    ``praw.Reddit`` and SUBREDDIT to ``reddit.subreddit``.

    Steps, in order:

    1. Insert every item of the subreddit's ``new`` listing into the
       ``submission`` table.
    2. Insert every item of the subreddit's ``comments`` listing into the
       ``comment`` table.
    3. For each distinct author found in those two tables, insert the
       redditor into ``redditor`` and, when it has one, its ``subreddit``
       entry into ``subreddit``.
    4. Print ``display_name``, ``title`` and ``public_description`` of every
       stored subreddit row, tab-separated.

    Exits with status 1 and a usage message on stderr when the argument
    count is wrong.
    """
    if len(sys.argv) != 3:
        # The program name must actually be interpolated; otherwise the
        # user sees a literal '%s'.
        sys.stderr.write('Usage: %s PROFILE SUBREDDIT\n' % sys.argv[0])
        sys.exit(1)

    reddit = praw.Reddit(sys.argv[1])
    reddit.read_only = True
    subreddit = reddit.subreddit(sys.argv[2])
    database = dataset.connect('sqlite:///reddit.db')

    table = database['submission']
    for submission in subreddit.new(limit=LIMIT):
        print('submission %s' % submission.id)
        table.insert(normalize_submission(submission))

    table = database['comment']
    for comment in subreddit.comments(limit=LIMIT):
        print('comment %s' % comment.id)
        table.insert(normalize_comment(comment))

    # Union of the authors seen in both tables, each visited once.
    # NOTE(review): authors were stored via str(), so a missing author shows
    # up here as the string 'None' -- confirm how that should be handled.
    authors = set()
    for table_name in ('submission', 'comment'):
        for row in database[table_name].distinct('author'):
            authors.add(row['author'])

    redditor_table = database['redditor']
    subreddit_table = database['subreddit']
    for author in authors:
        redditor = reddit.redditor(author)
        # NOTE(review): reading redditor.id presumably makes PRAW fetch the
        # profile, which populates the __dict__ that normalize_redditor
        # copies -- keep this access before the insert; confirm.
        print('redditor %s' % redditor.id)
        redditor_table.insert(normalize_redditor(redditor))
        if redditor.subreddit:
            print('subreddit %s' % redditor.subreddit['name'])
            subreddit_table.insert(normalize_subreddit(redditor.subreddit))

    for row in database['subreddit']:
        print('\t'.join((row['display_name'], row['title'], row['public_description'])))
# Run the scraper only when executed as a script, so importing this module
# (e.g. to reuse the normalize_* helpers) has no side effects.
if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement