Advertisement
Guest User

Untitled

a guest
Jun 26th, 2017
78
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.56 KB | None | 0 0
  1. import sys
  2.  
  3. import dataset
  4. import praw
  5.  
  6.  
  7. LIMIT = None
  8.  
  9. def normalize_submission(submission):
  10. d = {}
  11. for k, v in submission.__dict__.items():
  12. if k in ('author', 'subreddit'):
  13. v = str(v) # for Redditor and Subreddit instance
  14. elif k == 'id':
  15. v = int(v, 36)
  16. elif k.startswith('_'):
  17. continue
  18. elif isinstance(v, (dict, list)):
  19. continue
  20. d[k] = v
  21. return d
  22.  
  23.  
  24. def normalize_comment(comment):
  25. return normalize_submission(comment)
  26.  
  27.  
  28. def normalize_subreddit(subreddit):
  29. d = {}
  30. for k, v in subreddit.items():
  31. if k in ('banner_size', 'icon_size'):
  32. v = ','.join(str(e) for e in v)
  33. elif k == 'id':
  34. v = int(v, 36)
  35. elif k.startswith('_'):
  36. continue
  37. elif isinstance(v, (dict, list)):
  38. continue
  39. d[k] = v
  40. return d
  41.  
  42.  
  43. def normalize_redditor(redditor):
  44. d = {}
  45. for k, v in redditor.__dict__.items():
  46. if k.startswith('_'):
  47. continue
  48. elif k == 'id':
  49. v = int(v, 36)
  50. elif k == 'subreddit':
  51. continue
  52. d[k] = v
  53. return d
  54.  
  55.  
  56. def main():
  57. if len(sys.argv) != 3:
  58. sys.stderr.write('Usage: %s PROFILE SUBREDDIT\n')
  59. sys.exit(1)
  60.  
  61. reddit = praw.Reddit(sys.argv[1])
  62. reddit.read_only = True
  63. subreddit = reddit.subreddit(sys.argv[2])
  64.  
  65. database = dataset.connect('sqlite:///reddit.db')
  66.  
  67. table = database['submission']
  68. for submission in subreddit.new(limit=LIMIT):
  69. print('submission %s' % submission.id)
  70. table.insert(normalize_submission(submission))
  71.  
  72. table = database['comment']
  73. for comment in subreddit.comments(limit=LIMIT):
  74. print('comment %s' % comment.id)
  75. table.insert(normalize_comment(comment))
  76.  
  77. authors = []
  78. for row in database['submission'].distinct('author'):
  79. authors.append(row['author'])
  80. for row in database['comment'].distinct('author'):
  81. authors.append(row['author'])
  82. authors = set(authors)
  83.  
  84. redditor_table = database['redditor']
  85. subreddit_table = database['subreddit']
  86. for author in authors:
  87. redditor = reddit.redditor(author)
  88. print('redditor %s' % redditor.id)
  89. redditor_table.insert(normalize_redditor(redditor))
  90. if redditor.subreddit:
  91. print('subreddit %s' % redditor.subreddit['name'])
  92. subreddit_table.insert(normalize_subreddit(redditor.subreddit))
  93.  
  94. for row in database['subreddit']:
  95. print('\t'.join((row['display_name'], row['title'], row['public_description'])))
  96.  
  97.  
  98. main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement