TankorSmash

rough draft of SS gathering

Feb 9th, 2013
  1. """pull all the Screenshot saturday /r/gamedev posts"""
  2.  
  3. import requests
  4. import re
  5.  
  6. url = r'http://www.reddit.com/r/gamedev/comments/186b6n/screenshot_saturday_105_one_does_not_simply/'
  7.  
  8. ##the plan is to pull all the links from the body of the post and pull the links
  9. ## with Screenshot_saturday in them. without using PRAW or my reddit module
  10.  
  11. #regex to find markup urls
  12. link_markup = r"\[(.*)\]\((.*?)\)"
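# quick sanity check of the pattern (the markdown snippet is made up):
#   re.findall(link_markup, "see [SS 104](http://example.com/ss104)")
#   -> [('SS 104', 'http://example.com/ss104')]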

# don't worry about 404s, not-founds, etc. for this quick example
# assuming all SS posts are numbered

# the starting URL above is SS 105; the next post to find will have
# current_ss - 1 in it, so 104
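
# rough shape of the traversal (illustrative, not from a real run):
#   SS 105's selftext links to SS 104 -> fetch that page's .json
#   SS 104's selftext links to SS 103 -> and so on down to SS 1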
next_ss = 104
final_results = []
while next_ss > 0:

    # hardcoded because the link in the 102 post is wrong, which would break
    # the chain; record 101 directly and let the next pass fetch its page
    if next_ss == 101:
        print '\tskipping the broken link in the 102 post'
        url = \
            r"http://www.reddit.com/r/gamedev/comments/16f3b4/screenshot_saturday_101_battle_on/"
        final_results.append(("101", url))
    elif next_ss == 70:
        url = \
            r'http://www.reddit.com/r/gamedev/comments/ustcw/screenshot_saturday_70_yes_another_one/'
        # record 70 too so it isn't missing from the output file
        final_results.append(("70", url))
    else:
        proper_url = "{0}.json".format(url)
        r = requests.get(proper_url)
        # find the self text in the http response; r.json() on requests >= 1.0,
        # older versions exposed r.json as a property instead
        self_text = r.json()[0]['data']['children'][0]['data']['selftext']
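        # shape of reddit's comments-page JSON, for reference: the .json URL
        # returns a two-item list where [0] is a Listing for the post itself
        # and [1] holds the comment tree; children[0]['data']['selftext'] is
        # the post body as markdown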

        # find the possible links in the self text
        groups = re.findall(link_markup, self_text)
        # keep the links that mention 'screenshot' and the next SS number
        group_matches = []
        for grp in groups:
            if "screenshot" in grp[1] and str(next_ss) in grp[1]:
                group_matches.append(grp)
                print '\tfound', grp[0]
        # matches should all point at the same post, so the first one will do
        # no matches: just decrement next_ss and retry the same page for the
        # next number down
        if not group_matches:
            print "can't find", next_ss, 'skipping'
        else:
            last_link = group_matches[0]
            url = last_link[1]
            final_results.append(last_link)
            print 'for', next_ss,
            print 'saving and using', last_link[0], 'at', url
    next_ss -= 1

print 'done entire loop'

# write to file; io.open gives an encoding-aware handle so the unicode write
# below doesn't blow up under Python 2
with io.open(r'c:/ss.txt', 'w', encoding='utf-8') as f:
    for pair in final_results:
        # needs to write pair[0] for the title too, but unicode is too big of
        # a barrier at 1:42 am on a friday night
        f.write(u'{}\n'.format(pair[1]))
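
# rough shape of a run (console lines below are illustrative, not captured):
#   found Screenshot Saturday #104 ...
#   for 104 saving and using Screenshot Saturday #104 ... at http://www.reddit.com/...
#   done entire loop
# afterwards c:/ss.txt holds one previous-post URL per line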