Not a member of Pastebin yet?
Sign up —
it unlocks many cool features!
- """pull all the Screenshot saturday /r/gamedev posts"""
- import requests
- import re
- url = r'http://www.reddit.com/r/gamedev/comments/186b6n/screenshot_saturday_105_one_does_not_simply/'
- ##the plan is to pull all the links from the body of the post and pull the links
- ## with Screenshot_saturday in them. without using PRAW or my reddit module
- #regex to find markup urls
- link_markup = r"\[(.*)\]\((.*?)\)"
- #don't worry about 404s, not found etc for this quick ex
- # assuming all SS are numbered
- #this is the current SS we're on, the next one will have current_ss - 1 in it,
- # so 104
- next_ss=104
- final_results =[]
- while next_ss > 0:
- #hardcoded because the link is wrong in the 102 post, so it's sort of skipping the stuff
- if next_ss == 101:
- print '\tskipping 102 stuff'
- url = \
- r"http://www.reddit.com/r/gamedev/comments/16f3b4/screenshot_saturday_101_battle_on/"
- proper_url = "{0}.json".format(url)
- final_results.append(("101", url))
- elif next_ss == 70:
- url = \
- r'http://www.reddit.com/r/gamedev/comments/ustcw/screenshot_saturday_70_yes_another_one/'
- proper_url = "{0}.json".format(url)
- else:
- proper_url = "{0}.json".format(url)
- r = requests.get(proper_url)
- #find the self text in the http response
- self_text = r.json[0]['data']['children'][0]['data']['selftext']
- #find the possible links in the self text
- groups = re.findall(link_markup,self_text)
- #find the links that match 'screenshot'
- group_matches = []
- for grp in groups:
- if "screenshot" in grp[1] and str(next_ss) in grp[1]:
- group_matches.append(grp)
- print '\tfound', grp[0]
- #only need the last one, since it's the most previous one
- #no matches, decrement next_ss and try again
- if group_matches == []:
- print 'cant find',next_ss,'skipping'
- else:
- last_link = group_matches[0]
- url = last_link[1]
- final_results.append(last_link)
- print 'for', next_ss,
- print 'saving and using', last_link[0], 'at', url
- next_ss-=1
- print 'done entire loop'
- #write to file
- with open(r'c:/ss.txt','w') as f:
- for pair in final_results:
- # needs to write pair[0] for the title, but unicode is too big of a barrier
- # at 1:42 am on a friday night
- f.write(u'{}\n'.format(pair[1]))
Add Comment
Please sign in to add a comment.