Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import os
import re
import sys
from urllib.parse import quote
from urllib.request import Request, urlopen, urlretrieve, HTTPError

from bs4 import BeautifulSoup
def mystrip(s, ss):
    """Return ``s`` cut off right after the first occurrence of ``ss``.

    Raises:
        ValueError: if ``ss`` does not occur in ``s`` (propagated from
            ``str.index``).
    """
    return s[:s.index(ss) + len(ss)]


# Suffix appended to a username to form the avatar file name on the image host.
sqjpg = "_square.jpg"
# Member id (the ``uid`` query parameter) the scrape starts from.
usrcount = 1
# The scrape loop runs while usrcount < countstop.
countstop = 813687 #change for if statement for updates
# Directory prefix the downloaded images are written under.
imgdir = "C:/scrape/"
def _save_avatar(uid):
    """Download the square avatar of member ``uid`` into ``imgdir``.

    The member's images page is fetched first; the username is taken from
    the fourth space-separated word of the page title, and the avatar is
    then requested from the image host as ``<username>`` + ``sqjpg``.

    Returns:
        True when an image was written to disk, False when the member was
        skipped (HTTP error, unusable page title, or a username that is not
        safe to use as a file name).
    """
    headers = {'User-Agent': 'Mozilla/5.0'}

    page_req = Request(
        'http://roosterteeth.com/members/images/?uid=' + str(uid),
        headers=headers,
    )
    try:
        with urlopen(page_req) as resp:
            html = resp.read()
    except HTTPError as err:
        # A missing or broken member page must not abort the whole run.
        print("uid %s: member page unavailable (%s)" % (uid, err))
        return False

    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    title = BeautifulSoup(html, "html.parser").title
    words = title.string.split(" ") if title is not None and title.string else []
    if len(words) < 4:
        print("uid %s: could not read a username from the page title" % uid)
        return False
    username = words[3]
    print(username)

    # The username comes from remote HTML and ends up in a local path, so
    # refuse anything that could escape imgdir.
    if not username or "/" in username or "\\" in username:
        print("uid %s: skipping unsafe username %r" % (uid, username))
        return False

    # Percent-encode so unusual characters cannot produce an invalid URL.
    image_url = 'http://s3.roosterteeth.com/images/' + quote(username, safe="") + sqjpg
    print(image_url)
    try:
        # Fetch once, with the same User-Agent as the page request, and keep
        # the bytes; the response is an image and is never parsed as HTML.
        with urlopen(Request(image_url, headers=headers)) as resp:
            image = resp.read()
    except HTTPError as err:
        # No avatar for this member: nothing to save.
        print("uid %s: no image for %s (%s)" % (uid, username, err))
        return False

    with open(os.path.join(imgdir, username + ".jpg"), "wb") as out:
        out.write(image)
    return True


# Walk member ids from usrcount up to (not including) countstop, saving each
# member's avatar.
os.makedirs(imgdir, exist_ok=True)
while usrcount < countstop:
    # Original special case kept as-is: after member 11 is downloaded the
    # next id is skipped as well.
    if _save_avatar(usrcount) and usrcount == 11: #Was patched, but for future failures, will endure
        usrcount += 2
    else:
        usrcount += 1
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement