Advertisement
Aareon

RT Scrape

Nov 24th, 2015
86
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.07 KB | None | 0 0
  1. from urllib.request import Request, urlopen, urlretrieve, HTTPError
  2. from bs4 import BeautifulSoup
  3. import re
  4. import sys
  5.  
  6. mystrip = lambda s, ss: s[:s.index(ss) + len(ss)]
  7. sqjpg = "_square.jpg"
  8. usrcount = 1
  9. countstop = 813687 #change for if statement for updates
  10. imgdir = "C:/scrape/"
  11.  
  12. while usrcount < countstop:
  13. req = Request('http://roosterteeth.com/members/images/?uid=' + str(usrcount),headers={'User-Agent': 'Mozilla/5.0'})
  14. html = urlopen(req).read()
  15. soup = BeautifulSoup(html)
  16. usertitle = soup.title.string
  17. username = usertitle.split(" ")[3]
  18. print(username)
  19.  
  20. req2 = Request('http://s3.roosterteeth.com/images/' + str(username) + str(sqjpg),headers={'User-Agent': 'Mozilla/5.0'})
  21. html2 = urlopen(req2).read()
  22. print(req2)
  23. soup = BeautifulSoup(html2)
  24. u2title = soup.title.string
  25. print(u2title)
  26.  
  27. if u2title == req:
  28. usrcount += 1
  29. else:
  30. urlretrieve('http://s3.roosterteeth.com/images/' + str(username) + str(sqjpg),str(imgdir) + str(username) + ".jpg")
  31. if usrcount == 11:    #Was patched, but for future failures, will endure
  32. usrcount += 2
  33. else:
  34. usrcount += 1
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement