SHARE
TWEET

Untitled

a guest Aug 2nd, 2012 410 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import os
  2. import sys
  3. import urllib
  4. import urllib2
  5. import re
  6. import time
  7.  
  8. if not len(sys.argv) >= 3:
  9.     print "Missing parameters."
  10.     print "Usage:    python 4chan.py <url> <folder>"
  11.     sys.exit()
  12.  
  13. threadurl = sys.argv[1]
  14. subfolder = sys.argv[2]
  15.  
  16. exp_imgurl = re.compile('4chan\.org/\w+/src/\d+\.(?:jpg|gif|png|jpeg)')
  17. exp_picname = re.compile('\d+\.(?:jpg|gif|png|jpeg)')
  18.  
  19. ua = "Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US; rv:1.9.1.4) Gecko/20091007 Firefox/3.5.4"
  20. head = {'User-agent': ua}
  21.  
  22. print "Thread %s going to folder %s" % (threadurl, subfolder)
  23.  
  24. print "Fetching html..."
  25.  
  26. req = urllib2.Request(threadurl, None, head)
  27. try:
  28.     response = urllib2.urlopen(req)
  29. except urllib2.HTTPError, e:
  30.     if errorcount < 1:
  31.         errorcount = 1
  32.         print "Request failed"
  33.         response = urllib2.urlopen(req)
  34. except urllib2.URLError, e:
  35.     if errorcount < 1:
  36.         errorcount = 1
  37.         print "Request failed"
  38.         response = urllib2.urlopen(req)
  39.  
  40. msg = response.read()
  41. errorcount = 0
  42.  
  43. print "Received %d bytes" % len(msg)
  44.  
  45. imgurls = exp_imgurl.findall(msg)
  46.  
  47. print "Found %d images" % len(imgurls)
  48.  
  49. if not os.path.exists(subfolder):
  50.     print "Folder %s does not exist. Creating..." % subfolder
  51.     os.makedirs(subfolder)
  52. else:
  53.     print "Folder %s exists. I will just put all files in there." % subfolder
  54.  
  55. totalnumber = len(list(set(imgurls)))
  56.  
  57. for i, img in enumerate(list(set(imgurls))):
  58.     source = "http://images."+str(img)
  59.     filename = exp_picname.findall(source)[0]
  60.     destination = os.path.join(subfolder, filename)
  61.     if not os.path.isfile(destination):
  62.         try:
  63.             print "Downloading %d/%d: %s" % (i+1, totalnumber, source)
  64.             urllib.urlretrieve(source, destination)
  65.             time.sleep(0.25) # why?
  66.         except urllib.ContentTooShortError:
  67.             print "Image download failed, retrying..."
  68.             time.sleep(1)
  69.             urllib.urlretrieve(source, destination)
  70.             time.sleep(0.5) # why?
  71.     else:
  72.         print "File %s exists. Skipping..." % str(filename)
  73.  
  74. print "Aaaaaaand we are done. See you next time."
RAW Paste Data
Top