Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Download every image referenced by a 4chan thread page into a local folder.
#
# Usage: python 4chan.py <url> <folder>
#
# Targets Python 2: relies on urllib2 and urllib.urlretrieve.
import os
import sys
import urllib
import urllib2
import re
import time

# Matches image locations of the form 4chan.org/<board>/src/<digits>.<ext>.
exp_imgurl = re.compile(r'4chan\.org/\w+/src/\d+\.(?:jpg|gif|png|jpeg)')
# Matches only the <digits>.<ext> file name inside such a location.
exp_picname = re.compile(r'\d+\.(?:jpg|gif|png|jpeg)')
# Requests are sent with a browser-like User-agent header.
ua = "Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US; rv:1.9.1.4) Gecko/20091007 Firefox/3.5.4"
head = {'User-agent': ua}


def fetch_html(url):
    """Return the response body of *url*, retrying the request once on failure.

    Raises:
        urllib2.URLError: (or its subclass HTTPError) if the retry fails too.
    """
    req = urllib2.Request(url, None, head)
    try:
        response = urllib2.urlopen(req)
    except urllib2.URLError:
        # HTTPError is a subclass of URLError, so one handler covers both.
        print("Request failed")
        response = urllib2.urlopen(req)
    try:
        return response.read()
    finally:
        response.close()


def download_image(source, destination):
    """Save *source* to *destination*, retrying once on a truncated transfer.

    Raises:
        urllib.ContentTooShortError: if the retry is truncated as well; the
            partial file is removed first.
    """
    try:
        urllib.urlretrieve(source, destination)
        # Short pause between downloads -- presumably to go easy on the server.
        time.sleep(0.25)
    except urllib.ContentTooShortError:
        print("Image download failed, retrying...")
        time.sleep(1)
        try:
            urllib.urlretrieve(source, destination)
        except urllib.ContentTooShortError:
            # Do not leave a truncated file behind: the "file exists" check
            # in the main loop would skip it on every later run.
            if os.path.isfile(destination):
                os.remove(destination)
            raise
        time.sleep(0.5)


if len(sys.argv) < 3:
    print("Missing parameters.")
    print("Usage: python 4chan.py <url> <folder>")
    # Non-zero status so calling shells/scripts can detect the usage error.
    sys.exit(1)

threadurl = sys.argv[1]
subfolder = sys.argv[2]

print("Thread %s going to folder %s" % (threadurl, subfolder))
print("Fetching html...")
msg = fetch_html(threadurl)
print("Received %d bytes" % len(msg))

imgurls = exp_imgurl.findall(msg)
print("Found %d images" % len(imgurls))

if not os.path.exists(subfolder):
    print("Folder %s does not exist. Creating..." % subfolder)
    os.makedirs(subfolder)
else:
    print("Folder %s exists. I will just put all files in there." % subfolder)

# The same image can be matched more than once in the page; de-duplicate a
# single time and sort so the download order is stable between runs.
unique_imgurls = sorted(set(imgurls))
totalnumber = len(unique_imgurls)

for i, img in enumerate(unique_imgurls):
    source = "http://images." + img
    filename = exp_picname.findall(source)[0]
    destination = os.path.join(subfolder, filename)
    if os.path.isfile(destination):
        print("File %s exists. Skipping..." % filename)
        continue
    print("Downloading %d/%d: %s" % (i + 1, totalnumber, source))
    download_image(source, destination)

print("Aaaaaaand we are done. See you next time.")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement