Sixem

Python - 4chan Image Downloader

Nov 6th, 2014
293
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.50 KB | None | 0 0
  1. #!/usr/bin/python
  2. # Usage: 4c.py "http://boards.4chan.org/*/thread/*" "/home/user/download/location"
  3. import sys
  4. import urllib2
  5. import re
  6. import math
  7. import os
  8. import HTMLParser
  9. import errno
  10. from BeautifulSoup import BeautifulSoup
  11. def mspexists(path):
  12.     try:
  13.         os.makedirs(path)
  14.         return 0
  15.     except OSError as exception:
  16.         if exception.errno != errno.EEXIST:
  17.             raise
  18. h=HTMLParser.HTMLParser()
  19. uagent='Mozilla Firefox'
  20. dpath=str(sys.argv[2])
  21. mspexists(dpath)
  22. req=urllib2.Request(str(sys.argv[1]), headers={'User-Agent' : uagent})
  23. html=urllib2.urlopen(req)
  24. soup=BeautifulSoup(html)
  25. imgs=soup.findAll("div", {"class":"file"})
  26. cimg=len(imgs)
  27. cc=0
  28. print("Found: %s images! Starting download." % cimg)
  29. for img in imgs:
  30.         cc+=1
  31.         i=("http:%s" % img.a['href'])
  32.         fname = img.a.text  
  33.         if str(img.a).find("title=")==-1:
  34.             pass
  35.         else:
  36.             j = (str(img.a).split('title="')[1])
  37.             fname=(str(j).split('"')[0])
  38.         fname=h.unescape(fname)
  39.         req=urllib2.Request(i,headers={'User-Agent' : uagent})
  40.         u=urllib2.urlopen(req)
  41.         if os.path.exists(('%s/%s' % (dpath,fname))):
  42.                 print("[%s] %s" % (("%s/%s" % (cc,cimg)),"File already exists - Skipping"))
  43.         else:
  44.                 print("[%s] %s" % (("%s/%s" % (cc,cimg)),fname))
  45.                 sFile = open(('%s/%s' % (dpath,fname)),'w')
  46.                 sFile.write(u.read())
  47.                 sFile.close()
  48. print('All done!')
Add Comment
Please, Sign In to add comment