Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from bs4 import BeautifulSoup
- import sys,os
- import httplib2, urllib,time
# Directory that downloaded images are written to. The trailing slash is
# part of the value because file names are appended by plain concatenation.
images_path = "/var/www/4chanbanners/images/"

USAGE = "Usage: parse.py IMAGE_COUNT MAX_RETRIES"


def _filename_from_url(src):
    """Return the last path component of *src*, minus any fragment/query."""
    return src.split('/')[-1].split('#')[0].split('?')[0]


def main(argv):
    """Fetch the board page repeatedly and save images not yet on disk.

    NOTE: this script targets Python 2 (it relies on ``urllib.urlretrieve``).
    ``print(...)`` is called with a single argument so it behaves the same
    under the Python 2 print statement.

    Each iteration waits 3 seconds, requests ``http://boards.4chan.org/b/``,
    takes the ``src`` of the first ``<img>`` tag in the response and derives
    a file name from it. If that file already exists under ``images_path``
    the attempt counts as a retry; otherwise the image is downloaded, the
    retry counter is reset and the remaining image count is decremented.

    Args:
        argv: Full argument vector, i.e. ``[program, IMAGE_COUNT,
            MAX_RETRIES]``. Both values must parse as integers.

    Returns:
        0 when the loop finished (either IMAGE_COUNT new images were saved
        or more than MAX_RETRIES consecutive duplicates were seen), 1 when
        the arguments were invalid and the usage message was printed.
    """
    # Validate before touching argv[1]/argv[2]; otherwise a missing argument
    # raises IndexError and the usage message is never shown.
    if len(argv) != 3:
        print(USAGE)
        return 1
    try:
        count = int(argv[1])
        max_retries = int(argv[2])
    except ValueError:
        print(USAGE)
        return 1

    http = httplib2.Http()
    retries = 0
    # Honour the MAX_RETRIES argument instead of a hard-coded limit.
    while count > 0 and retries <= max_retries:
        time.sleep(3)  # pause between page requests
        status, response = http.request('http://boards.4chan.org/b/')
        # NOTE(review): assumes the page contains at least one <img> and that
        # its src is protocol-relative ("//host/path") - confirm.
        src = BeautifulSoup(response).img["src"]
        filename = _filename_from_url(src)
        target = images_path + filename
        if os.path.exists(target):
            retries += 1
            print("File " + filename + " already exists!")
        else:
            urllib.urlretrieve("http:" + src, target)
            print("Got " + filename)
            retries = 0
            count -= 1
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
Advertisement
Add Comment
Please, Sign In to add comment