Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import os
import re
import urllib2
from sys import argv
def error(n, e=''):
    """Print the message for error number ``n`` and terminate the script.

    n -- message selector: 1 (bad command line), 2 (HTTP error),
         3 (URL error). Any other value raises KeyError.
    e -- optional detail appended to messages 2 and 3. It may be any
         object (status code, tuple, exception); it is converted with
         str() so that non-string details no longer raise TypeError.

    Always raises SystemExit with exit status 1.
    """
    detail = str(e)
    messages = {
        1: 'Invalid arguments or arguments is not set. Format: python picgrab.py [url] [pack]',
        2: 'HTTP Error Occured: ' + detail,
        3: 'URL Error Occured: ' + detail,
    }
    print(messages[n])
    # Non-zero status so that shells and calling scripts can detect the failure.
    raise SystemExit(1)
def run(url):
    """Fetch the page at ``url`` and pass its HTML to ``parse``.

    Progress messages are printed before each step. Nothing is returned.
    """
    print('Fetching page ' + url + '...')
    html = fetch(url)
    print('Retrieving image paths..')
    # parse() is called for its side effects only, so its result is not kept.
    parse(html)
def fetch(url):
    """Return the raw response body of ``url``.

    The request is sent with a fixed Referer and User-Agent header.
    HTTP and URL errors are reported through ``error`` (codes 2 and 3),
    which terminates the script.
    """
    request = urllib2.Request(url)
    request.add_header('Referer', 'http://lurkmore.to')
    request.add_header('User-Agent', 'Mozilla/5.0 Firefox/39.0')
    try:
        response = urllib2.urlopen(request)
    except urllib2.HTTPError as e:
        # HTTPError is a subclass of URLError, so it has to be handled first.
        # The detail is converted to text here because error() builds its
        # message by string concatenation.
        error(2, str(e.code))
    except urllib2.URLError as e:
        error(3, str(e.reason))
    # Close the connection even when reading the body fails.
    try:
        return response.read()
    finally:
        response.close()
def parse(html):
    """Extract image paths from ``html`` and start downloading them.

    Paths of the form ``/<board>/src/<digits>/<digits>.jpg`` are collected,
    duplicates are dropped (the first occurrence wins, so the page order is
    kept) and the remaining paths are handed to ``load_pics``.
    Nothing is returned.
    """
    # The dot is escaped so that only a literal ".jpg" suffix matches.
    img_paths = re.findall(r"/[a-z]+/src/[0-9]+/[0-9]+\.jpg", html)
    print(str(len(img_paths)) + ' images found. Removing duplicates...')
    # Order-preserving de-duplication keeps the numbering of the saved files
    # stable between runs.
    seen = set()
    unique_paths = []
    for path in img_paths:
        if path not in seen:
            seen.add(path)
            unique_paths.append(path)
    print(str(len(unique_paths)) + ' images left after removing duplicated. Downloading started.')
    load_pics(unique_paths)
def load_pics(paths):
    """Download every path in ``paths`` from https://2ch.hk.

    The files are written to the directory named by the second command-line
    argument (``argv[2]``) as ``1.jpg``, ``2.jpg``, ... in the order of
    ``paths``. The directory is created when it does not exist yet.
    """
    packname = argv[2]
    # Reuse an existing directory instead of failing on a second run.
    if not os.path.isdir(packname):
        os.mkdir(packname)
    total = len(paths)
    for i, p in enumerate(paths, 1):
        print('Fetching image ' + str(i) + ' of ' + str(total))
        # Download first so that a failed request does not leave an empty file.
        response = urllib2.urlopen('https://2ch.hk' + p)
        try:
            data = response.read()
        finally:
            response.close()
        with open(os.path.join(packname, str(i) + '.jpg'), 'wb') as f:
            f.write(data)
def init():
    """Validate the command line and start the download.

    Exactly two arguments are expected: a URL starting with http:// or
    https:// and a pack (directory) name. With valid arguments the URL is
    passed to ``run``; otherwise ``error(1)`` is called.
    """
    # Raw string: "\(" and "\)" are regex escapes, not string escapes.
    regexp = r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
    if len(argv) == 3 and re.match(regexp, argv[1]):
        run(argv[1])
    else:
        error(1)
# Run the grabber only when the file is executed as a script, not on import.
if __name__ == "__main__":
    init()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement