Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python3
- # coding: utf-8
- import argparse
- import sys
- import re
- import os
- from urllib.request import urlopen, urlretrieve
- from threading import Thread
- from queue import Queue
- from time import time
# Shared job queue: the main script puts (index, url) tuples on it and the
# download() worker threads take them off.
q = Queue()
def parse_thread(board, thread, base_url='http://2ch.hk'):
    """Fetch a thread page and return the absolute URLs of its images.

    Args:
        board: Board name, interpolated into the page URL.
        thread: Thread number, interpolated into the page URL.
        base_url: Scheme and host used both to fetch the page and to prefix
            every extracted link. A trailing slash is ignored.

    Returns:
        A list with ``base_url + href`` for every
        ``<a target="_blank" href="...">`` anchor, in page order.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
    """
    base_url = base_url.rstrip('/')
    url = '%s/%s/res/%s.html' % (base_url, board, thread)
    # Use the response as a context manager so the connection is closed
    # even if read() fails.
    with urlopen(url) as response:
        data = response.read()
    # Decode the bytes instead of calling str() on them: str(bytes) yields
    # the "b'...'" repr, which turns non-ASCII characters in hrefs into
    # literal "\x.." escape sequences.
    html = data.decode('utf-8', errors='replace')
    hrefs = re.findall(r'<a target="_blank" href="(.*?)">', html)
    print('found %s images' % len(hrefs))
    return [base_url + href for href in hrefs]
def download(path):
    """Worker loop: take ``(index, url)`` jobs from ``q`` and save each file.

    The file name is the last ``/``-separated component of the URL and the
    file is written inside ``path``. The loop never returns, so it is meant
    to run in a daemon thread.

    Args:
        path: Directory the downloaded files are written to.
    """
    while True:
        job = q.get()
        try:
            _, url = job
            file_name = url.split('/')[-1]
            print(file_name)
            urlretrieve(url, os.path.join(path, file_name))
        except Exception as exc:
            # Worker boundary: report the failure and keep serving the
            # queue rather than letting one bad URL kill the thread.
            print('failed to download %r: %s' % (job, exc), file=sys.stderr)
        finally:
            # Always acknowledge the job; otherwise a failed download would
            # leave q.join() blocked forever.
            q.task_done()
def _positive_int(value):
    """argparse ``type`` callback: parse ``value`` as an integer >= 1."""
    number = int(value)  # a ValueError here is reported by argparse itself
    if number < 1:
        raise argparse.ArgumentTypeError('must be at least 1, got %s' % value)
    return number


def main(argv=None):
    """Parse the command line, then download every image of one thread.

    Args:
        argv: Argument list without the program name; defaults to
            ``sys.argv[1:]``.

    Exits with status 1 (after printing help) when no arguments are given,
    and with argparse's usual status 2 on invalid or missing arguments.
    """
    parser = argparse.ArgumentParser(description='Download picture from 2ch')
    # Board and thread are required: without them the page URL would be
    # built from None.
    parser.add_argument(
        '-b', '--board', type=str, required=True, help='board name')
    parser.add_argument(
        '-t', '--thread', type=int, required=True, help='thread number')
    # At least one worker is needed, otherwise q.join() below never returns.
    parser.add_argument(
        '-th', type=_positive_int, default=2,
        help='multi-thread a download, default 2')

    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        parser.print_help()
        sys.exit(1)
    options = parser.parse_args(args)

    img_list = parse_thread(options.board, options.thread)

    # The trailing '' keeps a path separator at the end, so workers that
    # build file names by plain concatenation still get a valid path.
    folder = '[%s]%s' % (options.board, options.thread)
    path = os.path.join(os.getcwd(), folder, '')
    os.makedirs(path, exist_ok=True)

    print('download...')
    start = time()
    for _ in range(options.th):
        # Daemon threads: the workers loop forever, so they must not keep
        # the interpreter alive once the queue is drained.
        Thread(target=download, args=(path,), daemon=True).start()
    for job in enumerate(img_list):
        q.put(job)
    q.join()
    print('time: %.2f' % (time() - start))


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement