# Untitled

#!/usr/bin/env python3
# coding: utf-8

import argparse
import sys
import re
import os
from urllib.request import urlopen, urlretrieve
from threading import Thread
from queue import Queue
from time import time

# Work queue shared by the main script and the download worker threads.
# The main script puts (index, url) tuples on it; each worker takes one,
# downloads it and calls task_done() so that q.join() can return.
q = Queue()


def parse_thread(board, thread, base_url='http://2ch.hk'):
    """Fetch a thread page and return absolute URLs of the links found in it.

    Args:
        board: Board name, used as the first path component of the page URL.
        thread: Thread number, used as the page file name.
        base_url: Scheme and host (no trailing slash) that the page is
            fetched from and that is prefixed to every extracted link.

    Returns:
        A list of strings, one for every ``<a target="_blank" href="...">``
        match in the page, each prefixed with ``base_url``. The list is empty
        when the page contains no such link.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
    """
    url = '%s/%s/res/%s.html' % (base_url, board, thread)

    # Use the response as a context manager so the connection is always
    # closed, even if reading fails.
    with urlopen(url) as response:
        raw = response.read()
        charset = response.headers.get_content_charset() or 'utf-8'

    # Decode the bytes properly. str(bytes) would yield the "b'...'" repr,
    # in which non-ASCII bytes and quotes appear as backslash escapes and
    # end up inside the extracted links.
    try:
        page = raw.decode(charset, errors='replace')
    except LookupError:
        # The server announced a charset Python does not know.
        page = raw.decode('utf-8', errors='replace')

    hrefs = re.findall(r'<a target="_blank" href="(.*?)">', page)
    print('found %s images' % len(hrefs))

    return [base_url + href for href in hrefs]


def download(path):
    """Worker loop: take URLs from the shared queue and save them under *path*.

    Each queue item is an ``(index, url)`` tuple; only the URL is used. The
    file is stored in *path* under the last ``/``-separated component of the
    URL. The loop never returns.

    Args:
        path: Directory the downloaded files are written to. It may or may
            not end with a path separator.
    """
    while True:
        _, url = q.get()
        try:
            file_name = url.split('/')[-1]
            print(file_name)
            urlretrieve(url, os.path.join(path, file_name))
        except (OSError, ValueError) as exc:
            # URLError/HTTPError and file-system errors are OSError subclasses;
            # urlretrieve raises ValueError for an unusable URL. Report the
            # failure and keep the worker alive for the remaining items.
            print('failed to download %s: %s' % (url, exc), file=sys.stderr)
        finally:
            # Always mark the item as processed; otherwise one failed
            # download would make q.join() block forever.
            q.task_done()

def main():
    """Parse the command line, scrape the thread and download its files.

    The files are stored in ``[<board>]<thread>/`` under the current working
    directory, using ``-th`` worker threads. The elapsed time is printed at
    the end.
    """
    parser = argparse.ArgumentParser(description='Download picture from 2ch')
    parser.add_argument('-b', '--board', type=str, help='board name')
    parser.add_argument('-t', '--thread', type=int, help='thread number')
    parser.add_argument(
        '-th', type=int, default=2, help='multi-thread a download, default 2')

    # Invoked without any argument: show the usage text and exit.
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    options = parser.parse_args()
    # Without these checks a missing option would silently request
    # ".../None/res/None.html".
    if options.board is None or options.thread is None:
        parser.error('both --board and --thread are required')
    # With no worker threads nothing would consume the queue and q.join()
    # would block forever.
    if options.th < 1:
        parser.error('-th must be at least 1')

    img_list = parse_thread(options.board, options.thread)

    # The trailing empty component keeps a trailing separator on the path,
    # so workers that build file names by plain concatenation still work.
    path = os.path.join(
        os.getcwd(), '[%s]%s' % (options.board, options.thread), '')
    # exist_ok avoids the check-then-create race.
    os.makedirs(path, exist_ok=True)

    print('download...')
    start = time()
    # Daemon threads: the workers loop forever and must not keep the
    # interpreter alive once the queue has been drained.
    for _ in range(options.th):
        Thread(target=download, args=(path,), daemon=True).start()

    for item in enumerate(img_list):
        q.put(item)
    q.join()
    print('time: %.2f' % (time() - start))


if __name__ == "__main__":
    main()