Guest User

Untitled

a guest
Oct 13th, 2013
83
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/usr/bin/env python3
  2. # coding: utf-8
  3.  
  4. import argparse
  5. import sys
  6. import re
  7. import os
  8. from urllib.request import urlopen, urlretrieve
  9. from threading import Thread
  10. from queue import Queue
  11. from time import time
  12.  
# Shared work queue: the main script puts (index, url) tuples on it and the
# download() worker threads take them off.
q = Queue()
  14.  
  15.  
  16. def parse_thread(board, thread):
  17.     url = ('http://2ch.hk/%s/res/%s.html' % (board, thread))
  18.     data = urlopen(url).read()
  19.     links = re.findall('<a target="_blank" href="(.*?)">', str(data))
  20.     print('found %s images' % len(links))
  21.  
  22.     for i, src in enumerate(links):
  23.         links[i] = ('http://2ch.hk' + src)
  24.     return links
  25.  
  26.  
  27. def download(path):
  28.     while True:
  29.         i, url = q.get()
  30.         fileName = url.split('/')[-1]
  31.         print(fileName)
  32.         file_path = path + fileName
  33.         urlretrieve(url, file_path)
  34.         q.task_done()
  35.  
  36.  
  37. if __name__ == "__main__":
  38.     parser = argparse.ArgumentParser(description='Download picture from 2ch')
  39.     parser.add_argument('-b', '--board', type=str, help='board name')
  40.     parser.add_argument('-t', '--thread', type=int, help='thread number')
  41.     parser.add_argument(
  42.         '-th', type=int, default=2, help='multi-thread a download, default 2')
  43.     options = parser.parse_args()
  44.     if len(sys.argv) == 1:
  45.         parser.print_help()
  46.         sys.exit(1)
  47.  
  48.     img_list = parse_thread(options.board, options.thread)
  49.  
  50.     path = ('/[%s]%s/' % (options.board, options.thread))
  51.     path = os.getcwd() + path
  52.     if not os.path.exists(path):
  53.         os.makedirs(path)
  54.  
  55.     print('download...')
  56.     start = time()
  57.     for i in range(options.th):
  58.         t = Thread(target=download, args=[path])
  59.         t.daemon = True
  60.         t.start()
  61.  
  62.     for i, url in enumerate(img_list):
  63.         q.put((i, url))
  64.     q.join()
  65.     print('time: %.2f' % (time() - start))
RAW Paste Data