Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from urllib.request import urlopen
- import requests
- import os
- import sys
- import json
- from pathlib import Path
def download(url, dst_dir, max_attempts=5, timeout=30):
    """Download *url* into *dst_dir* unless the target file already exists.

    The local file name is the last path segment of the URL with any query
    string removed.

    Args:
        url: Address of the file to fetch. Surrounding whitespace is ignored.
        dst_dir: Existing directory that receives the file.
        max_attempts: Maximum number of GET requests issued while waiting
            for an HTTP 200 answer.
        timeout: Seconds passed to ``requests.get`` as its timeout.

    Returns:
        The path of the local file (freshly downloaded or already present),
        or ``None`` when the download failed.
    """
    # URL lists may carry stray spaces/newlines around each entry; they must
    # not end up inside the file name.
    url = url.strip()
    name = url.split('/')[-1].split('?')[0]
    filename = os.path.join(dst_dir, name)
    if os.path.exists(filename):
        return filename

    code = None
    for _ in range(max_attempts):
        print('Downloading', url, f'\t[RETRY: {code}]' if code is not None else '')
        try:
            r = requests.get(url, timeout=timeout)
        except requests.RequestException:
            print('FAIL: ', url)
            return None
        code = r.status_code
        if code == 200:
            break
    else:
        # Never got a 200 (e.g. permanent 404): give up instead of looping
        # forever.
        print('FAIL: ', url)
        return None

    # Create the file only once the payload is in hand, so a failed download
    # does not leave an empty file that later runs would treat as "done".
    with open(filename, 'wb') as f:
        f.write(r.content)
    return filename
def urls_from_text_files(dir):
    """Collect the URL lists stored in the ``.txt`` files of *dir*.

    Every regular file ``<name>.txt`` directly inside *dir* is read as a
    comma-separated list of URLs. A sibling folder ``<dir>/<name>`` is
    created for it (if missing) to receive the downloads.

    Args:
        dir: Directory to scan. (The name shadows the builtin; it is kept
            so existing keyword callers continue to work.)

    Returns:
        A dict mapping each created folder path to the set of URLs read
        from the matching text file. Whitespace around entries is removed
        and empty entries are dropped.
    """
    suffix = '.txt'
    url_dir_pairs = {}
    for file in os.listdir(dir):
        list_path = os.path.join(dir, file)
        # Skip other file types and directories that merely end in ".txt".
        if not file.endswith(suffix) or not os.path.isfile(list_path):
            continue
        with open(list_path, 'r') as f:
            entries = f.read().split(',')
        # Lists are saved with ", " separators and may end with a newline,
        # so trim every entry and discard the blanks.
        urls = {entry.strip() for entry in entries if entry.strip()}
        dst_dir = os.path.join(dir, file[:-len(suffix)])
        os.makedirs(dst_dir, exist_ok=True)
        url_dir_pairs[dst_dir] = urls
    return url_dir_pairs
def get_thread_urls():
    """Ask for a 2ch.hk thread URL and save its attachment links to a file.

    The thread is fetched as JSON: for URLs containing ``arch`` the page
    address itself is used with ``html`` replaced by ``json``, otherwise the
    mobile API endpoint is queried. Links to all attached files except
    stickers are written, joined by ``", "``, to ``<thread id>.txt`` inside
    the module-level ``src`` directory.

    Raises:
        ValueError: If the entered URL has no ``/res/`` segment.
        requests.RequestException: If the request fails or the server
            answers with an HTTP error status.
    """
    thread_url = input('Thread url: ').strip()
    if '/res/' not in thread_url:
        # Without this segment neither the board nor the thread id can be
        # extracted, and the request below would be meaningless.
        raise ValueError(f'Not a thread url: {thread_url!r}')

    thread_id = thread_url.split('/res/')[-1].split('.')[0]
    thread_board = thread_url.split('/res/')[0].split('/')[-1]
    arch = 'arch' in thread_url
    if arch:
        url = thread_url.replace('html', 'json')
    else:
        url = f'https://2ch.hk/api/mobile/v2/after/{thread_board}/{thread_id}/{thread_id}'

    response = requests.get(url, timeout=30)
    # Surface HTTP errors directly instead of failing later on JSON parsing.
    response.raise_for_status()
    j = response.json()

    # threads[].posts[].files[].path
    posts = j['threads'][0]['posts'] if arch else j['posts']
    urls = [
        'https://2ch.hk' + attachment['path']
        for post in posts
        # Treat a missing or empty "files" entry as "no attachments".
        for attachment in (post.get('files') or [])
        if 'sticker' not in attachment['path']
    ]

    with open(os.path.join(src, thread_id + ".txt"), 'w') as file:
        file.write(", ".join(urls))
    print(f"Saved {len(urls)} links.\n")
def download_from_txt_lists():
    """Download files given on the command line or listed in text files.

    With command-line arguments, each argument is treated as a URL and
    downloaded into the module-level ``src`` directory. Without arguments,
    every ``.txt`` URL list found in ``src`` is processed and each URL is
    downloaded into the folder belonging to its list; a summary is shown
    afterwards. Both prompts wait for Enter.
    """
    cli_urls = sys.argv[1:]
    if cli_urls:
        for url in cli_urls:
            # The original passed ``dst``, a name defined nowhere in the
            # script; ``src`` is the only download directory it sets up.
            download(url, src)
        return

    print(f"Finds every TXT files in {src} directory and treats them as url list where each url must be downloaded in corresponding folder.")
    input('Press Enter to initiate the download process.')
    url_dir_pairs = urls_from_text_files(src)
    for dst_dir, urls in url_dir_pairs.items():
        for url in urls:
            download(url, dst_dir)
    urls_count = sum(len(urls) for urls in url_dir_pairs.values())
    input(f'{len(url_dir_pairs)} files, {urls_count} urls')
if __name__ == "__main__":
    # Script entry point. ``src`` is the module-level working directory that
    # the functions above read: the current user's "Downloads" folder.
    downloads_folder = Path.home().joinpath("Downloads")
    src = str(downloads_folder)

    # First record the links of a thread, then fetch everything listed.
    get_thread_urls()
    download_from_txt_lists()
Add Comment
Please, Sign In to add comment