# Thread Media Downloader

from urllib.request import urlopen
import requests
import os
import sys
import json
from pathlib import Path


def download(url, dst_dir, max_retries=5, timeout=30):
    """Download ``url`` into ``dst_dir`` unless the target file already exists.

    The local file name is the last path segment of the URL with any query
    string removed.

    Args:
        url: Address of the file to fetch.
        dst_dir: Directory the file is written to.
        max_retries: Maximum number of requests made before giving up when
            the server keeps answering with a non-200 status.
        timeout: Seconds passed to ``requests.get`` as its timeout.

    Returns:
        The path of the newly written file, or ``None`` when the file was
        already present or the download failed.
    """
    name = url.split('/')[-1].split('?')[0]
    filename = os.path.join(dst_dir, name)
    if os.path.exists(filename):
        return None

    code = None
    for _ in range(max_retries):
        print('Downloading', url, f'\t[RETRY: {code}]' if code is not None else '')
        try:
            r = requests.get(url, timeout=timeout)
        except requests.RequestException:
            print('FAIL: ', url)
            return None
        code = r.status_code
        if code == 200:
            break
    else:
        # Retries exhausted: give up instead of looping forever on e.g. a 404.
        print('FAIL: ', url, f'\t[HTTP {code}]')
        return None

    # The file is created only after a successful response, so a failed
    # download never leaves an empty file that later runs would skip over.
    with open(filename, 'wb') as f:
        f.write(r.content)
    return filename

def urls_from_text_files(dir):
    """Collect the URL lists stored in the ``.txt`` files of ``dir``.

    Every regular file ``<name>.txt`` in ``dir`` is read as a comma-separated
    list of URLs, and a sibling directory ``<name>`` is created for it.

    Args:
        dir: Directory that is scanned (not recursively) for ``.txt`` files.

    Returns:
        A dict mapping each created directory path to the set of URLs read
        from the matching text file. Surrounding whitespace is stripped from
        every URL and empty entries are dropped.
    """
    suffix = '.txt'
    url_dir_pairs = {}
    for file in os.listdir(dir):
        list_path = os.path.join(dir, file)
        # Skip other file types and directories that merely end in ".txt".
        if not file.endswith(suffix) or not os.path.isfile(list_path):
            continue
        dst_dir = os.path.join(dir, file[:-len(suffix)])
        os.makedirs(dst_dir, exist_ok=True)
        with open(list_path, 'r') as f:
            # Lists are written with ", " as separator, so strip each entry.
            urls = {entry.strip() for entry in f.read().split(',')}
        urls.discard('')
        url_dir_pairs[dst_dir] = urls
    return url_dir_pairs

def get_thread_urls():
    """Ask for a thread URL and save the thread's media links to a text file.

    The thread id and board are parsed from the URL around its ``/res/``
    segment. URLs containing ``arch`` are fetched from the same address with
    ``html`` replaced by ``json``; all others are fetched through the
    ``2ch.hk`` mobile API. Every file path found in the posts, except those
    containing ``sticker``, is turned into an absolute URL, and the list is
    written, separated by ``", "``, to ``<thread_id>.txt`` inside the
    module-level ``src`` directory.

    Raises:
        requests.RequestException: If the request fails, times out or the
            server answers with an HTTP error status.
    """
    thread_url = input('Thread url: ').strip()
    board_part, _, thread_part = thread_url.rpartition('/res/')
    thread_id = thread_part.split('.')[0]
    # Without a "/res/" segment the whole URL is used, as split() would do.
    thread_board = (board_part if board_part else thread_url).split('/')[-1]

    arch = 'arch' in thread_url

    if arch:
        url = thread_url.replace('html', 'json')
    else:
        url = f'https://2ch.hk/api/mobile/v2/after/{thread_board}/{thread_id}/{thread_id}'

    # A timeout keeps the script from hanging; the status check turns an HTTP
    # error page into a clear exception instead of a JSON/KeyError failure.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    j = response.json()

    # threads[].posts[].files[].path
    posts = j['threads'][0]['posts'] if arch else j['posts']
    paths = [
        attachment['path']
        for post in posts
        # Posts without attachments may carry a missing, null or empty list.
        for attachment in (post.get('files') or [])
    ]

    urls = [('https://2ch.hk' + path) for path in paths if 'sticker' not in path]

    with open(os.path.join(src, thread_id + ".txt"), 'w') as file:
        file.write(", ".join(urls))

    print(f"Saved {len(urls)} links.\n")

def download_from_txt_lists():
    """Download URLs given on the command line or listed in text files.

    With command-line arguments, each argument is treated as a URL and
    downloaded straight into the module-level ``src`` directory. Without
    arguments, every ``.txt`` list found in ``src`` is processed and each URL
    is downloaded into the folder belonging to its list. The function waits
    for Enter before starting and again after printing the summary.
    """
    if len(sys.argv) > 1:
        # The original passed an undefined name ``dst`` here (NameError);
        # ``src`` is the only directory this script defines.
        # NOTE(review): confirm ``src`` is the intended download target.
        for url in sys.argv[1:]:
            download(url, src)
        return

    print(f"Finds every TXT files in {src} directory and treats them as url list where each url must be downloaded in corresponding folder.")
    input('Press Enter to initiate the download process.')
    url_dir_pairs = urls_from_text_files(src)
    for dst_dir, urls in url_dir_pairs.items():
        for url in urls:
            download(url, dst_dir)
    urls_count = sum(len(urls) for urls in url_dir_pairs.values())
    input(f'{len(url_dir_pairs)} files, {urls_count} urls')

if __name__ == "__main__":
    # ``src`` is read as a module-level global by the functions above: it is
    # where the link list is saved and where the .txt lists are looked up.
    src = os.fspath(Path.home().joinpath("Downloads"))
    get_thread_urls()
    download_from_txt_lists()