# Recursively mirror a directory listing from a Tor hidden service over a
# local SOCKS5 proxy, downloading files concurrently with progress bars.
import html
import os
import time
from concurrent.futures import ThreadPoolExecutor
from urllib.parse import unquote

import requests
from bs4 import BeautifulSoup
from tqdm import tqdm
download_dir = "blacksuit"
os.makedirs(download_dir, exist_ok=True)
max_workers = 10
index_url = (
    "http://ro4h37fieb6oyfrwoi5u5wpvaalnegsxzxnwzwzw43anxqmv6hjcsfyd.onion/dwango/"
)

# Route every request through the local Tor SOCKS proxy. socks5h resolves
# .onion hostnames through the proxy itself; 9150 is the Tor Browser default
# port (a standalone tor daemon usually listens on 9050).
session = requests.Session()
session.proxies = {
    "http": "socks5h://127.0.0.1:9150",
    "https": "socks5h://127.0.0.1:9150",
}
# Clear requests' default headers; the explicit header set below is used
# on every request instead.
session.headers = {}
headers = {
    "Host": "weg7sdx54bevnvulapqu6bpzwztryeflq3s23tegbmnhkbpqz637f2yd.onion",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; rv:109.0) Gecko/20100101 Firefox/115.0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.5",
    "Accept-Encoding": "gzip, deflate, br",
    "Connection": "keep-alive",
    "Upgrade-Insecure-Requests": "1",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "cross-site",
    "Sec-Fetch-User": "?1",
}
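

# Optional sanity check (not in the original paste; a minimal sketch): make
# one request through the session configured above to confirm the Tor SOCKS
# proxy and the index URL are reachable before crawling. Uses only names
# already defined here (session, headers, index_url).
def check_proxy(timeout=60):
    try:
        resp = session.get(index_url, headers=headers, timeout=timeout)
        print(f"Proxy check: index returned HTTP {resp.status_code}")
        return True
    except requests.exceptions.RequestException as e:
        print(f"Proxy check failed: {e}")
        return False
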

def download_file(file_url, file_name, dir_path, retries=3):
    """Stream one file into dir_path, retrying up to `retries` times."""
    file_name = os.path.basename(file_name)
    file_name = os.path.join(dir_path, file_name)
    if os.path.exists(file_name):
        print(f"Skipping {file_name}, already exists.")
        return
    attempt = 0
    while attempt < retries:
        try:
            file_response = session.get(file_url, headers=headers, stream=True)
            file_response.raise_for_status()
            total_size = int(file_response.headers.get("content-length", 0))
            # Stream to disk in 1 KiB chunks while updating a progress bar.
            with open(file_name, "wb") as file, tqdm(
                desc=file_name,
                total=total_size,
                unit="iB",
                unit_scale=True,
                unit_divisor=1024,
            ) as bar:
                for data in file_response.iter_content(chunk_size=1024):
                    size = file.write(data)
                    bar.update(size)
            return
        except Exception as e:
            print(f"Error downloading {file_url}: {e}")
            attempt += 1
            if attempt < retries:
                print(f"Retrying {file_url} (attempt {attempt + 1}/{retries})...")
                time.sleep(2)
            else:
                print(f"Failed to download {file_url} after {retries} attempts.")

def download_files(url, dir_path):
    """Walk an autoindex-style listing at `url`, recursing into
    subdirectories and downloading files with a thread pool."""
    while True:
        try:
            r = session.get(url, headers=headers)
            soup = BeautifulSoup(r.text, "html.parser")
            # Collect every link except the parent-directory entry.
            links = [a["href"] for a in soup.find_all("a", href=True) if a["href"] != "../"]
            unique_links = list(set(links))
            with ThreadPoolExecutor(max_workers=max_workers) as executor:
                for link in unique_links:
                    file_url = url + link
                    if link.endswith("/"):
                        # Directory entry: mirror it into a matching local folder.
                        new_dir_path = os.path.join(dir_path, link.strip("/"))
                        os.makedirs(new_dir_path, exist_ok=True)
                        download_files(file_url, new_dir_path)
                    else:
                        # File entry: decode percent-escapes and HTML entities
                        # before using the name on disk.
                        file_name = os.path.basename(link)
                        file_name = unquote(file_name)
                        file_name = html.unescape(file_name)
                        executor.submit(download_file, file_url, file_name, dir_path)
            return
        except requests.exceptions.ConnectionError as e:
            print(f"Connection error: {e}")
            print(f"Retrying {url}...")
            time.sleep(2)
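
# How to run (notes, not from the original paste): the script expects a Tor
# SOCKS proxy on 127.0.0.1:9150, which is what Tor Browser opens while it is
# running; a system tor service usually listens on 9050 instead, in which
# case the proxy URLs above need adjusting. SOCKS support in requests also
# requires PySocks:
#
#   pip install requests[socks] beautifulsoup4 tqdm
#   python blacksuit_mirror.py
#
# The file name above is only an example; downloads land in ./blacksuit.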

if __name__ == "__main__":
    download_files(index_url, download_dir)