# Recursively mirror a directory listing from a Tor hidden service over a
# local SOCKS5 proxy, downloading files concurrently with progress bars.
import html
import os
import time
from concurrent.futures import ThreadPoolExecutor
from urllib.parse import unquote

import requests
from bs4 import BeautifulSoup
from tqdm import tqdm
download_dir = "blacksuit"
os.makedirs(download_dir, exist_ok=True)
max_workers = 10
index_url = (
    "http://ro4h37fieb6oyfrwoi5u5wpvaalnegsxzxnwzwzw43anxqmv6hjcsfyd.onion/dwango/"
)

# Route every request through the local Tor SOCKS proxy. socks5h resolves
# .onion hostnames through the proxy itself; 9150 is the Tor Browser default
# port (a standalone tor daemon usually listens on 9050).
session = requests.Session()
session.proxies = {
    "http": "socks5h://127.0.0.1:9150",
    "https": "socks5h://127.0.0.1:9150",
}
# Clear requests' default headers; the explicit header set below is used
# on every request instead.
session.headers = {}
headers = {
    "Host": "weg7sdx54bevnvulapqu6bpzwztryeflq3s23tegbmnhkbpqz637f2yd.onion",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; rv:109.0) Gecko/20100101 Firefox/115.0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.5",
    "Accept-Encoding": "gzip, deflate, br",
    "Connection": "keep-alive",
    "Upgrade-Insecure-Requests": "1",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "cross-site",
    "Sec-Fetch-User": "?1",
}
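

# Optional sanity check (not in the original paste; a minimal sketch): make
# one request through the session configured above to confirm the Tor SOCKS
# proxy and the index URL are reachable before crawling. Uses only names
# already defined here (session, headers, index_url).
def check_proxy(timeout=60):
    try:
        resp = session.get(index_url, headers=headers, timeout=timeout)
        print(f"Proxy check: index returned HTTP {resp.status_code}")
        return True
    except requests.exceptions.RequestException as e:
        print(f"Proxy check failed: {e}")
        return False
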

def download_file(file_url, file_name, dir_path, retries=3):
    """Stream one file into dir_path, retrying up to `retries` times."""
    file_name = os.path.basename(file_name)
    file_name = os.path.join(dir_path, file_name)
    if os.path.exists(file_name):
        print(f"Skipping {file_name}, already exists.")
        return
    attempt = 0
    while attempt < retries:
        try:
            file_response = session.get(file_url, headers=headers, stream=True)
            file_response.raise_for_status()
            total_size = int(file_response.headers.get("content-length", 0))
            # Stream to disk in 1 KiB chunks while updating a progress bar.
            with open(file_name, "wb") as file, tqdm(
                desc=file_name,
                total=total_size,
                unit="iB",
                unit_scale=True,
                unit_divisor=1024,
            ) as bar:
                for data in file_response.iter_content(chunk_size=1024):
                    size = file.write(data)
                    bar.update(size)
            return
        except Exception as e:
            print(f"Error downloading {file_url}: {e}")
            attempt += 1
            if attempt < retries:
                print(f"Retrying {file_url} (attempt {attempt + 1}/{retries})...")
                time.sleep(2)
            else:
                print(f"Failed to download {file_url} after {retries} attempts.")

def download_files(url, dir_path):
    """Walk an autoindex-style listing at `url`, recursing into
    subdirectories and downloading files with a thread pool."""
    while True:
        try:
            r = session.get(url, headers=headers)
            soup = BeautifulSoup(r.text, "html.parser")
            # Collect every link except the parent-directory entry.
            links = [a["href"] for a in soup.find_all("a", href=True) if a["href"] != "../"]
            unique_links = list(set(links))
            with ThreadPoolExecutor(max_workers=max_workers) as executor:
                for link in unique_links:
                    file_url = url + link
                    if link.endswith("/"):
                        # Directory entry: mirror it into a matching local folder.
                        new_dir_path = os.path.join(dir_path, link.strip("/"))
                        os.makedirs(new_dir_path, exist_ok=True)
                        download_files(file_url, new_dir_path)
                    else:
                        # File entry: decode percent-escapes and HTML entities
                        # before using the name on disk.
                        file_name = os.path.basename(link)
                        file_name = unquote(file_name)
                        file_name = html.unescape(file_name)
                        executor.submit(download_file, file_url, file_name, dir_path)
            return
        except requests.exceptions.ConnectionError as e:
            print(f"Connection error: {e}")
            print(f"Retrying {url}...")
            time.sleep(2)
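
# How to run (notes, not from the original paste): the script expects a Tor
# SOCKS proxy on 127.0.0.1:9150, which is what Tor Browser opens while it is
# running; a system tor service usually listens on 9050 instead, in which
# case the proxy URLs above need adjusting. SOCKS support in requests also
# requires PySocks:
#
#   pip install requests[socks] beautifulsoup4 tqdm
#   python blacksuit_mirror.py
#
# The file name above is only an example; downloads land in ./blacksuit.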

if __name__ == "__main__":
    download_files(index_url, download_dir)