dwango

Jul 1st, 2024
Python | 3.58 KB | Source Code
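
A small Python scraper that recursively mirrors a directory index from a Tor hidden service: it walks the HTML listing through a local SOCKS proxy, recreates the directory tree on disk, and downloads files in parallel with tqdm progress bars.
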
import html
import os
import time
from concurrent.futures import ThreadPoolExecutor
from urllib.parse import unquote

import requests
from bs4 import BeautifulSoup
from tqdm import tqdm

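# Where to store the mirror, how many parallel downloads to run, and the
# root of the directory index to fetch.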
download_dir = "blacksuit"
os.makedirs(download_dir, exist_ok=True)
max_workers = 10
index_url = (
    "http://ro4h37fieb6oyfrwoi5u5wpvaalnegsxzxnwzwzw43anxqmv6hjcsfyd.onion/dwango/"
)

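# Route all requests through the local Tor SOCKS proxy (9150 is the Tor
# Browser default; a standalone tor daemon usually listens on 9050). The
# socks5h scheme makes the proxy itself resolve the .onion hostname.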
session = requests.Session()
session.proxies = {
    "http": "socks5h://127.0.0.1:9150",
    "https": "socks5h://127.0.0.1:9150",
}
session.headers = {}
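# Clearing session.headers above drops requests' defaults (its own
# User-Agent, Accept, etc.) so only the explicit browser-like set below is
# sent. Note that the Host header names a different onion address than
# index_url.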
headers = {
    "Host": "weg7sdx54bevnvulapqu6bpzwztryeflq3s23tegbmnhkbpqz637f2yd.onion",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; rv:109.0) Gecko/20100101 Firefox/115.0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.5",
    "Accept-Encoding": "gzip, deflate, br",
    "Connection": "keep-alive",
    "Upgrade-Insecure-Requests": "1",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "cross-site",
    "Sec-Fetch-User": "?1",
}


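# Download a single file into dir_path with a tqdm progress bar. Files that
# already exist locally are skipped; failed transfers are retried up to
# `retries` times.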
def download_file(file_url, file_name, dir_path, retries=3):
    file_name = os.path.basename(file_name)
    file_name = os.path.join(dir_path, file_name)

    if os.path.exists(file_name):
        print(f"Skipping {file_name}, already exists.")
        return

    attempt = 0
    while attempt < retries:
        try:
            file_response = session.get(
                file_url, headers=headers, stream=True, timeout=60
            )
            # Bail out (and retry) on HTTP errors instead of writing the
            # error page to disk as if it were the file.
            file_response.raise_for_status()
            total_size = int(file_response.headers.get("content-length", 0))
            with open(file_name, "wb") as file, tqdm(
                desc=file_name,
                total=total_size,
                unit="iB",
                unit_scale=True,
                unit_divisor=1024,
            ) as bar:
                for data in file_response.iter_content(chunk_size=1024):
                    size = file.write(data)
                    bar.update(size)
            return
        except Exception as e:
            print(f"Error downloading {file_url}: {e}")
            # Drop any partial file so the existence check above does not
            # mistake it for a completed download on retry or rerun.
            if os.path.exists(file_name):
                os.remove(file_name)
            attempt += 1
            if attempt < retries:
                print(f"Retrying {file_url} (attempt {attempt + 1}/{retries})...")
                time.sleep(2)
            else:
                print(f"Failed to download {file_url} after {retries} attempts.")


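# Recursively walk an autoindex-style HTML listing: entries ending in "/"
# are treated as subdirectories and recursed into; everything else is queued
# on the thread pool for download.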
def download_files(url, dir_path):
    while True:
        try:
            r = session.get(url, headers=headers, timeout=60)
            soup = BeautifulSoup(r.text, "html.parser")

            # href=True skips anchors without an href attribute, which would
            # otherwise raise a KeyError; "../" is the parent-directory link.
            links = [
                a["href"] for a in soup.find_all("a", href=True) if a["href"] != "../"
            ]
            unique_links = list(set(links))
            with ThreadPoolExecutor(max_workers=max_workers) as executor:
                for link in unique_links:
                    file_url = url + link
                    if link.endswith("/"):
                        # Directory entry: mirror it locally and recurse.
                        new_dir_path = os.path.join(dir_path, link.strip("/"))
                        os.makedirs(new_dir_path, exist_ok=True)
                        download_files(file_url, new_dir_path)
                    else:
                        # File entry: undo percent- and HTML-escaping in the
                        # name, then hand the download to the pool.
                        file_name = os.path.basename(link)
                        file_name = unquote(file_name)
                        file_name = html.unescape(file_name)
                        executor.submit(download_file, file_url, file_name, dir_path)
            return
        except requests.exceptions.ConnectionError as e:
            print(f"Connection error: {e}")
            print(f"Retrying {url}...")
            time.sleep(2)


download_files(index_url, download_dir)
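
To run this as-is, a Tor SOCKS proxy must be listening on 127.0.0.1:9150 (e.g. a running Tor Browser), and the Python side needs SOCKS support plus the parsing and progress-bar dependencies: pip install "requests[socks]" beautifulsoup4 tqdm.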