Guest User

Thread Media Downloader

a guest
Jan 11th, 2023
57
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.89 KB | None | 0 0
  1. from urllib.request import urlopen
  2. import requests
  3. import os
  4. import sys
  5. import json
  6. from pathlib import Path
  7.  
  8.  
  9. def download(url, dst_dir):
  10.     name = url.split('/')[-1].split('?')[0]
  11.     filename = os.path.join(dst_dir, name)
  12.     if not os.path.exists(filename):
  13.         with open(filename, 'wb') as f:
  14.             code = 999
  15.             while code != 200:
  16.                 print('Downloading', url, f'\t[RETRY: {code}]' if code != 999 else '')
  17.                 try:
  18.                     r = requests.get(url)
  19.                 except:
  20.                     print('FAIL: ', url)
  21.                     return
  22.                 code = r.status_code
  23.             f.write(r.content)
  24.         return filename
  25.  
  26. def urls_from_text_files(dir):
  27.     url_dir_pairs = {}
  28.     for file in os.listdir(src):
  29.         if file.endswith('.txt'):
  30.             with open(os.path.join(src, file), 'r') as f:
  31.                 dst_dir = file.split('.txt')[0]
  32.                 dst_dir = os.path.join(src, dst_dir)
  33.                 os.makedirs(dst_dir, exist_ok=True)
  34.                 urls = set(f.read().split(','))
  35.                 url_dir_pairs[dst_dir] = urls
  36.     return url_dir_pairs
  37.  
  38. def get_thread_urls():
  39.     thread_url = input('Thread url: ')
  40.     thread_id = thread_url.split('/res/')[-1].split('.')[0]
  41.     thread_board = thread_url.split('/res/')[0].split('/')[-1]
  42.    
  43.     arch = 'arch' in thread_url
  44.    
  45.     if arch:
  46.         url = thread_url.replace('html', 'json')
  47.     else:
  48.         url = f'https://2ch.hk/api/mobile/v2/after/{thread_board}/{thread_id}/{thread_id}'
  49.     j = requests.get(url).json()
  50.    
  51.    
  52.     # threads[].posts[].files[].path
  53.     posts = j['threads'][0]['posts'] if arch else j['posts']
  54.     file_posts = [file_post['files'] for file_post in posts if file_post['files']]
  55.     files = [file for post in file_posts for file in post]
  56.     paths = [path['path'] for path in files]
  57.    
  58.     urls = [('https://2ch.hk' + path) for path in paths if 'sticker' not in path]
  59.    
  60.     with open(os.path.join(src, thread_id+".txt"), 'w') as file:
  61.         file.write(", ".join(urls))
  62.    
  63.     print(f"Saved {len(urls)} links.\n")
  64.  
  65. def download_from_txt_lists():
  66.     if len(sys.argv) > 1:
  67.         for url in sys.argv[1:]:
  68.             download(url, dst)
  69.     else:
  70.         print(f"Finds every TXT files in {src} directory and treats them as url list where each url must be downloaded in corresponding folder.")
  71.         input('Press Enter to initiate the download process.')
  72.         url_dir_pairs = urls_from_text_files(src)
  73.         for dir in url_dir_pairs:
  74.             for url in url_dir_pairs[dir]:
  75.                 download(url, dir)
  76.         urls_count = sum([True for key in url_dir_pairs for url in url_dir_pairs[key]])
  77.         input(f'{len(url_dir_pairs)} files, {urls_count} urls')
  78.  
  79. if __name__ == "__main__":
  80.     src = str(Path.home() / "Downloads")
  81.     get_thread_urls()
  82.     download_from_txt_lists()
  83.  
  84.  
Add Comment
Please, Sign In to add comment