Guest User

edownloader

a guest
Apr 28th, 2023
102
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.45 KB | None | 0 0
  1. import requests
  2. import os
  3. import time
  4. from rich.progress import track
  5. import argparse
  6. import hashlib
  7. import random
  8.  
# Command-line interface.  All behaviour below is driven by these flags;
# `args` is read as a module-level global by every function in this script.
parser = argparse.ArgumentParser()
# Where downloaded media and per-post tag files are written.
parser.add_argument("-o", "--outputfolder", type=str, default="./output", help="Path to save files")
# e621 tag query (space-separated tags are URL-encoded before use).
parser.add_argument("-t", "--tag", type=str, required=True, help="Tags to download")
# Politeness delay inserted before each download / between API pages.
parser.add_argument("-s", "--sleeptime", type=float, default=0.5, help="Amount of seconds to sleep between downloads")
# First API result page to fetch (pagination starts here).
parser.add_argument("-p", "--startingpage", type=int, default=1, help="Page to start search from")
# Stop after this many posts have been downloaded.
parser.add_argument("-c", "--count", type=int, default=1000, help="How many random images you need to get, default 1000.")
# Ceiling for the exponential retry backoff in sendRequest.
parser.add_argument("--maxtimeout", type=int, default=1024, help="Timeout in seconds before requester stops trying to download links or images")
# Verbose per-request logging.
parser.add_argument("--debug", action="store_true", help="Flag to enable debug mode.")
# Optional SOCKS5 proxy; requires the requests[socks] extra.
parser.add_argument("-r", "--proxy", type=str, default="", help="Socks5 proxy in notation user:pass@ip:port (requires pip install requests[socks])")
args = parser.parse_args()
  19.  
  20. def sendRequest(url):
  21.     go = False
  22.     timeout = 2
  23.     requesttimeout = 12
  24.     while not go:
  25.         try:
  26.             if args.debug:
  27.                 print(f'<== {timeout} === {url}   @   {requesttimeout}')
  28.            
  29.             if args.proxy:
  30.                 r = requests.get(url, headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/109.0"}, timeout=requesttimeout, proxies={'http': f'socks5://{args.proxy}', 'https': f'socks5://{args.proxy}'})
  31.             else:
  32.                 r = requests.get(url, headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/109.0"}, timeout=requesttimeout)
  33.            
  34.             if args.debug:
  35.                 print(f"=> CODE {r.status_code}")
  36.             go = True
  37.         except Exception as E:
  38.             requesttimeout += 3
  39.             if timeout <= args.maxtimeout:
  40.                 if args.debug or timeout >= 32:
  41.                     print(f"<= {url}")
  42.                     print(f"REASON: {E}")
  43.                     print(f"Waiting {timeout} seconds + increasing request timeout to {requesttimeout}...")
  44.                 time.sleep(timeout)
  45.                 timeout *= 2
  46.             else:
  47.                 print(f"<= {url}")
  48.                 print(f"REASON: {E}")
  49.                 print("Exiting with code 1.")
  50.                 exit(1)
  51.     if r.status_code != 200:
  52.         print(f"<= {url}")
  53.         print(f"REASON: Status code {r.status_code}")
  54.         print("Exiting with code 1.")
  55.         exit(1)
  56.     return r
  57.  
  58. def calculateMD5(filepath):
  59.     with open(filepath, 'rb') as f:
  60.         return hashlib.md5(f.read()).hexdigest()
  61.    
  62. def downloadAs(url, filepath):
  63.     time.sleep(args.sleeptime)
  64.     file_contents = sendRequest(url).content
  65.     with open(filepath, 'wb') as f:
  66.         f.write(file_contents)
  67.  
  68. def load_json(tags):
  69.     tags_readable = tags.replace(' ', '%20')
  70.     page = args.startingpage
  71.     db_full = {"posts": []}
  72.     db_url = f'https://e621.net/posts.json?limit=300&page={page}&tags={tags_readable}'
  73.     db_url_data = sendRequest(db_url).json()
  74.     while db_url_data["posts"]:
  75.         db_full["posts"] += db_url_data["posts"]
  76.         page += 1
  77.         time.sleep(args.sleeptime)
  78.         db_url = f'https://e621.net/posts.json?limit=300&page={page}&tags={tags_readable}'
  79.         db_url_data = sendRequest(db_url).json()
  80.     print("Found %i images." % (len(db_full["posts"]),))
  81.     random.shuffle(db_full["posts"])
  82.     return db_full
  83.  
  84. def downloadImageAndTags(json_point):
  85.     postId = json_point['id']
  86.     imageExt = json_point['file']['ext']
  87.     imageUrl = json_point['file']['url']
  88.     imageMD5 = json_point["file"]["md5"]
  89.     if not imageUrl:
  90.         md_12 = imageMD5[:2]
  91.         md_34 = imageMD5[2:4]
  92.         imageUrl = f'https://static1.e621.net/data/{md_12}/{md_34}/{imageMD5}.{imageExt}'
  93.  
  94.     try:
  95.         os.mkdir(f'{args.outputfolder}/{postId}')
  96.     except:
  97.         pass
  98.    
  99.     imagePhysicalPath = f'{args.outputfolder}/{postId}.{imageExt}'
  100.     if not os.path.exists(imagePhysicalPath):
  101.         # If file not exist, then download
  102.         downloadAs(imageUrl, imagePhysicalPath)
  103.     elif calculateMD5(imagePhysicalPath) != imageMD5:
  104.         # If file do exist, then compare md5 checksum, and if it is not same, then redownload
  105.         downloadAs(imageUrl, imagePhysicalPath)
  106.  
  107.     # Always remake tags file. Just for convinience.
  108.     tagsPhysicalPath = f'{args.outputfolder}/{postId}/tags.txt'
  109.     tags = []
  110.     if json_point['tags']['general']:
  111.         tags += json_point['tags']['general']
  112.     if json_point['tags']['species']:
  113.         tags += json_point['tags']['species']
  114.     if json_point['tags']['character']:
  115.         tags += json_point['tags']['character']
  116.     if json_point['tags']['artist']:
  117.         tags += json_point['tags']['artist']
  118.     tags += ["rating:" + json_point['rating']]
  119.     with open(tagsPhysicalPath, 'w', encoding='utf-8') as f:
  120.         f.write(',\n'.join(tags))
  121.  
  122. try:
  123.     os.mkdir(args.outputfolder)
  124. except:
  125.     pass
  126.  
  127. c = 0
  128. for post in track(load_json(args.tag)['posts'], description=f"Downloading posts from e621 tagged as '{args.tag}'..."):
  129.     try:
  130.         downloadImageAndTags(post)
  131.         c += 1
  132.         if c > args.count:
  133.             print(f"{args.count} posts downloaded, shutting down!")
  134.             break
  135.     except:
  136.         print(f"{post['id']} failed catastrophically!")
Advertisement
Add Comment
Please, Sign In to add comment