networkcat

Untitled

Mar 20th, 2019
61
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. # Multithreaded Danbooru Downloader
  2. # How to use: set TAGSTRINGS below to a list of tag strings, such as
  3. # ["tag1", "tag1 tag2"]; each entry is downloaded into its own directory.
  4. # Mar 2019
  5.  
  6. # Don't forget to install the required modules: requests, fake-useragent, pybooru
  7.  
  8. import os
  9. import shutil
  10. from multiprocessing.dummy import Pool as ThreadPool
  11.  
  12. import requests
  13. from fake_useragent import UserAgent
  14. from pybooru import Danbooru
  15.  
  # Tag queries to fetch; every entry is handed to dl_tag() as one search.
  TAGSTRINGS = [
      "toga_himiko",
      "ashido_mina",
      "yaoyorozu_momo",
      "asui_tsuyu",
      "uraraka_ochako",
  ]

  # Number of worker threads in the download pool.
  THREADCOUNT = 5

  # Hard upper bound on the pages requested per tag; lower it to cap how far
  # a single tag is crawled.
  PAGES_HARDMAX = 9999

  # A progress line is printed every NTHPAGE pages.
  NTHPAGE = 5

  # Module-level API client and User-Agent generator used by the downloads.
  client = Danbooru("danbooru")
  ua = UserAgent()

  # Post fields whose URLs are downloaded. The fields are not told apart when
  # saving, so expect some odd files among the results.
  DLTYPES = ["file_url", "source", "large_file_url"]
  23.  
  24.  
  def dl_randua(fu, fn, uagent):
      """Download the resource at URL *fu* into the local file *fn*.

      The request carries *uagent* as its ``User-agent`` header and the body is
      streamed straight to disk rather than being held in memory.

      Args:
          fu: URL to fetch.
          fn: Path of the file to write; an existing file is overwritten.
          uagent: Value sent in the ``User-agent`` request header.

      Raises:
          requests.RequestException: If the request fails, times out, or the
              server answers with a 4xx/5xx status.
          OSError: If *fn* cannot be opened or written.

      Errors raised while reading the response stream also propagate to the
      caller.
      """
      # (connect, read) timeouts so a stalled server cannot hang a worker forever.
      # The ``with`` releases the streamed connection even when an error occurs.
      with requests.get(
          fu,
          stream=True,
          headers={"User-agent": uagent},
          timeout=(10, 60),
      ) as r:
          # Check the status before touching the disk so that error pages are
          # not saved as if they were the requested file.
          r.raise_for_status()
          # Let urllib3 undo gzip/deflate content encoding while copying.
          r.raw.decode_content = True
          with open(fn, "wb") as f:
              shutil.copyfileobj(r.raw, f)
  30.  
  31.  
  def _dl_post(post, directory):
      """Download every DLTYPES URL of one post into *directory*, best-effort."""
      fetched = set()  # URLs already handled for this post
      for dt in DLTYPES:
          try:
              f_url = post[dt]
              # Skip missing/empty fields, and URLs shared by several fields
              # of the same post, instead of downloading the same file twice.
              if not f_url or f_url in fetched:
                  continue
              fetched.add(f_url)
              f_name = f_url.split("/")[-1]
              if not f_name:
                  # A URL ending in "/" yields no usable file name.
                  continue
              dl_randua(f_url, os.path.join(directory, f_name), ua.chrome)
          except Exception:
              # Deliberately best-effort: a field that is absent or a URL that
              # cannot be fetched must not stop the remaining downloads.
              continue


  def dl_tag(tag):
      """Download all posts matching *tag* into the directory ``./<tag>``.

      Pages are requested from ``client.post_list`` one after another until a
      page comes back empty or PAGES_HARDMAX pages have been requested. A page
      that raises is reported and skipped; the next page is still tried.

      Args:
          tag: Tag string passed to the API as the ``tags`` query; it is also
              used as the name of the output directory.

      Returns:
          None. Progress and errors are reported on standard output.
      """
      print("Downloading tag", tag, "...")
      directory = os.path.join(".", tag)
      try:
          os.makedirs(directory, exist_ok=True)
      except OSError as err:
          # Without an output directory nothing can be saved for this tag.
          print("Cannot create directory for tag:", tag, "Reason:", repr(err))
          return

      # NOTE(review): pages are requested starting at 0 -- confirm whether the
      # API serves page 0 and page 1 as the same page.
      for pg in range(PAGES_HARDMAX):
          try:
              if pg % NTHPAGE == 0:
                  print("Currently downloading tag:", tag, "at page:", pg)
              posts = client.post_list(tags=tag, page=pg)
              if not posts:
                  print("No more posts in tag:", tag, "at page:", pg)
                  break
              for p in posts:
                  _dl_post(p, directory)
          except Exception as err:
              # Report why the page failed instead of hiding the cause.
              print("Exception on tag:", tag, "at page:", pg, ", skipping page.", "Reason:", repr(err))
  63.  
  64.  
  def main():
      """Run dl_tag over every entry of TAGSTRINGS on THREADCOUNT worker threads."""
      # map() blocks until every tag has been processed; leaving the ``with``
      # block then shuts the pool down instead of leaving its threads behind.
      with ThreadPool(THREADCOUNT) as pool:
          pool.map(dl_tag, TAGSTRINGS)


  if __name__ == "__main__":
      main()
RAW Paste Data

Adblocker detected! Please consider disabling it...

We've detected AdBlock Plus or some other adblocking software preventing Pastebin.com from fully loading.

We don't have any obnoxious sound, or popup ads, we actively block these annoying types of ads!

Please add Pastebin.com to your ad blocker whitelist or disable your adblocking software.

×