Advertisement
lvk

danbooru python dl script

lvk
Aug 15th, 2012
203
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.23 KB | None | 0 0
  1. # -*- coding: utf-8 -*-
  2. from xml.dom import minidom
  3. from time import sleep
  4. import urllib2, codecs, os, urllib, hashlib
  5. imgrdr = urllib.FancyURLopener()
  6.  
  7. #settings go here
  8.  
  9. post_limit=100
  10. pagenum=1 #start at page
  11. pagemax=50 #read this many pages
  12.  
  13. #these are the tags you're searching for
  14. #replace spaces with a + as seen below
  15. tagstosearch="touhou+rating:s"
  16.  
  17. #this is the directory it'll download to
  18. #be sure to use front slashes for the directory name
  19. #instead of backslashes
  20. dirtowrite=u"c:/folder/to/write/to/"
  21.  
  22. #username/password
  23. danuser='username'
  24. danpass='password'
  25.  
  26. #settings end here
  27.  
  28. #some variables used throughout, you don't want to change these
  29. posts=[]
  30. resps=[]
  31. done=[]
  32.  
  33. #salt the password
  34. hashobj = hashlib.new('sha1')
  35. hashobj.update('choujin-steiner--%s--'%danpass)
  36. danpass = hashobj.hexdigest()
  37. del hashobj
  38.  
  39. #failsafe for if the writing directory
  40. if not dirtowrite.endswith('/') and not dirtowrite.endswith('\\'):
  41.     dirtowrite+='/'
  42.  
  43. first=0
  44. initialpage=pagenum
  45.  
  46. def pause_and_exit():
  47.     print 'please hit Enter to continue'
  48.     raw_input()
  49.     raise SystemExit
  50.  
  51. try:
  52.     while(pagenum<=pagemax+initialpage-1):
  53.         dom=minidom.parse(urllib2.urlopen(u"http://danbooru.donmai.us/post/index.xml?limit=%d&page=%d&tags=%s&login=%s&password_hash=%s"%(post_limit, pagenum, tagstosearch, danuser, danpass)))
  54.         if not first:
  55.             doc=urllib2.urlopen(u"http://danbooru.donmai.us/post/index.xml?limit=%d&page=%d&tags=%s&login=%s&password_hash=%s"%(post_limit, pagenum, tagstosearch, danuser, danpass))
  56.             print doc.headers
  57.             print 'Connection established'
  58.             first=1
  59.         posts=dom.getElementsByTagName("post")
  60.         resps=dom.getElementsByTagName("response")
  61.         if(resps!=[]):
  62.             print 'got response:'
  63.             print resps[0].getAttribute("reason")
  64.             print 'which probably means something went wrong'
  65.             pause_and_exit()
  66.         for single_post in posts:
  67.             post_id=single_post.getAttribute("id")
  68.             if(post_id not in done):
  69.                 #the tags variable isn't used but you can uncomment it and do stuff with it
  70.                 #if you want to
  71.                 #tags=single_post.getAttribute(u"tags")
  72.                 md5=single_post.getAttribute("md5")
  73.                 file_extension=single_post.getAttribute("file_url")
  74.                 file_extension=file_extension[file_extension.rfind(".")+1:]
  75.  
  76.                 pending_path = "%s%s.%s"%(dirtowrite, md5, file_extension)
  77.                 if os.path.exists(pending_path):
  78.                     print "#%s skipped, already exists"%post_id
  79.                 else:
  80.                     #this is where the file gets written, any post-operations can be written
  81.                     #right after the imgrdr.retrieve() line
  82.                     print "#%s is downloading (%s)..."%(post_id, md5)
  83.                     imgrdr.retrieve(single_post.getAttribute("file_url"), pending_path)
  84.                    
  85.                 done.append(post_id)
  86.         print 'page %d done'%pagenum
  87.         pagenum+=1
  88. except urllib2.HTTPError:
  89.     print 'HTTP error/service unavailable'
  90.     print 'You were at page %d'%pagenum
  91.     pause_and_exit()
  92. print 'Program finished'
  93. pause_and_exit()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement