Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # -*- coding: utf-8 -*-
- from xml.dom import minidom
- from time import sleep
- import urllib2, codecs, os, urllib, hashlib
- imgrdr = urllib.FancyURLopener()
- #settings go here
- post_limit=100
- pagenum=1 #start at page
- pagemax=50 #read this many pages
- #these are the tags you're searching for
- #replace spaces with a + as seen below
- tagstosearch="touhou+rating:s"
- #this is the directory it'll download to
- #be sure to use front slashes for the directory name
- #instead of backslashes
- dirtowrite=u"c:/folder/to/write/to/"
- #username/password
- danuser='username'
- danpass='password'
- #settings end here
- #some variables used throughout, you don't want to change these
- posts=[]
- resps=[]
- done=[]
- #salt the password
- hashobj = hashlib.new('sha1')
- hashobj.update('choujin-steiner--%s--'%danpass)
- danpass = hashobj.hexdigest()
- del hashobj
- #failsafe for if the writing directory
- if not dirtowrite.endswith('/') and not dirtowrite.endswith('\\'):
- dirtowrite+='/'
- first=0
- initialpage=pagenum
def pause_and_exit():
    """Prompt the user to press Enter, wait for it, then end the script.

    Raises:
        SystemExit: always, after one line has been read from stdin.
    """
    print('please hit Enter to continue')
    raw_input()  # blocks until Enter is pressed; the typed text is discarded
    raise SystemExit()
# Main download loop (Python 2): walk the result pages starting at
# initialpage, and save every post's file into dirtowrite as
# "<md5>.<extension>". Files that already exist on disk are not fetched again.
try:
    # Neither operand changes inside the loop, so compute the bound once.
    last_page = initialpage + pagemax - 1
    while pagenum <= last_page:
        page_url = (
            u"http://danbooru.donmai.us/post/index.xml"
            u"?limit=%d&page=%d&tags=%s&login=%s&password_hash=%s"
            % (post_limit, pagenum, tagstosearch, danuser, danpass)
        )

        # Fetch each page exactly once and always close the connection.
        # The headers stay available on the response object after close().
        response = urllib2.urlopen(page_url)
        try:
            dom = minidom.parse(response)
        finally:
            response.close()

        if not first:
            # Only shown for the first page, as a sign the server answered.
            print(response.headers)
            print('Connection established')
            first = 1

        posts = dom.getElementsByTagName("post")
        resps = dom.getElementsByTagName("response")
        if resps:
            # A <response> element carries a "reason" attribute describing
            # why the request was not served normally.
            print('got response:')
            print(resps[0].getAttribute("reason"))
            print('which probably means something went wrong')
            pause_and_exit()

        if not posts:
            # An empty page means the results are exhausted; requesting the
            # remaining pages would only produce more empty answers.
            print('no posts on page %d, stopping early' % pagenum)
            break

        for single_post in posts:
            post_id = single_post.getAttribute("id")
            if post_id in done:
                continue

            # The tags attribute isn't used, but it is available via
            # single_post.getAttribute(u"tags") if you want to do stuff
            # with it.
            md5 = single_post.getAttribute("md5")
            file_url = single_post.getAttribute("file_url")
            if not file_url:
                # getAttribute returns "" when the attribute is missing;
                # there is nothing to download for such a post.
                print("#%s skipped, no file_url" % post_id)
                done.append(post_id)
                continue

            # Everything after the last "." of the URL is the extension.
            file_extension = file_url[file_url.rfind(".") + 1:]
            pending_path = "%s%s.%s" % (dirtowrite, md5, file_extension)
            if os.path.exists(pending_path):
                print("#%s skipped, already exists" % post_id)
            else:
                # This is where the file gets written; any post-operations
                # can be written right after the imgrdr.retrieve() line.
                print("#%s is downloading (%s)..." % (post_id, md5))
                imgrdr.retrieve(file_url, pending_path)
            done.append(post_id)

        print('page %d done' % pagenum)
        pagenum += 1
except urllib2.URLError:
    # URLError is the parent class of HTTPError, so this covers both HTTP
    # status errors and plain connection failures.
    print('HTTP error/service unavailable')
    print('You were at page %d' % pagenum)
    pause_and_exit()

print('Program finished')
pause_and_exit()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement