SHARE
TWEET

Python script to download all photos from a Tumblr blog

a guest Mar 26th, 2011 2,781 Never
import os
import sys
from shutil import copyfileobj
from xml.etree import ElementTree as ET

try:
    from urllib import urlopen  # Python 2
except ImportError:
    from urllib.request import urlopen  # Python 3

  6. if len(sys.argv) != 2:
  7.     print >> sys.stderr, "Pass tumblr name as argument"
  8.     sys.exit()
  9.  
  10. tumblr_name = sys.argv[1]
  11. api_endpoint = 'http://%s.tumblr.com/api/read' % tumblr_name
  12. start = 0
  13. num = 50
  14. post_count = 1
  15.  
  16. while post_count:
  17.     resp = urlopen("%s?type=photo&start=%s&num=%s" % (api_endpoint, start, num))
  18.     content = resp.read()
  19.     tree = ET.fromstring(content)
  20.     post_tags = tree.findall(".//post")
  21.     post_count = len(post_tags)
  22.     for post_tag in post_tags:
  23.         post_id = post_tag.attrib['id']
  24.         post_date = post_tag.attrib['date-gmt'].split(" ")[0]
  25.         outname = "%s-%s-%s.jpeg" % (tumblr_name, post_date, post_id)
  26.         if os.path.exists(outname):
  27.             print "%s already downloaded" % outname
  28.             continue
  29.         for photo_tag in post_tag.findall(".//photo-url"):
  30.             if photo_tag.attrib['max-width'] == "1280":
  31.                 photo_url = photo_tag.text
  32.                 resp = urlopen(photo_url)
  33.                 outfile = open(outname, 'w')
  34.                 copyfileobj(resp, outfile)
  35.                 outfile.close()
  36.                 print "Downloaded %s to %s" % (photo_url, outname)
  37.     start += num
RAW Paste Data
Top