Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python3
- # encoding: utf-8
- """
- imguralbum.py - Download a whole imgur album in one go.
- Provides both a class and a command line utility in a single script
- to download Imgur albums.
- MIT License
- Copyright Alex Gisby <[email protected]>
- """
- import sys
- import re
- import urllib.request, urllib.parse, urllib.error
- import os
- import math
- from collections import Counter
- # Usage text printed by the command-line entry point, both when the script is
- # started without arguments and after an ImgurAlbumException has been reported.
- help_message = """
- Quickly and easily download an album from Imgur.
- Format:
- $ python imguralbum.py [album URL] [destination folder]
- Example:
- $ python imguralbum.py http://imgur.com/a/uOOju#6 /Users/alex/images
- If you omit the dest folder name, the utility will create one with the same name
- as the album
- (for example for http://imgur.com/a/uOOju it'll create uOOju/ in the cwd)
- """
class ImgurAlbumException(Exception):
    """Error raised when an album URL is rejected or Imgur cannot be read.

    The human-readable description is stored on the ``msg`` attribute
    (``False`` when the exception is created without a message).
    """

    def __init__(self, msg=False):
        # Exception.__new__ has already captured the positional arguments in
        # ``self.args``; forwarding them keeps ``args`` and ``str(exc)``
        # exactly as they were while making the base initialisation explicit.
        super().__init__(*self.args)
        self.msg = msg
class ImgurAlbumDownloader:
    """Download every image of a single Imgur album.

    Attributes:
        album_url: The URL passed to the constructor.
        protocol: ``"http"`` or ``"https"``, as found in ``album_url``.
        album_key: The key of the album. Helpful if you plan on generating
            your own folder names. (This is a plain attribute: the former
            ``album_key()`` method was shadowed by it on every instance and
            could never be called.)
        imageIDs: ``(hash, extension)`` pairs in page order, without
            duplicates.
        cnt: ``collections.Counter`` mapping each extension to its count.
        image_callbacks / complete_callbacks: Registered callbacks.
    """

    # http(s)://[www.][m.]imgur.com/(a|gallery)/<key>[#<n>]
    # Group 1 is the protocol, group 4 the album key.
    _ALBUM_URL_RE = re.compile(
        r"(https?)://(www\.)?(?:m\.)?imgur\.com/(a|gallery)/([a-zA-Z0-9]+)(#[0-9]+)?"
    )

    # One image entry inside the JSON embedded in the album page.
    _IMAGE_RE = re.compile(
        r'\{"hash":"([a-zA-Z0-9]+)".*?"ext":"(\.[a-zA-Z0-9]+)"'
    )

    def __init__(self, album_url):
        """
        Constructor. Pass in the album_url that you want to download.

        Raises:
            ImgurAlbumException: if the URL is not an Imgur album URL, or if
                the album page cannot be fetched with HTTP status 200.
        """
        self.album_url = album_url

        # Callback members:
        self.image_callbacks = []
        self.complete_callbacks = []

        # Check the URL is actually imgur:
        match = self._ALBUM_URL_RE.match(album_url)
        if not match:
            raise ImgurAlbumException("URL must be a valid Imgur Album")

        self.protocol = match.group(1)
        self.album_key = match.group(4)

        # Read the no-script version of the page for all the images.
        # The request uses plain http regardless of self.protocol, as before.
        fullListURL = "http://imgur.com/a/" + self.album_key + "/layout/blog"

        try:
            self.response = urllib.request.urlopen(url=fullListURL)
        except urllib.error.HTTPError as e:
            self.response = False
            raise ImgurAlbumException(
                "Error reading Imgur: Error Code %d" % e.code) from e
        except OSError as e:
            # URLError, timeouts, refused connections, ...: these carry no
            # HTTP status code, so report the underlying reason instead.
            self.response = False
            raise ImgurAlbumException("Error reading Imgur: %s" % e) from e

        try:
            response_code = self.response.getcode()
            if response_code != 200:
                raise ImgurAlbumException(
                    "Error reading Imgur: Error Code %d" % response_code)
            # Only ASCII hashes/extensions are extracted, so undecodable
            # bytes elsewhere in the page can safely be replaced.
            html = self.response.read().decode('utf-8', errors='replace')
        finally:
            self.response.close()

        # Read in the images now so we can get stats and stuff:
        self.imageIDs = self._parse_image_ids(html)
        self.cnt = Counter(ext for _, ext in self.imageIDs)

    @classmethod
    def _parse_image_ids(cls, html):
        """Return the unique ``(hash, ext)`` pairs found in *html*, in order.

        Keeping page order (instead of an unordered set) makes the numeric
        filename prefix used by ``save_images`` stable between runs, which
        is what allows already-downloaded files to be skipped.
        """
        return list(dict.fromkeys(cls._IMAGE_RE.findall(html)))

    def num_images(self):
        """
        Returns the number of images that are present in this album.
        """
        return len(self.imageIDs)

    def list_extensions(self):
        """
        Returns list with occurrences of extensions in descending order.
        """
        return self.cnt.most_common()

    def on_image_download(self, callback):
        """
        Allows you to bind a function that will be called just before an image is
        about to be downloaded. You'll be given the 1-indexed position of the image, its URL
        and its destination file in the callback like so:
            my_awesome_callback(1, "http://i.imgur.com/fGWX0.jpg", "~/Downloads/1-fGWX0.jpg")
        """
        self.image_callbacks.append(callback)

    def on_complete(self, callback):
        """
        Allows you to bind onto the end of the process, displaying any lovely messages
        to your users, or carrying on with the rest of the program. Whichever.
        """
        self.complete_callbacks.append(callback)

    def save_images(self, foldername=False):
        """
        Saves the images from the album into a folder given by foldername.
        If no foldername is given, it'll use the cwd and the album key.
        And if the folder doesn't exist, it'll try and create it.

        A failed download is reported and skipped; the remaining images are
        still attempted.
        """
        # Try and create the album folder:
        albumFolder = foldername if foldername else self.album_key
        os.makedirs(albumFolder, exist_ok=True)

        # Zero-pad the counter to the number of digits of the image count so
        # that the files sort in album order.
        width = len(str(len(self.imageIDs)))

        # And finally loop through and save the images:
        for counter, (image_hash, ext) in enumerate(self.imageIDs, start=1):
            image_url = "http://i.imgur.com/" + image_hash + ext
            prefix = "%0*d-" % (width, counter)
            path = os.path.join(albumFolder, prefix + image_hash + ext)

            # Run the callbacks:
            for fn in self.image_callbacks:
                fn(counter, image_url, path)

            # Actually download the thing
            if os.path.isfile(path):
                print("Skipping, already exists.")
                continue

            try:
                urllib.request.urlretrieve(image_url, path)
            except Exception:
                # Best effort per image: report and carry on with the next
                # one (Ctrl-C and SystemExit are no longer swallowed).
                print("Download failed.")
                # Drop a partially written file, if one was created at all.
                try:
                    os.remove(path)
                except OSError:
                    pass

        # Run the complete callbacks:
        for fn in self.complete_callbacks:
            fn()
def main(argv=None):
    """Command-line entry point: download one Imgur album.

    Args:
        argv: Argument list shaped like ``sys.argv`` (program name first,
            then the album URL and an optional destination folder).
            Defaults to ``sys.argv``.

    Returns:
        The process exit status: 0 after printing the help text or after a
        completed run, 1 when an ImgurAlbumException was reported.
    """
    args = sys.argv if argv is None else argv

    if len(args) < 2:
        # Print out the help message and exit:
        print(help_message)
        return 0

    # Called when an image is about to download:
    def print_image_progress(index, url, dest):
        print("Downloading Image %d" % index)
        print(" %s >> %s" % (url, dest))

    # Called when the downloads are all done.
    def all_done():
        print("")
        print("Done!")

    try:
        # Fire up the class:
        downloader = ImgurAlbumDownloader(args[1])

        print("Found {0} images in album".format(downloader.num_images()))
        for extension, count in downloader.list_extensions():
            print("Found {0} files with {1} extension".format(count, extension))

        downloader.on_image_download(print_image_progress)
        downloader.on_complete(all_done)

        # Work out if we have a foldername or not. Any arguments after the
        # folder are ignored rather than causing the folder to be dropped.
        albumFolder = args[2] if len(args) >= 3 else False

        # Enough talk, let's save!
        downloader.save_images(albumFolder)
        return 0

    except ImgurAlbumException as e:
        # format() copes with a non-string msg (the default is False).
        print("Error: {0}".format(e.msg))
        print("")
        print("How to use")
        print("=============")
        print(help_message)
        return 1


if __name__ == '__main__':
    # sys.exit is always available, unlike the site-provided exit() helper.
    sys.exit(main())
Advertisement
Add Comment
Please, Sign In to add comment