Guest User

imgur-dl.py

a guest
Apr 28th, 2017
873
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.50 KB | None | 0 0
  1. #!/usr/bin/env python3
  2. # encoding: utf-8
  3.  
  4.  
  5. """
  6. imguralbum.py - Download a whole imgur album in one go.
  7.  
  8. Provides both a class and a command line utility in a single script
  9. to download Imgur albums.
  10.  
  11. MIT License
  12. Copyright Alex Gisby <[email protected]>
  13. """
  14.  
  15.  
  16. import sys
  17. import re
  18. import urllib.request, urllib.parse, urllib.error
  19. import os
  20. import math
  21. from collections import Counter
  22.  
  23.  
# Usage text for the command line utility. The script entry point prints it
# when no arguments are given and again after reporting an ImgurAlbumException.
help_message = """
Quickly and easily download an album from Imgur.

Format:
$ python imguralbum.py [album URL] [destination folder]

Example:
$ python imguralbum.py http://imgur.com/a/uOOju#6 /Users/alex/images

If you omit the dest folder name, the utility will create one with the same name
as the album
(for example for http://imgur.com/a/uOOju it'll create uOOju/ in the cwd)
"""
  37.  
  38.  
  39. class ImgurAlbumException(Exception):
  40. def __init__(self, msg=False):
  41. self.msg = msg
  42.  
  43.  
  44. class ImgurAlbumDownloader:
  45. def __init__(self, album_url):
  46. """
  47. Constructor. Pass in the album_url that you want to download.
  48. """
  49. self.album_url = album_url
  50.  
  51. # Callback members:
  52. self.image_callbacks = []
  53. self.complete_callbacks = []
  54.  
  55. # Check the URL is actually imgur:
  56. match = re.match("(https?)\:\/\/(www\.)?(?:m\.)?imgur\.com/(a|gallery)/([a-zA-Z0-9]+)(#[0-9]+)?", album_url)
  57. if not match:
  58. raise ImgurAlbumException("URL must be a valid Imgur Album")
  59.  
  60. self.protocol = match.group(1)
  61. self.album_key = match.group(4)
  62.  
  63. # Read the no-script version of the page for all the images:
  64. fullListURL = "http://imgur.com/a/" + self.album_key + "/layout/blog"
  65.  
  66. try:
  67. self.response = urllib.request.urlopen(url=fullListURL)
  68. response_code = self.response.getcode()
  69. except Exception as e:
  70. self.response = False
  71. response_code = e.code
  72.  
  73. if not self.response or self.response.getcode() != 200:
  74. raise ImgurAlbumException("Error reading Imgur: Error Code %d" % response_code)
  75.  
  76. # Read in the images now so we can get stats and stuff:
  77. html = self.response.read().decode('utf-8')
  78. self.imageIDs = set(re.findall('.*?{"hash":"([a-zA-Z0-9]+)".*?"ext":"(\.[a-zA-Z0-9]+)".*?', html))
  79.  
  80. self.cnt = Counter()
  81. for i in self.imageIDs:
  82. self.cnt[i[1]] += 1
  83.  
  84.  
  85. def num_images(self):
  86. """
  87. Returns the number of images that are present in this album.
  88. """
  89. return len(self.imageIDs)
  90.  
  91.  
  92. def list_extensions(self):
  93. """
  94. Returns list with occurrences of extensions in descending order.
  95. """
  96. return self.cnt.most_common()
  97.  
  98.  
  99. def album_key(self):
  100. """
  101. Returns the key of this album. Helpful if you plan on generating your own
  102. folder names.
  103. """
  104. return self.album_key
  105.  
  106.  
  107. def on_image_download(self, callback):
  108. """
  109. Allows you to bind a function that will be called just before an image is
  110. about to be downloaded. You'll be given the 1-indexed position of the image, it's URL
  111. and it's destination file in the callback like so:
  112. my_awesome_callback(1, "http://i.imgur.com/fGWX0.jpg", "~/Downloads/1-fGWX0.jpg")
  113. """
  114. self.image_callbacks.append(callback)
  115.  
  116.  
  117. def on_complete(self, callback):
  118. """
  119. Allows you to bind onto the end of the process, displaying any lovely messages
  120. to your users, or carrying on with the rest of the program. Whichever.
  121. """
  122. self.complete_callbacks.append(callback)
  123.  
  124.  
  125. def save_images(self, foldername=False):
  126. """
  127. Saves the images from the album into a folder given by foldername.
  128. If no foldername is given, it'll use the cwd and the album key.
  129. And if the folder doesn't exist, it'll try and create it.
  130. """
  131. # Try and create the album folder:
  132. if foldername:
  133. albumFolder = foldername
  134. else:
  135. albumFolder = self.album_key
  136.  
  137. if not os.path.exists(albumFolder):
  138. os.makedirs(albumFolder)
  139.  
  140. # And finally loop through and save the images:
  141. for (counter, image) in enumerate(self.imageIDs, start=1):
  142. image_url = "http://i.imgur.com/"+image[0]+image[1]
  143.  
  144. prefix = "%0*d-" % (
  145. int(math.ceil(math.log(len(self.imageIDs) + 1, 10))),
  146. counter
  147. )
  148. path = os.path.join(albumFolder, prefix + image[0] + image[1])
  149.  
  150. # Run the callbacks:
  151. for fn in self.image_callbacks:
  152. fn(counter, image_url, path)
  153.  
  154. # Actually download the thing
  155. if os.path.isfile(path):
  156. print ("Skipping, already exists.")
  157. else:
  158. try:
  159. urllib.request.urlretrieve(image_url, path)
  160. except:
  161. print ("Download failed.")
  162. os.remove(path)
  163.  
  164. # Run the complete callbacks:
  165. for fn in self.complete_callbacks:
  166. fn()
  167.  
  168.  
  169. if __name__ == '__main__':
  170. args = sys.argv
  171.  
  172. if len(args) == 1:
  173. # Print out the help message and exit:
  174. print (help_message)
  175. exit()
  176.  
  177. try:
  178. # Fire up the class:
  179. downloader = ImgurAlbumDownloader(args[1])
  180.  
  181. print(("Found {0} images in album".format(downloader.num_images())))
  182.  
  183. for i in downloader.list_extensions():
  184. print(("Found {0} files with {1} extension".format(i[1],i[0])))
  185.  
  186. # Called when an image is about to download:
  187. def print_image_progress(index, url, dest):
  188. print(("Downloading Image %d" % index))
  189. print((" %s >> %s" % (url, dest)))
  190. downloader.on_image_download(print_image_progress)
  191.  
  192. # Called when the downloads are all done.
  193. def all_done():
  194. print ("")
  195. print ("Done!")
  196. downloader.on_complete(all_done)
  197.  
  198. # Work out if we have a foldername or not:
  199. if len(args) == 3:
  200. albumFolder = args[2]
  201. else:
  202. albumFolder = False
  203.  
  204. # Enough talk, let's save!
  205. downloader.save_images(albumFolder)
  206. exit()
  207.  
  208. except ImgurAlbumException as e:
  209. print(("Error: " + e.msg))
  210. print ("")
  211. print ("How to use")
  212. print ("=============")
  213. print (help_message)
  214. exit(1)
Advertisement
Add Comment
Please, Sign In to add comment