Advertisement
SoerenHelms

Increment Image URL download script

Aug 12th, 2022 (edited)
987
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.49 KB | None | 0 0
  1. # PYTHON | Increment Image URL downloading script
  2.  
  3. # Download images from pages that store their images using an incremented URL
  4. # please create a folder called 'data'. Downloaded data will be stored there.
  5. # Based on beatifulsoup. Idealy used on Google Colab with Google Drive, see line 10
  6. from bs4 import *
  7. import requests
  8. import os
  9.  
  10. # If you are using Google Colab, and also want to use Google Drive to store your
  11. # files, then remove the preceding # from following two lines
  12. # from google.colab import drive
  13. # drive.mount('/content/drive')
  14.  
  15. # Downloaded just used for the log, start and stop allow to define a range
  16. downloaded = 0
  17. start = 1
  18. stop = 100
  19.  
  20. # Main script. Please insert your target url before running.
  21. for i in range(start,stop):
  22.     url = "https://<DOMAIN>.<TLD>"+str(i)
  23.  
  24.     # get content of URL
  25.     r = requests.get(url)
  26.  
  27.     # parse HTML code
  28.     soup = BeautifulSoup(r.text, 'html.parser')
  29.  
  30.     # find all images in URL
  31.     images = soup.findAll('img')
  32.  
  33.     # select folder
  34.     folder_name = "data"
  35.  
  36.     # checking if images is not zero
  37.     if len(images) != 0:
  38.         image = images[2]
  39.         try:
  40.             # In image tag ,searching for "data-srcset"
  41.             image_link = image["data-srcset"]
  42.            
  43.         # then we will search for "data-src" in img tag and so on..
  44.         except:
  45.             try:
  46.                 # In image tag ,searching for "data-src"
  47.                 image_link = image["data-src"]
  48.             except:
  49.                 try:
  50.                     # In image tag ,searching for "data-fallback-src"
  51.                     image_link = image["data-fallback-src"]
  52.                 except:
  53.                     try:
  54.                         # In image tag ,searching for "src"
  55.                         image_link = image["src"]
  56.  
  57.                     # if no Source URL found
  58.                     except:
  59.                         pass
  60.  
  61.         # after getting image URL, try to download the image
  62.         try:
  63.             r = requests.get(image_link).content
  64.             try:
  65.  
  66.                 # possibility of decode
  67.                 r = str(r, 'utf-8')
  68.  
  69.             except UnicodeDecodeError:
  70.  
  71.                 # After checking above condition, Image Download start
  72.                 with open(f"{folder_name}/images{i}.jpg", "wb+") as f:
  73.                     f.write(r)
  74.  
  75.         except:
  76.             pass
  77.  
  78.         downloaded += 1
  79.         remaining = stop - (start + downloaded)
  80.         print("Downloaded: ", downloaded, "\tRemaining: ", remaining)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement