Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # PYTHON | Increment Image URL downloading script
- # Download images from pages that store their images using an incremented URL
- # please create a folder called 'data'. Downloaded data will be stored there.
- # Based on BeautifulSoup. Ideally used on Google Colab with Google Drive, see line 10
import os
from urllib.parse import urljoin

from bs4 import *
import requests
- # If you are using Google Colab and also want to use Google Drive to store your
- # files, remove the leading # from the following two lines
- # from google.colab import drive
- # drive.mount('/content/drive')
# "downloaded" counts the images actually saved and is only used for the log.
# start and stop define the range of URL suffixes to visit; as with range(),
# stop itself is not visited.
downloaded = 0
start = 1
stop = 100

# Folder the images are written to (created automatically if missing).
folder_name = "data"

# <img> attributes that can carry the image address, in order of preference.
SOURCE_ATTRIBUTES = ("data-srcset", "data-src", "data-fallback-src", "src")

# Position of the wanted <img> tag on each page: the script downloads the
# third image it finds. Adjust this for the layout of your target site.
IMAGE_INDEX = 2

# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 30


def select_image_link(image, page_url):
    """Return the absolute URL of the picture referenced by an <img> tag.

    The attributes in SOURCE_ATTRIBUTES are tried in order and the first
    non-empty one wins.

    Args:
        image: The <img> tag; any object with a dict-style ``get`` works
            (for example a BeautifulSoup tag or a plain dict).
        page_url: URL of the page the tag came from, used to resolve
            relative and protocol-relative image addresses.

    Returns:
        The absolute image URL, or None when the tag has no usable source.
    """
    for attribute in SOURCE_ATTRIBUTES:
        value = image.get(attribute)
        if not value:
            continue
        if attribute == "data-srcset":
            # A srcset is a comma-separated list of "URL descriptor"
            # candidates; use the URL of the first candidate.
            parts = value.split(",")[0].split()
            value = parts[0] if parts else ""
        value = value.strip()
        if value:
            return urljoin(page_url, value)
    return None


def is_binary_payload(payload):
    """Return True when ``payload`` (bytes) is not valid UTF-8 text.

    Used as a cheap check that a response is real image data rather than
    a text document such as an HTML error page.
    """
    try:
        payload.decode("utf-8")
    except UnicodeDecodeError:
        return True
    return False


def download_image(image_link, target_path):
    """Download ``image_link`` and store it at ``target_path``.

    Returns:
        True when a file was written, False when the request failed, the
        response was text instead of binary data, or the file could not
        be written.
    """
    try:
        response = requests.get(image_link, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as error:
        print("Skipping image", image_link, "-", error)
        return False

    payload = response.content
    if not is_binary_payload(payload):
        # The server answered with text, so there is no image to save.
        return False

    try:
        with open(target_path, "wb") as image_file:
            image_file.write(payload)
    except OSError as error:
        print("Could not write", target_path, "-", error)
        return False
    return True


def fetch_image_tags(url):
    """Return all <img> tags found on the page at ``url``.

    An empty list is returned when the page cannot be retrieved, so one
    unreachable page does not abort the whole run.
    """
    try:
        page = requests.get(url, timeout=REQUEST_TIMEOUT)
        page.raise_for_status()
    except requests.RequestException as error:
        print("Skipping page", url, "-", error)
        return []
    return BeautifulSoup(page.text, "html.parser").find_all("img")


# Main script. Please insert your target url before running.
if __name__ == "__main__":
    os.makedirs(folder_name, exist_ok=True)
    for i in range(start, stop):
        url = "https://<DOMAIN>.<TLD>" + str(i)
        images = fetch_image_tags(url)

        # Only pages that really contain the wanted image are processed.
        if len(images) > IMAGE_INDEX:
            image_link = select_image_link(images[IMAGE_INDEX], url)
            if image_link is not None and download_image(
                image_link, f"{folder_name}/images{i}.jpg"
            ):
                downloaded += 1

        # Pages still to be visited after this one.
        remaining = stop - i - 1
        print("Downloaded: ", downloaded, "\tRemaining: ", remaining)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement