Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/bin/python3
"""Download airplane photos from russianplanes.net by numeric image id.

A simple, quick and dirty script to back up airplane photos from
russianplanes against attempted scrubbing of military planes, so the photos
can be used to identify downed planes in Ukraine.

Options:
    -s START  first image number (default: END - 1000)
    -e END    image number to stop before (default: 262000)
    -d        debug mode, print progress messages

Each image is saved as "<number>.jpg" in the current directory. Numbers whose
file already exists are skipped, so an interrupted run can be resumed.
"""
import argparse
import http.client
import os.path
import sys
import time
import urllib.request

BASE_URL = "https://russianplanes.net/images/"
DEFAULT_END = 262000
DEFAULT_COUNT = 1000
# The run is aborted once more than this many URLs returned a non-image.
MAX_ERRORS = 10
# Seconds to wait for the server before treating a request as failed.
REQUEST_TIMEOUT = 30


def folder_name_for(number):
    """Return the server folder name ("to<N>") for image *number*.

    Images reside in a folder named after the next multiple of 1000 above
    the image number, e.g. 261234 -> "to262000". Integer arithmetic is used
    so that the carry (259123 -> "to260000") and numbers with fewer or more
    than six digits are handled correctly.

    NOTE(review): an exact multiple of 1000 maps to the *next* folder
    (261000 -> "to262000"), as the original digit-based formula did --
    confirm against the site.
    """
    return "to" + str((number // 1000 + 1) * 1000)


def image_url(number):
    """Return the full URL of image *number*."""
    return BASE_URL + folder_name_for(number) + "/" + str(number) + ".jpg"


def parse_args(argv=None):
    """Parse the command line and return ``(start, end, debug)``.

    *argv* defaults to ``sys.argv[1:]``. When -e is omitted the end number is
    DEFAULT_END; when -s is omitted the start is DEFAULT_COUNT below the end.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-s", type=int, help="Start number.")
    parser.add_argument("-e", type=int, help="End number.")
    parser.add_argument("-d", help="Debug mode.", action="store_true")
    args = parser.parse_args(argv)

    end = args.e if args.e is not None else DEFAULT_END
    start = args.s if args.s is not None else end - DEFAULT_COUNT
    return start, end, args.d


def download_range(start, end, debug=False):
    """Download images ``start`` .. ``end - 1`` into the current directory.

    Existing files are skipped. A request that fails is skipped after a one
    second pause; a response that is not an image counts as an error, and the
    run stops once more than MAX_ERRORS such errors have accumulated.
    """
    error_count = 0
    for number in range(start, end):
        if error_count > MAX_ERRORS:
            print("Too many errors... Quitting!")
            break

        filename = str(number) + ".jpg"
        if os.path.isfile(filename):
            if debug:
                print("File " + filename + " exists. Skipping...")
            continue

        url = image_url(number)
        data = None
        try:
            # One request per image: inspect the headers and, if it really is
            # an image, read the body from the same response.
            with urllib.request.urlopen(url, timeout=REQUEST_TIMEOUT) as response:
                if response.info().get_content_maintype() == "image":
                    if debug:
                        print("Fetching " + url + "...")
                    data = response.read()
        except (OSError, http.client.HTTPException):
            # URLError/HTTPError and timeouts are OSError subclasses. Ctrl-C
            # is deliberately not caught here so the script can be stopped.
            if debug:
                print("Error when opening " + url + "... Skipping to next in one second...")
            # Sleep to prevent request spamming the server.
            time.sleep(1)
            continue

        if data is None:
            error_count += 1
            if debug:
                print("Requested URL is not an image. Skipping... Current errors: " + str(error_count))
            continue

        # Write to a temporary name first so a failed write never leaves a
        # truncated "<number>.jpg" that a later run would skip as complete.
        partial = filename + ".part"
        with open(partial, "wb") as out:
            out.write(data)
        os.replace(partial, filename)


def main(argv=None):
    """Script entry point; returns the process exit status."""
    start, end, debug = parse_args(argv)
    if debug:
        print(f"Start: {start}. End: {end}. Total number of files: {end - start}.")
    download_range(start, end, debug)
    return 0


if __name__ == "__main__":
    sys.exit(main())
Advertisement
Add Comment
Please, Sign In to add comment