#!/bin/python3
"""Download airplane photos from russianplanes.net.

A simple, quick and dirty script to back up airplane photos from
russianplanes in case military planes are scrubbed from the site, so the
photos can still be used to identify downed planes in Ukraine.

Usage: script.py [-s START] [-e END] [-d]

Images are saved in the current directory as "<number>.jpg". Numbers whose
file already exists are skipped, so an interrupted run can be resumed.
"""

# Import required libraries
import argparse
import http.client
import os.path
import shutil
import sys
import time
import urllib.error
import urllib.request

# Base address under which the numbered image folders live.
BASE_URL = "https://russianplanes.net/images/"
# Default end number when -e is not given.
DEFAULT_END = 262000
# Default number of images to attempt when -s is not given.
DEFAULT_COUNT = 1000
# The run is aborted once more than this many non-image responses were seen.
MAX_ERRORS = 10
# Seconds to wait after a failed request, to avoid spamming the server.
ERROR_DELAY = 1


def parse_arguments(argv=None):
    """Parse the command line (``argv`` defaults to ``sys.argv[1:]``).

    ``-s`` and ``-e`` are converted with ``type=int`` so that a non-numeric
    value produces a normal usage error instead of a traceback.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-s", type=int, help="Start number.")
    parser.add_argument("-e", type=int, help="End number.")
    parser.add_argument("-d", help="Debug mode.", action="store_true")
    return parser.parse_args(argv)


def resolve_range(args):
    """Return ``(start, end)`` for the parsed arguments.

    The end defaults to ``DEFAULT_END`` and the start defaults to
    ``DEFAULT_COUNT`` numbers before the end.
    """
    end = args.e if args.e is not None else DEFAULT_END
    start = args.s if args.s is not None else end - DEFAULT_COUNT
    return start, end


def folder_for(number):
    """Return the server folder name for image ``number``.

    Images reside in a folder called "to<next multiple of 1000 above the
    image number>", e.g. 261500 -> "to262000". Integer arithmetic is used
    instead of slicing decimal digits so the result is also correct when the
    thousands digit is 9 (259500 -> "to260000") and for numbers that do not
    have exactly six digits.

    NOTE(review): an exact multiple of 1000 maps to the *following* folder
    (261000 -> "to262000"), as in the original formula; confirm against the
    site's actual layout.
    """
    return "to" + str((number // 1000 + 1) * 1000)


def build_url(number):
    """Return the full URL of image ``number``."""
    return BASE_URL + folder_for(number) + "/" + str(number) + ".jpg"


def save_response(response, destination):
    """Stream an open HTTP ``response`` body into the file ``destination``.

    The data is written to "<destination>.part" first and renamed only once
    the copy has finished, so an interrupted download never leaves a
    truncated file that a later run would skip as already downloaded.
    """
    partial = destination + ".part"
    try:
        with open(partial, "wb") as out:
            shutil.copyfileobj(response, out)
        os.replace(partial, destination)
    except BaseException:
        # Clean up the temporary file, then let the original error propagate.
        try:
            os.remove(partial)
        except OSError:
            pass
        raise


def main(argv=None):
    """Download every missing image in the requested number range."""
    args = parse_arguments(argv)
    start_number, end_number = resolve_range(args)
    debug = args.d

    # Print short debug info if flag -d is set.
    if debug:
        print("Start: " + str(start_number) + ". End: " + str(end_number)
              + ". Total number of files: "
              + str(end_number - start_number) + ".")

    error_count = 0
    for number in range(start_number, end_number):
        # Stop once too many non-image responses have been seen.
        if error_count > MAX_ERRORS:
            print("Too many errors... Quitting!")
            break

        # Skip numbers that were already downloaded to the current directory.
        filename = str(number) + ".jpg"
        if os.path.isfile(filename):
            if debug:
                print("File " + filename + " exists. Skipping...")
            continue

        url = build_url(number)

        # Open the URL once; the same response is used both for the content
        # type check and for the download. Only network/HTTP failures are
        # caught, so Ctrl-C still interrupts the script.
        try:
            response = urllib.request.urlopen(url)
        except (urllib.error.URLError, OSError, http.client.HTTPException):
            if debug:
                print("Error when opening " + url
                      + "... Skipping to next in one second...")
            time.sleep(ERROR_DELAY)
            continue

        with response:
            # Only non-image responses count towards the error limit.
            if response.info().get_content_maintype() != "image":
                error_count += 1
                if debug:
                    print("Requested URL is not an image. Skipping... "
                          "Current errors: " + str(error_count))
                continue

            # Print which url is being fetched if flag -d is set.
            if debug:
                print("Fetching " + url + "...")

            save_response(response, filename)


if __name__ == "__main__":
    main(sys.argv[1:])