# Untitled

#!/bin/python3
# A simple, quick-and-dirty script that downloads airplane photos from russianplanes.net, backing them up against attempted scrubbing of military planes so they can be used to identify downed planes in Ukraine.

# Import required libraries
import urllib.request
import os.path
import sys, argparse, time

# Initiate commandline argument parser
parser = argparse.ArgumentParser()

# Add commandline options
parser.add_argument("-s", help="Start number.")
parser.add_argument("-e", help="End number.")
parser.add_argument("-d", help="Debug mode.", action="store_true")

# Parse commandline arguments
args=parser.parse_args()

# Set endNumber and startNumber variables to default values if not specified otherwise
if args.e != None:
  endNumber = int(args.e)
else:
  endNumber = 262000
if args.s != None:
  startNumber = int(args.s)
else:
  startNumber = endNumber - 1000

# Print short debug info if flag -d is set.
if args.d == True:
  print("Start: " + str(startNumber) + ". End: " + str(endNumber) + ". Total number of files: " + str(endNumber - startNumber) + ".")

# Initiate numbers for iteration loop
curNumber = startNumber
errorNums = 0

# While current image number is less than end number, iterate through loop
while curNumber < endNumber:
  # Break program if errorNums is over 10
  if errorNums > 10:
    print("Too many errors... Quitting!")
    break
  # Set filename to current number and check if file already exists in current directory. If so, continue the loop at next number.
  filename = str(curNumber) + ".jpg"
  if os.path.isfile(filename):
    if args.d == True:
      print("File " + filename + " exists. Skipping...")
    curNumber += 1
    continue
  # Set variable folderName according to the formula of images residing in a folder called /to[closest 1000 over current file numbering. Then set URL with folder and filename.
  folderName = "to" + str(curNumber)[0] + str(curNumber)[1] + str(int(str(curNumber)[2])+1) + "000"
  url = "https://russianplanes.net/images/" + folderName + "/" + filename
  # Try to open URL to catch HTTP errors. If errors occur, sleep for 1 second to prevent request spamming server, then continue loop at next number.
  try:
    openedURL = urllib.request.urlopen(url)
  except:
    if args.d == True:
      print("Error when opening " + url + "... Skipping to next in one second...")
      time.sleep(1)
    curNumber += 1
    continue
  # Check if the content at the requested URL is actually an image. If it's not, increase errorNums and continue loop at next image number.
  if urllib.request.urlopen(url).info().get_content_maintype() != "image":
    errorNums += 1
    curNumber += 1
    if args.d == True:
      print("Requested URL is not an image. Skipping... Current errors: " + str(errorNums))
    continue
  # Print which url is being fetched if flag -d is set.
  if args.d == True:
    print("Fetching " + url + "...")
  # And finally actually fetch the file at URL and save to curNumber.jpg before increasing curNumber and repeating loop.
  urllib.request.urlretrieve(url, str(curNumber) + ".jpg")
  curNumber += 1