schonke

Untitled

Mar 4th, 2022 (edited)
1,897
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.07 KB | None | 0 0
  1. #!/bin/python3
  2. # A simple, quick and dirty script to download airplane photos from russianplanes to backup attempted scrubbing of military planes for use to identify downed planes in Ukraine.
  3.  
  4. # Import required libraries
  5. import urllib.request
  6. import os.path
  7. import sys, argparse, time
  8.  
  9. # Initiate commandline argument parser
  10. parser = argparse.ArgumentParser()
  11.  
  12. # Add commandline options
  13. parser.add_argument("-s", help="Start number.")
  14. parser.add_argument("-e", help="End number.")
  15. parser.add_argument("-d", help="Debug mode.", action="store_true")
  16.  
  17. # Parse commandline arguments
  18. args=parser.parse_args()
  19.  
  20. # Set endNumber and startNumber variables to default values if not specified otherwise
  21. if args.e != None:
  22.   endNumber = int(args.e)
  23. else:
  24.   endNumber = 262000
  25. if args.s != None:
  26.   startNumber = int(args.s)
  27. else:
  28.   startNumber = endNumber - 1000
  29.  
  30. # Print short debug info if flag -d is set.
  31. if args.d == True:
  32.   print("Start: " + str(startNumber) + ". End: " + str(endNumber) + ". Total number of files: " + str(endNumber - startNumber) + ".")
  33.  
  34. # Initiate numbers for iteration loop
  35. curNumber = startNumber
  36. errorNums = 0
  37.  
  38. # While current image number is less than end number, iterate through loop
  39. while curNumber < endNumber:
  40.   # Break program if errorNums is over 10
  41.   if errorNums > 10:
  42.     print("Too many errors... Quitting!")
  43.     break
  44.   # Set filename to current number and check if file already exists in current directory. If so, continue the loop at next number.
  45.   filename = str(curNumber) + ".jpg"
  46.   if os.path.isfile(filename):
  47.     if args.d == True:
  48.       print("File " + filename + " exists. Skipping...")
  49.     curNumber += 1
  50.     continue
  51.   # Set variable folderName according to the formula of images residing in a folder called /to[closest 1000 over current file numbering. Then set URL with folder and filename.
  52.   folderName = "to" + str(curNumber)[0] + str(curNumber)[1] + str(int(str(curNumber)[2])+1) + "000"
  53.   url = "https://russianplanes.net/images/" + folderName + "/" + filename
  54.   # Try to open URL to catch HTTP errors. If errors occur, sleep for 1 second to prevent request spamming server, then continue loop at next number.
  55.   try:
  56.     openedURL = urllib.request.urlopen(url)
  57.   except:
  58.     if args.d == True:
  59.       print("Error when opening " + url + "... Skipping to next in one second...")
  60.       time.sleep(1)
  61.     curNumber += 1
  62.     continue
  63.   # Check if the content at the requested URL is actually an image. If it's not, increase errorNums and continue loop at next image number.
  64.   if urllib.request.urlopen(url).info().get_content_maintype() != "image":
  65.     errorNums += 1
  66.     curNumber += 1
  67.     if args.d == True:
  68.       print("Requested URL is not an image. Skipping... Current errors: " + str(errorNums))
  69.     continue
  70.   # Print which url is being fetched if flag -d is set.
  71.   if args.d == True:
  72.     print("Fetching " + url + "...")
  73.   # And finally actually fetch the file at URL and save to curNumber.jpg before increasing curNumber and repeating loop.
  74.   urllib.request.urlretrieve(url, str(curNumber) + ".jpg")
  75.   curNumber += 1
Advertisement
Add Comment
Please, Sign In to add comment