Guest User

Scrape the Google Static Maps API to generate a terrain data set

a guest
Oct 24th, 2017
31
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 6.72 KB | None | 0 0
  1. import requests
  2. import shutil
  3. import random
  4. import time
  5. import statistics
  6. import os
  7. from PIL import Image
  8. from PIL import ImageStat
  9. from PIL import ImageEnhance
  10. from os.path import isfile, join
  11. from selenium import webdriver
  12. from selenium.webdriver.common.keys import Keys
  13. from selenium.webdriver.common.by import By
  14. from selenium.webdriver.support.ui import WebDriverWait
  15. from selenium.webdriver.support import expected_conditions as EC
  16.  
  17.  
  18. # requires selenium, requests, statistics and pillow
  19.  
  20.  
  21. # define constants
  22. # image constants
  23. NUM_IMAGES = 7700
  24. NUM_COLORS = 8
  25. IMAGE_SIZE = 128, 128
  26. MAX_STANDARD_DEVIATION = 10000
  27. MIN_BLUE = 0.452
  28.  
  29. # request constants
  30. get_zoom = lambda: random.choice(['8', '9', '10', '11'])
  31. BASE_URL = 'http://maps.google.com/maps/api/staticmap'
  32. REQUEST_DELAY = 0
  33. SAVE_LOCATION = 'Images'
  34.  
  35. # ROUTER_URL = 'http://10.0.0.138/gateway.lp'
  36. # ROUTER_USERNAME = 'admin'
  37. # ROUTER_PASSWORD = 'password'
  38.  
  39. # driver = webdriver.Chrome()
  40.  
  41.  
  42. def mean_color(colors, type='histogram'):
  43.     # get the average color of the image
  44.     if type == 'histogram':
  45.         frequency = []
  46.         for c in colors:
  47.             frequency += [c[1]]*c[0]
  48.         colors = frequency
  49.        
  50.     elif type != 'samples':
  51.         raise KeyError
  52.        
  53.     # do mean for each r, g, b
  54.     mean_color = map(statistics.mean, zip(*colors))
  55.     return mean_color
  56.    
  57. def color_standard_deviation(colors):
  58.     # get the standard deviation of various colors in the image
  59.     frequency = []
  60.     for c in colors:
  61.         frequency += [c[1]]*c[0]
  62.        
  63.     mean = mean_color(frequency, type='samples')
  64.    
  65.     s = []
  66.     for color in frequency:
  67.         # standard deviation over 3 dimensions, as opposed to 1
  68.         s.append(sum(map(lambda x: (color[x]-mean[x])**2, [0, 1, 2])))
  69.    
  70.     stdev = sum(s)**0.5
  71.     return stdev
  72.    
  73. def color_percentage(color):
  74.     # get how much each channel makes up the color as a percentage
  75.     total = sum(color)
  76.     try:
  77.         c_percentage = map(lambda x: x/total, color)
  78.        
  79.     except ZeroDivisionError:
  80.         # all channels are equal
  81.         return 1.0/3, 1.0/3, 1.0/3
  82.    
  83.     return c_percentage
  84.  
  85. def random_location():
  86.     # pick a random latitude and longditude
  87.     lat = str(round(random.uniform(-90, 89), 7))
  88.     lon = str(round(random.uniform(-180, 179), 7))
  89.     return lat, lon
  90.    
  91. def format_image(img):
  92.     # make the image suitable for processing. Reduce color palette and size.
  93.     img = img.convert('P', palette=Image.ADAPTIVE, colors=NUM_COLORS)
  94.     img = img.crop((0, 0)+IMAGE_SIZE)
  95.     return img.convert('RGB')
  96.    
  97. def eval_image(img):
  98.     # get whether or not the image is suitable for the dataset.
  99.     stdev = color_standard_deviation(img.getcolors())
  100.     if stdev > MAX_STANDARD_DEVIATION:
  101.         mean = mean_color(img.getcolors())
  102.         r, g, b = color_percentage(mean)
  103.        
  104.         # filter out the ocean
  105.         if b < MIN_BLUE:
  106.             return True
  107.    
  108.     return False
  109.    
  110. def handle_api_limit():
  111.     # if we send to many requests to google's API they will block us.
  112.     print '[!] API limit hit'
  113.     time.sleep(80000)
  114.  
  115.     # I have it setup such that when the api limit is reached after a few minutes, I automate my router to reset its IP address
  116.     # WebDriverWait(driver, 40).until(EC.presence_of_element_located((By.ID, 'Disconnect')))
  117.    
  118.     # driver.find_element_by_id('Disconnect').click()
  119.     # WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.ID, 'Connect')))
  120.    
  121.     # driver.find_element_by_id('Connect').click()
  122.     # WebDriverWait(driver, 40).until(EC.presence_of_element_located((By.ID, 'Disconnect')))
  123.    
  124. def post_format_images():
  125.     # normalize the brightness for all images in dataset
  126.     # list files, get average brightness
  127.     files = [f for f in listdir(SAVE_LOCATION) if isfile(join(SAVE_LOCATION, f))]
  128.     total = []
  129.     for f in files:
  130.         img = Image.open(os.join+f).convert('L')
  131.         stat = ImageStat.Stat(img)
  132.         total.append(stat.mean[0])
  133.        
  134.     avrg = statistics.mean(total)
  135.     print '[*] Normalizing images for average brightness: {}'.format(avrg)
  136.    
  137.     # normalize brightness for each image and save in 'Normalized_SAVE_LOCATION'
  138.     for f in files:
  139.         img = Image.open(join(SAVE_LOCATION, f))
  140.         gs_img = img.convert('L')
  141.         stat = ImageStat.Stat(gs_img)
  142.         intensity = avrg / stat.mean[0]
  143.         enhancer = ImageEnhance.Brightness(img)
  144.         img = enhancer.enhance(intensity)
  145.         img = img.resize(IMAGE_SIZE)
  146.        
  147.         img.save(os.join('Normalized_{}'.format(SAVE_LOCATION), f))
  148.        
  149. def initiate_driver(d):
  150.     # this is for my router to automate resetting the IP address
  151.     d.get(ROUTER_URL)
  152.     username_box = d.find_element_by_id('srp_username')
  153.     username_box.clear()
  154.     username_box.send_keys(ROUTER_USERNAME)
  155.    
  156.     password_box = d.find_element_by_id('srp_password')
  157.     password_box.clear()
  158.     password_box.send_keys(ROUTER_PASSWORD)  
  159.  
  160.     d.find_element_by_id('sign-me-in').click()
  161.    
  162.     WebDriverWait(driver, 40).until(EC.element_to_be_clickable((By.XPATH, '//u[text()=\'Internet Access\']'))).click()
  163.     time.sleep(6)
  164.     return True
  165.  
  166.    
  167. def main():
  168.     # initiate_driver(driver)
  169.     num_images = 0
  170.    
  171.     # We need NUM_IMAGES to be added to the data-set
  172.     while num_images < NUM_IMAGES:
  173.         time.sleep(REQUEST_DELAY)
  174.         lat, lon = random_location()
  175.         payload = {'scale': '1',
  176.                    'center': '{}%2C{}'.format(lat, lon),
  177.                    'zoom': get_zoom(),
  178.                    'maptype': 'satellite',
  179.                    'sensor': 'false',
  180.                    'size': '{}x{}'.format(int(IMAGE_SIZE[0]*1.1), int(IMAGE_SIZE[0]*1.1))
  181.                   }
  182.                    
  183.         r = requests.get(BASE_URL, params=payload, stream=True)
  184.  
  185.         if r.status_code == 200:
  186.             r.raw.decode_content = True
  187.             try:
  188.                 img = Image.open(r.raw).convert('RGB')
  189.              
  190.             except IOError:
  191.                 continue
  192.                
  193.             img = format_image(img)
  194.             if not eval_image(img):
  195.                 # image is not suitable for dataset
  196.                 continue
  197.                
  198.             # add image to data-set
  199.             num_images += 1
  200.             img.save('{}/{}_{}.png'.format(SAVE_LOCATION, lat, lon))
  201.            
  202.         elif r.status_code == 403:
  203.             handle_api_limit()
  204.            
  205.         else:
  206.             print '[!] Recieved status code: {}'.format(r.status_code)
  207.        
  208.     # once finished getting images
  209.     normalize_brightness()
  210.  
  211. if __name__ == '__main__':
  212.     try:
  213.         main()
  214.     except KeyboardInterrupt:
  215.         exit()
Add Comment
Please, Sign In to add comment