Advertisement
Guest User

Data Scraper

a guest
Dec 10th, 2019
257
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.27 KB | None | 0 0
import os
import time
import urllib
import urllib.error
import urllib.request

import requests
from bs4 import BeautifulSoup
  6.  
# Archive page listing every available Himawari-8 full-disk true-color image.
ARCHIVE_URL = 'http://rammb.cira.colostate.edu/ramsdis/online/archive_hi_res.asp?data_folder=himawari-8/full_disk_ahi_true_color&width=800&height=800'
# Image hrefs on the archive page are relative to this base URL.
URL_PREFIX = 'http://rammb.cira.colostate.edu/ramsdis/online/'
# Local directory downloads are written into (Windows-style separator).
IMG_PREFIX = 'images\\'
# Anchor text that identifies the hi-res image links to follow.
LINK_STR = 'Hi-Res Image'
  11.  
  12. def main():
  13.     archive_html = get_html_data(ARCHIVE_URL)
  14.     bs_page = BeautifulSoup(archive_html, 'html.parser')
  15.     links = bs_page.findAll('a', string=LINK_STR)
  16.     print('Got Links')
  17.     time.sleep(5.0)
  18.     for i in range(300, len(links)):
  19.         link = links[i]['href']
  20.         img_path = IMG_PREFIX + link.split('/')[-1]
  21.  
  22.         if not os.path.exists(img_path):
  23.             while True:
  24.                 try:
  25.                     urllib.request.urlretrieve(URL_PREFIX + link, img_path)
  26.                 except:
  27.                     print(f'Failed... Retrying')
  28.                     time.sleep(0.1)
  29.                 else:
  30.                     break
  31.            
  32.             print(f'Downloaded image {img_path} | {i}/{len(links)}')
  33.             time.sleep(5.0)
  34.  
  35.  
  36. def get_html_data(url):
  37.     page = requests.get(url, verify=False)
  38.     page.close()
  39.     return page.content
  40.  
# Run the scraper only when executed as a script, not on import.
if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement