Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import os
import time
import urllib
import urllib.request

import requests
from bs4 import BeautifulSoup
# Archive listing page for Himawari-8 full-disk true-color imagery (RAMMB/CIRA).
ARCHIVE_URL = 'http://rammb.cira.colostate.edu/ramsdis/online/archive_hi_res.asp?data_folder=himawari-8/full_disk_ahi_true_color&width=800&height=800'
# Image links on the archive page are relative; this prefix makes them absolute.
URL_PREFIX = 'http://rammb.cira.colostate.edu/ramsdis/online/'
# Local directory downloads are written to. os.path.join with an empty tail
# yields a trailing separator that is correct on every platform
# (the original hard-coded a Windows backslash: 'images\\').
IMG_PREFIX = os.path.join('images', '')
# Visible anchor text that marks a hi-res image link on the archive page.
LINK_STR = 'Hi-Res Image'
def main(start=300):
    """Download archived Himawari-8 true-color images from the RAMMB archive.

    Scrapes the archive page for hi-res image links, then downloads each one
    that is not already present locally, retrying failures until they succeed.
    Progress is printed to stdout.

    Args:
        start: Index of the first link to download (defaults to 300, the
            original hard-coded starting point — presumably earlier images
            were already fetched in a previous run).
    """
    archive_html = get_html_data(ARCHIVE_URL)
    soup = BeautifulSoup(archive_html, 'html.parser')
    # Each hi-res image is an <a> anchor whose visible text is LINK_STR.
    links = soup.findAll('a', string=LINK_STR)
    print('Got Links')
    time.sleep(5.0)
    # Ensure the destination directory exists up front; otherwise every
    # urlretrieve would fail and the retry loop below would spin forever.
    os.makedirs(IMG_PREFIX, exist_ok=True)
    total = len(links)
    for i in range(start, total):
        link = links[i]['href']
        img_path = IMG_PREFIX + link.split('/')[-1]
        if os.path.exists(img_path):
            # Already downloaded on a previous run — skip it.
            continue
        while True:
            try:
                urllib.request.urlretrieve(URL_PREFIX + link, img_path)
            except Exception:
                # Narrowed from a bare `except:` so KeyboardInterrupt (Ctrl-C)
                # and SystemExit can still stop the script mid-retry.
                print('Failed... Retrying')
                time.sleep(0.1)
            else:
                break
        print(f'Downloaded image {img_path} | {i}/{total}')
        # Be polite to the server between downloads.
        time.sleep(5.0)
def get_html_data(url):
    """Fetch *url* with requests and return the raw response body as bytes.

    NOTE(review): verify=False disables TLS certificate verification — the
    archive host is plain HTTP so it is moot here, but confirm before
    pointing this at an https:// URL.
    """
    page = requests.get(url, verify=False)
    try:
        # Read the body *before* releasing the connection. The original
        # called page.close() first and read page.content afterwards, which
        # only worked because requests eagerly loads non-streamed bodies.
        return page.content
    finally:
        page.close()
# Run the downloader only when executed as a script, not on import.
if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement