Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import datetime
- import os
- import time
- from selenium.webdriver.common.keys import Keys
- from selenium.webdriver.support.ui import Select
- from selenium import webdriver
- from selenium.common.exceptions import ElementClickInterceptedException
- import selenium.common.exceptions as selenium_exceptions
def create_dir_for_download():
    """Create (if needed) and return today's download directory.

    The directory is ``<cwd>/download/<YYYY-MM-DD>``, where the date is the
    current local date. Missing parent directories are created as well, and
    an already existing directory is reused.

    Returns:
        The path of the dated download directory as a string.

    Raises:
        OSError: If the directory cannot be created, e.g. because of missing
            permissions or a regular file occupying the path.
    """
    today = datetime.date.today().isoformat()
    download_dir = os.path.join(os.getcwd(), "download", today)
    # exist_ok tolerates reruns on the same day without hiding real failures
    # (such as permission errors) the way a blanket ``except: pass`` would.
    os.makedirs(download_dir, exist_ok=True)
    return download_dir
def create_driver(download_dir: str, is_headless=False):
    """Build a Chrome WebDriver that saves downloads into *download_dir*.

    Args:
        download_dir: Directory handed to Chrome as its default download
            location.
        is_headless: Value assigned to the ``headless`` flag of the Chrome
            options.

    Returns:
        A ``webdriver.Chrome`` instance started from the ``./chromedriver``
        executable with the options described above.
    """
    chrome_options = webdriver.ChromeOptions()
    chrome_options.headless = is_headless
    chrome_options.add_experimental_option(
        'prefs',
        {"download.default_directory": download_dir},
    )
    driver = webdriver.Chrome(
        executable_path='./chromedriver',
        options=chrome_options,
    )
    return driver
def wait_while_file_download(download_dir, timeout=None, poll_interval=1):
    """Block until *download_dir* contains no partial Chrome downloads.

    A download counts as in progress while any entry name in the directory
    contains ``.crdownload``. The directory is listed again after every
    sleep, so each decision is based on a fresh listing.

    Args:
        download_dir: Directory to watch.
        timeout: Maximum number of seconds to wait, or ``None`` (default) to
            wait indefinitely.
        poll_interval: Seconds to sleep before each check. The first sleep
            always happens, so the call never returns sooner than this.

    Raises:
        TimeoutError: If *timeout* is given and partial downloads are still
            present once it has elapsed.
    """
    deadline = None if timeout is None else time.monotonic() + timeout
    while True:
        time.sleep(poll_interval)
        still_downloading = any(
            ".crdownload" in name for name in os.listdir(download_dir)
        )
        if not still_downloading:
            return
        if deadline is not None and time.monotonic() >= deadline:
            raise TimeoutError(
                "downloads in " + str(download_dir) + " did not finish in time"
            )
def remove_broken_files(download_dir: str):
    """Delete leftover partial downloads from *download_dir*.

    Every directory entry whose name contains ``.crdownload`` is removed;
    all other entries are left alone.
    """
    leftovers = [
        name for name in os.listdir(download_dir) if ".crdownload" in name
    ]
    for name in leftovers:
        os.remove(f"{download_dir}/{name}")
def click_element_from_dropdown_menu(driver: webdriver.Chrome, id=0):
    """Open the page's drop-down and click one of its entries.

    Args:
        driver: Browser session showing the page with the drop-down.
        id: Index into the hard-coded sequence of six ``data-ig`` values
            that identify the drop-down entries.

    Raises:
        IndexError: If *id* does not address one of the known entries. The
            drop-down has already been opened when this is raised.
    """
    entry_markers = (
        "x:562890744.8:adr:0",
        "x:562890744.9:adr:1",
        "x:562890744.10:adr:2",
        "x:562890744.11:adr:3",
        "x:562890744.12:adr:4",
        "x:562890744.13:adr:5",
    )
    find_btn(driver, "img", "data-ig", "x:562890744.4:mkr:ButtonImage").click()
    # Pause before looking up the entry -- presumably to let the list render.
    time.sleep(3)
    find_btn(driver, "li", "data-ig", entry_markers[id]).click()
def open_main_page(driver: webdriver.Chrome):
    """Load the start page (``OpAvailStorage.aspx`` for code NGPL) in *driver*."""
    main_page_url = "https://pipeline2.kindermorgan.com/Capacity/OpAvailStorage.aspx?code=NGPL"
    driver.get(main_page_url)
def find_btn(driver: webdriver.Chrome, tag, parameter_name, srcValue):
    """Return the first *tag* element whose attribute equals *srcValue*.

    Args:
        driver: Browser session to search in.
        tag: Tag name of the candidate elements.
        parameter_name: Name of the attribute to compare.
        srcValue: Attribute value the element must have.

    Returns:
        The first matching element in the order reported by the driver, or
        ``None`` when no element matches.
    """
    candidates = driver.find_elements_by_tag_name(tag)
    return next(
        (el for el in candidates if el.get_attribute(parameter_name) == srcValue),
        None,
    )
def click_download(driver: webdriver.Chrome):
    """Click the input whose ``alt`` is "Download Data", then pause 3 seconds."""
    download_input = find_btn(driver, "input", "alt", "Download Data")
    download_input.click()
    time.sleep(3)
def _open_report_for_date(driver, report_node_id, date):
    """Open the report tree node *report_node_id* and type *date* into it.

    Shared navigation for the capacity downloads: loads the main page,
    expands the "Capacity" and "Operationally Available" nodes, clicks the
    ``li`` whose ``data-ig`` equals *report_node_id*, and replaces the content
    of the date field with *date* formatted as ``%m%d%Y``.
    """
    open_main_page(driver)
    find_btn(driver, "img", "alt", "Expand Capacity").click()
    find_btn(driver, "img", "alt", "Expand Operationally Available").click()
    find_btn(driver, "li", "data-ig", report_node_id).click()
    date_field = driver.find_element_by_class_name("igte_NautilusEditInContainer")
    # Ten backspaces wipe the pre-filled value before the new date is typed.
    for _ in range(10):
        date_field.send_keys(Keys.BACKSPACE)
    date_field.send_keys(date.strftime("%m%d%Y"))


def download_point_capacity(driver: webdriver.Chrome, date, is_download_all=False):
    """Download the point-capacity data for *date*, twice.

    The first download uses the page as it opens; the second is made after
    selecting the ``rbReceipt`` radio button.

    Args:
        driver: Browser session used for the download.
        date: Object with ``strftime`` (e.g. ``datetime.date``) giving the
            day to request.
        is_download_all: Passed through to ``download_from_page``; when true,
            every drop-down entry is downloaded instead of only the current
            one.
    """
    _open_report_for_date(driver, "x:173558890.7:adr:0.0.0.0", date)
    download_from_page(driver, is_download_all)
    driver.find_element_by_id("WebSplitter1_tmpl1_ContentPlaceHolder1_rbReceipt").click()
    download_from_page(driver, is_download_all)


def download_storage_capacity(driver: webdriver.Chrome, date, is_download_all=False):
    """Download the storage-capacity data for *date*.

    Args:
        driver: Browser session used for the download.
        date: Object with ``strftime`` (e.g. ``datetime.date``) giving the
            day to request.
        is_download_all: Passed through to ``download_from_page``; when true,
            every drop-down entry is downloaded instead of only the current
            one.
    """
    _open_report_for_date(driver, "x:173558890.11:adr:0.0.0.2", date)
    download_from_page(driver, is_download_all)
def download_from_page(driver: webdriver.Chrome, is_download_all):
    """Trigger the download(s) available on the currently open page.

    Args:
        driver: Browser session positioned on the page to download from.
        is_download_all: When false, click the download control once. When
            true, select each of the six drop-down entries in turn and
            download after each selection.

    Raises:
        Exception: Any error raised while selecting or downloading is
            re-raised unless the page source contains
            "An Error Has Occurred"; in that case a message is printed and
            the browser returns to the URL it was on when this function
            started.
    """
    return_url = driver.current_url
    if not is_download_all:
        click_download(driver)
        return

    for entry_index in range(6):
        try:
            click_element_from_dropdown_menu(driver, entry_index)
            time.sleep(1)
            click_download(driver)
        except Exception:
            error_page_shown = "An Error Has Occurred" in driver.page_source
            if not error_page_shown:
                raise
            # The message is Russian for "Error while downloading".
            print("Ошибка при загрузке")
            driver.get(return_url)
def run(start_date: datetime.date, end_date: datetime.date, is_need_download_all):
    """Download point and storage capacity data for every day in a range.

    Prepares today's download directory, removes leftover partial downloads,
    then walks from *start_date* to *end_date* (both inclusive) one day at a
    time, downloading both data sets for each day.

    Args:
        start_date: First day to download.
        end_date: Last day to download; nothing is downloaded when it is
            earlier than *start_date*.
        is_need_download_all: Forwarded to the download functions; when true,
            every drop-down entry is downloaded for each page.

    Raises:
        Exception: Any error from the download steps propagates unchanged;
            the browser is still closed before it leaves this function.
    """
    download_dir = create_dir_for_download()
    remove_broken_files(download_dir)
    driver = create_driver(download_dir, is_headless=False)
    try:
        current_date = start_date
        while current_date <= end_date:
            time.sleep(1)
            download_point_capacity(driver, current_date, is_need_download_all)
            download_storage_capacity(driver, current_date, is_need_download_all)
            current_date += datetime.timedelta(days=1)
        # Let in-flight downloads finish before the browser is closed below.
        wait_while_file_download(download_dir)
    finally:
        # Always end the browser session so no driver/browser process leaks.
        driver.quit()
if __name__ == "__main__":
    # Script entry point: fetch every drop-down entry for one fixed day.
    target_day = datetime.date(2019, 10, 5)
    run(target_day, target_day, is_need_download_all=True)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement