Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import warnings
- import argparse
- from pathlib import Path
- from selenium import webdriver
- from datetime import datetime, timedelta
- from selenium.webdriver.common.by import By
- from selenium.webdriver.support.ui import WebDriverWait
- from webdriver_manager.chrome import ChromeDriverManager
- from webdriver_manager.core.os_manager import ChromeType
- from selenium.webdriver.support import expected_conditions as EC
- from selenium.webdriver.chrome.service import Service as ChromiumService
def wait_and_click(driver, locator, wait_time=20):
    """
    Block until the located element becomes clickable, then click it.

    Args:
        driver: WebDriver instance used to look the element up.
        locator: ``(By.<strategy>, value)`` tuple handed to
            ``EC.element_to_be_clickable`` (ID, XPATH, CSS selector, ...).
        wait_time: Maximum number of seconds to wait for the element.
    """
    waiter = WebDriverWait(driver, wait_time)
    clickable = EC.element_to_be_clickable(locator)
    # ``until`` returns the element once the condition holds.
    waiter.until(clickable).click()
def setup_driver(download_dir):
    """
    Initializes and returns a headless Chromium WebDriver.

    Args:
        download_dir: Directory where exported files should be saved. It may
            be relative; it is resolved to an absolute path before being
            handed to the browser.

    Returns:
        A ``webdriver.Chrome`` instance configured with the download
        directory preference.
    """
    # Chrome expects an absolute path for "download.default_directory";
    # the script's default of "." would otherwise not be honoured.
    absolute_download_dir = str(Path(download_dir).resolve())

    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--enable-logging')
    options.add_experimental_option("prefs", {"download.default_directory": absolute_download_dir})
    options.add_experimental_option("detach", True)

    service = ChromiumService(ChromeDriverManager(chrome_type=ChromeType.CHROMIUM).install())
    driver = webdriver.Chrome(service=service, options=options)
    return driver
def select_date(driver, target_date):
    """
    Picks ``target_date`` in the page's end-date picker.

    Opens the picker, steps backwards or forwards one month at a time until
    the month of ``target_date`` is reached (counted relative to the current
    month), then clicks the cell showing the day.

    Args:
        driver: WebDriver instance with the market-data page loaded.
        target_date: ``datetime`` (or ``date``) to select.

    Any failure is reported through ``warnings.warn`` rather than raised.
    """
    # Use the imported ``datetime`` class; the bare name ``date`` is not
    # imported by this module.
    today = datetime.today()
    # Signed number of months between the current month and the target month.
    month_difference = ((target_date.year - today.year) * 12) - today.month + target_date.month
    # NOTE(review): "%d" is zero-padded ("05"); confirm the picker renders
    # days that way, otherwise single-digit days will not be found.
    day = target_date.strftime("%d")
    try:
        wait_and_click(driver, (By.ID, "data-end-date"))
        if month_difference < 0:
            for _ in range(abs(month_difference)):
                wait_and_click(driver, (By.CSS_SELECTOR, ".fa.fa-chevron-left"))
        elif month_difference > 0:
            for _ in range(month_difference):
                wait_and_click(driver, (By.CSS_SELECTOR, ".fa.fa-chevron-right"))
        # The selector is an XPath expression, so it must use By.XPATH.
        wait_and_click(driver, (By.XPATH, f'//span[text()="{day}"]'))
    except Exception as e:
        warnings.warn(f"An error occurred: {e}")
def navigate_and_export_data(driver, target_date):
    """
    Select ``target_date`` in the UI and trigger the Excel export.

    Args:
        driver: WebDriver instance with the market-data page loaded.
        target_date: Date to choose in the date picker before exporting.

    Any exception raised along the way is reported as a warning.
    """
    try:
        # Step 1: choose the requested date in the picker.
        select_date(driver=driver, target_date=target_date)
        # Step 2: click the export control.
        export_locator = (By.XPATH, '//span[@title="Export data to Excel"]')
        wait_and_click(driver, export_locator)
    except Exception as err:
        warnings.warn(f"An error occurred: {err}")
def scrape(driver, target_date):
    """
    Load the Nord Pool intraday overview page and export data for one date.

    Args:
        driver: WebDriver instance used to load and drive the page.
        target_date: Date whose data should be exported.
    """
    overview_url = "https://www.nordpoolgroup.com/en/market-data12/Intraday/Market-data1/Market-data1/Overview/?dd=SE3&view=table"
    driver.get(overview_url)
    navigate_and_export_data(driver=driver, target_date=target_date)
if __name__ == "__main__":
    # Command-line entry point.
    #
    # Dates to scrape come either from a file (-f) whose first line is the
    # strptime format and whose remaining lines are dates, optionally
    # filtered by -s/-e, or from the inclusive range -s .. -e.
    parser = argparse.ArgumentParser(description='Scrape NordPoolGroup Market Data')
    parser.add_argument('-d', '--date_format', default="%d-%m-%Y", type=str, help="Set date format", dest="date_format")
    parser.add_argument('-s', '--start_date', type=str, help='Start date', dest="start_date")
    parser.add_argument('-e', '--end_date', type=str, help='End date', dest="end_date")
    parser.add_argument('-f', '--file', type=str, help="Path to file containing dates", dest="file_path")
    parser.add_argument('-o', '--dst_dir', type=str, help='Destination directory for download', dest="dst_dir")
    args = parser.parse_args()

    # Default to the current working directory when no destination is given.
    dst_dir = Path() if args.dst_dir is None else Path(args.dst_dir)
    dst_dir.mkdir(parents=True, exist_ok=True)

    date_format = args.date_format

    # Both bounds start as None so they are always bound, even when the
    # corresponding option is omitted or fails to parse.
    start_date = None
    if args.start_date is not None:
        try:
            start_date = datetime.strptime(args.start_date, date_format)
        except ValueError:
            warnings.warn(f"Invalid date or Illformed date wrt {date_format} format: {args.start_date}")

    end_date = None
    if args.end_date is not None:
        try:
            end_date = datetime.strptime(args.end_date, date_format)
        except ValueError:
            warnings.warn(f"Invalid date or Illformed date wrt {date_format} format: {args.end_date}")

    # Explicit raise instead of assert: asserts are stripped under -O.
    if (start_date is not None) and (end_date is not None) and (start_date > end_date):
        raise ValueError(f"Start date ({start_date}) cannot be later than end date ({end_date}) !!")

    dates = None
    if args.file_path is not None:
        file_path = Path(args.file_path)
        if file_path.is_file():
            with open(file_path, 'r') as src:
                # Strip newlines/whitespace and skip blank lines so that the
                # format line and every date line parse consistently.
                lines = [stripped for stripped in (raw.strip() for raw in src) if stripped]
            dates = list()
            if lines:
                df = lines[0]
                for ds in lines[1:]:
                    try:
                        dt = datetime.strptime(ds, df)
                    except ValueError:
                        warnings.warn(f"Invalid date or Illformed date wrt {df} format: {ds}")
                        # Skip this line; there is no valid date to filter.
                        continue
                    if ((start_date is None) or (start_date <= dt)) and ((end_date is None) or (end_date >= dt)):
                        dates.append(dt)
            else:
                warnings.warn(f"No date format line found in file: {file_path}")
            # De-duplicate and process in chronological order.
            dates = sorted(set(dates))
        else:
            warnings.warn(f"Date file not found, falling back to start/end dates: {file_path}")

    if dates is None:
        if start_date is None:
            raise ValueError("Start date must be set when file with dates are not specified!")
        if end_date is None:
            raise ValueError("End date must be set when file with dates are not specified!")
        # Every day from start_date to end_date, inclusive.
        dates = list()
        current_date = start_date
        while current_date <= end_date:
            dates.append(current_date)
            current_date += timedelta(days=1)

    # Pass an absolute path: the browser needs one for its download directory.
    driver = setup_driver(str(dst_dir.resolve()))
    try:
        for date in dates:
            scrape(driver=driver, target_date=date)
    finally:
        # Always release the browser, even if a scrape raises.
        driver.quit()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement