from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import openpyxl
from datetime import datetime
import time
import os

# URLs (focusing on Premier Kladionica for now)
urls = [
    "https://www.premier-kladionica.com/ponuda"
]

def setup_driver():
    chrome_options = Options()
    # Removed --headless for debugging (browser will be visible)
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    service = Service("chromedriver.exe")  # Ensure chromedriver.exe is in the script directory
    driver = webdriver.Chrome(service=service, options=chrome_options)
    return driver

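# Optional sketch, not used by the script: with Selenium 4.6+ the bundled
# Selenium Manager can locate a matching chromedriver automatically, so the
# explicit Service("chromedriver.exe") path can be dropped. This assumes a
# recent Selenium release and a local Chrome install.
def setup_driver_auto():
    chrome_options = Options()
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    # No Service(...) argument: Selenium Manager resolves the driver binary.
    return webdriver.Chrome(options=chrome_options)
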
def scrape_odds(url, driver):
    try:
        driver.get(url)
        print("Page loaded, waiting for initial connection to complete...")

        # Wait for the loading indicator to disappear (adjust selector if needed)
        WebDriverWait(driver, 20).until_not(
            EC.presence_of_element_located((By.XPATH, "//*[contains(text(), 'Spajanje u toku')]"))
        )
        print("Loading complete, proceeding with scraping...")

        # Wait for the table to be present
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, "prikaz_ponude_table"))
        )

        # Expand leagues by clicking on elements with class "cell--SPOJENO"
        expand_buttons = WebDriverWait(driver, 10).until(
            EC.presence_of_all_elements_located((By.CSS_SELECTOR, "span.cell--SPOJENO"))
        )
        print(f"Found {len(expand_buttons)} expand buttons.")

        for i, button in enumerate(expand_buttons):
            try:
                driver.execute_script("arguments[0].scrollIntoView(true);", button)  # Scroll to the button
                WebDriverWait(driver, 5).until(EC.element_to_be_clickable(button))
                driver.execute_script("arguments[0].click();", button)  # Use JavaScript to click
                print(f"Clicked expand button {i+1}.")
                time.sleep(2)  # Wait for the section to expand
            except Exception as e:
                print(f"Could not click expand button {i+1}: {e}")
                continue

        # Wait for match rows to be visible after expanding
        WebDriverWait(driver, 15).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "tr.prPrikaz"))
        )

        matches = {}

        # Find the table containing the matches
        match_table = driver.find_element(By.ID, "prikaz_ponude_table")
        match_rows = match_table.find_elements(By.CSS_SELECTOR, "tr.prPrikaz")
        print(f"Found {len(match_rows)} match rows in the table.")

        for match in match_rows:
            # Extract teams
            try:
                teams_elem = match.find_element(By.CSS_SELECTOR, "td.prFontBol")
                teams = teams_elem.text.strip().replace("\n", " vs ")
            except Exception as e:
                print(f"Error finding teams: {e}")
                continue

            # Extract odds (1, X, 2)
            try:
                odds_elements = match.find_elements(By.CSS_SELECTOR, "td.br-48")
                odds_list = []
                for odd in odds_elements[:3]:  # Take first 3 odds (1, X, 2)
                    odd_text = odd.text.strip()
                    if odd_text.replace('.', '').isdigit():
                        odds_list.append(odd_text)
                if len(odds_list) == 3:
                    matches[teams] = odds_list
                else:
                    print(f"Could not extract 3 odds for {teams}: {odds_list}")
            except Exception as e:
                print(f"Error extracting odds for {teams}: {e}")
                continue

        print(f"Successfully scraped {url}: {len(matches)} matches found")
        return matches
    except Exception as e:
        print(f"Error scraping {url}: {str(e)}")
        return {}

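# Optional sketch, not called above: the scraped odds stay as strings
# (e.g. "1.85"). A small hypothetical helper like this could convert them to
# floats before any numeric comparison across bookmakers.
def odds_to_float(odds_list):
    converted = []
    for value in odds_list:
        try:
            converted.append(float(value))
        except ValueError:
            # Skip unparseable values instead of aborting the whole row.
            continue
    return converted
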
def update_excel(matches_dict):
    try:
        wb = openpyxl.load_workbook("odds.xlsx") if os.path.exists("odds.xlsx") else openpyxl.Workbook()
        ws = wb.active
        ws.title = "Odds"
        # Add the header row only if the sheet is still empty
        if ws.max_row == 1 and ws.max_column == 1 and ws.cell(row=1, column=1).value is None:
            ws.append(["Match", "Premier 1", "Premier X", "Premier 2", "Timestamp"])
        # matches_dict is a list of per-site dicts; only Premier Kladionica is scraped for now
        common_matches = matches_dict[0]
        for match, odds_list in common_matches.items():
            row = [match] + odds_list + [datetime.now().strftime("%Y-%m-%d %H:%M:%S")]
            ws.append(row)
        wb.save("odds.xlsx")
        print(f"Excel file updated with {len(common_matches)} matches")
    except Exception as e:
        print(f"Error updating Excel: {str(e)}")

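# Optional sketch, not called above: a quick read-back of odds.xlsx to verify
# what was written. Assumes update_excel() has already saved the file.
def preview_excel(path="odds.xlsx", limit=5):
    wb = openpyxl.load_workbook(path)
    ws = wb["Odds"]
    for row in ws.iter_rows(min_row=1, max_row=1 + limit, values_only=True):
        print(row)
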
def main():
    driver = setup_driver()
    try:
        print("Starting scrape cycle...")
        matches_dict = [scrape_odds(url, driver) for url in urls]
        update_excel(matches_dict)
        print("Debug complete. Press Enter to exit...")
    except KeyboardInterrupt:
        print("Script stopped by user.")
    except Exception as e:
        print(f"Unexpected error in main loop: {str(e)}")
        print("Press Enter to exit...")
    finally:
        # Close the browser in every exit path before waiting on the user
        driver.quit()
    input()


if __name__ == "__main__":
    main()
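# Usage note (assumptions: Python 3 with selenium and openpyxl installed, and
# chromedriver.exe next to this script unless setup_driver_auto() is used):
#   python premier_odds_scraper.py   # hypothetical filename for this paste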