from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import openpyxl
from datetime import datetime
import time
import os

# URLs (focusing on Premier Kladionica for now)
urls = [
    "https://www.premier-kladionica.com/ponuda"
]

def setup_driver():
    chrome_options = Options()
    # Removed --headless for debugging (browser will be visible)
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    service = Service("chromedriver.exe")  # Ensure chromedriver.exe is in the script directory
    driver = webdriver.Chrome(service=service, options=chrome_options)
    return driver

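# scrape_odds: loads the offer page, waits for the "Spajanje u toku" loading
# indicator to clear, expands each league section, and collects the 1/X/2 odds
# for every match into a {match: [odds]} dict.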
def scrape_odds(url, driver):
    try:
        driver.get(url)
        print("Page loaded, waiting for initial connection to complete...")

        # Wait for the loading indicator to disappear (adjust selector if needed)
        WebDriverWait(driver, 20).until_not(
            EC.presence_of_element_located((By.XPATH, "//*[contains(text(), 'Spajanje u toku')]"))
        )
        print("Loading complete, proceeding with scraping...")

        # Wait for the table to be present
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, "prikaz_ponude_table"))
        )

        # Expand leagues by clicking on elements with class "cell--SPOJENO"
        expand_buttons = WebDriverWait(driver, 10).until(
            EC.presence_of_all_elements_located((By.CSS_SELECTOR, "span.cell--SPOJENO"))
        )
        print(f"Found {len(expand_buttons)} expand buttons.")
        for i, button in enumerate(expand_buttons):
            try:
                driver.execute_script("arguments[0].scrollIntoView(true);", button)  # Scroll to the button
                WebDriverWait(driver, 5).until(EC.element_to_be_clickable(button))
                driver.execute_script("arguments[0].click();", button)  # Use JavaScript to click
                print(f"Clicked expand button {i+1}.")
                time.sleep(2)  # Wait for the section to expand
            except Exception as e:
                print(f"Could not click expand button {i+1}: {e}")
                continue

        # Wait for match rows to be visible after expanding
        WebDriverWait(driver, 15).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "tr.prPrikaz"))
        )

        matches = {}
        # Find the table containing the matches
        match_table = driver.find_element(By.ID, "prikaz_ponude_table")
        match_rows = match_table.find_elements(By.CSS_SELECTOR, "tr.prPrikaz")

        print(f"Found {len(match_rows)} match rows in the table.")

        for match in match_rows:
            # Extract teams
            try:
                teams_elem = match.find_element(By.CSS_SELECTOR, "td.prFontBol")
                teams = teams_elem.text.strip().replace("\n", " vs ")
            except Exception as e:
                print(f"Error finding teams: {e}")
                continue

            # Extract odds (1, X, 2)
            try:
                odds_elements = match.find_elements(By.CSS_SELECTOR, "td.br-48")
                odds_list = []
                for odd in odds_elements[:3]:  # Take first 3 odds (1, X, 2)
                    odd_text = odd.text.strip()
                    if odd_text.replace('.', '').isdigit():
                        odds_list.append(odd_text)

                if len(odds_list) == 3:
                    matches[teams] = odds_list
                else:
                    print(f"Could not extract 3 odds for {teams}: {odds_list}")
            except Exception as e:
                print(f"Error extracting odds for {teams}: {e}")
                continue

        print(f"Successfully scraped {url}: {len(matches)} matches found")
        return matches
    except Exception as e:
        print(f"Error scraping {url}: {str(e)}")
        return {}

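# update_excel: appends the scraped matches to odds.xlsx, adding the header row
# only when the workbook is freshly created.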
def update_excel(matches_dict):
    try:
        wb = openpyxl.load_workbook("odds.xlsx") if "odds.xlsx" in os.listdir() else openpyxl.Workbook()
        ws = wb.active
        ws.title = "Odds"

        if ws.max_row == 1 and ws.max_column == 1 and ws.cell(row=1, column=1).value is None:
            ws.append(["Match", "Premier 1", "Premier X", "Premier 2", "Timestamp"])

        common_matches = matches_dict[0]

        for match, odds_list in common_matches.items():
            row = [match] + odds_list + [datetime.now().strftime("%Y-%m-%d %H:%M:%S")]
            ws.append(row)

        wb.save("odds.xlsx")
        print(f"Excel file updated with {len(common_matches)} matches")
    except Exception as e:
        print(f"Error updating Excel: {str(e)}")

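# main: runs a single scrape cycle against each URL, writes the results to
# Excel, and keeps the console open until Enter is pressed.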
def main():
    driver = setup_driver()
    try:
        print("Starting scrape cycle...")
        matches_dict = [scrape_odds(url, driver) for url in urls]
        update_excel(matches_dict)
        print("Debug complete. Press Enter to exit...")
        driver.quit()
    except KeyboardInterrupt:
        print("Script stopped by user.")
        driver.quit()
    except Exception as e:
        print(f"Unexpected error in main loop: {str(e)}")
        print("Press Enter to exit...")
        driver.quit()
    input()

if __name__ == "__main__":
    main()