# nike_futbal so_q_75057281

# for so_q_75057281

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

from selenium.webdriver.chrome.service import Service

import time
from urllib.parse import urljoin
from bs4 import BeautifulSoup
import pandas as pd
import os

# --- Browser setup and initial page snapshot -------------------------------
url = 'https://www.nike.sk/live-stavky/futbal'

# Timeout (seconds) handed to WebDriverWait. The name was referenced without
# ever being defined, which raised NameError before the page was opened.
# NOTE(review): 10 is an arbitrary default - tune as needed.
max_wait = 10

browser = webdriver.Chrome()
# `wait` is the bound `until` method, so callers can write wait(condition).
wait = WebDriverWait(browser, max_wait).until
browser.get(url)

# Block until the operator presses Enter (presumably once the live page has
# finished rendering in the browser window).
input('load and enter')

# Keep a prettified copy of the rendered DOM on disk for offline inspection.
soup = BeautifulSoup(browser.page_source.encode('utf-8'), 'html.parser')
with open('x.html', 'wb') as f:
    f.write(soup.prettify('utf-8'))


# --- Parse every match on the rendered page into matchList ------------------
# Selector notes:
#   .bet:is(button,div)                      -> a single bet cell
#   div.accordion-header span.ellipsis       -> header of a match group
scrapeX = 1  # how many times the current page source is re-parsed
matchSel = 'div.accordion-header+div.accordion-view>div.match'
matchList = {}  # match key -> dict of scraped details (one CSV row each)


def _remove_prefix(text, prefix):
    """Return text without a leading prefix.

    Unlike str.lstrip, which treats its argument as a set of characters,
    this removes the prefix only when text starts with it as a whole.
    """
    if prefix and text.startswith(prefix):
        return text[len(prefix):]
    return text


for rescrape in range(scrapeX):
    print(rescrape+1, 'of', scrapeX)
    soup = BeautifulSoup(browser.page_source.encode('utf-8'), 'html.parser')
    for m in soup.select(matchSel):
        ## Match [Group?] Header ##
        # matchSel guarantees the parent's previous sibling is the accordion
        # header, but the span inside it may be absent - do not crash on it.
        mHeadBox = m.parent.find_previous_sibling()
        mHead = mHeadBox.select_one('span.ellipsis') if mHeadBox else None
        mDets = {
            'id': None,
            'header': mHead.get_text(' ').strip() if mHead else None,
        }

        ## link + id + match-more ##
        # Reset per match: previously a link-less match reused the mId of the
        # preceding match (or raised NameError on the very first one).
        mId = None
        firstLink = m.select_one('a[href]')
        link = firstLink.get('href') if firstLink else ''
        # Drop the site origin as a whole prefix; lstrip() would strip any
        # leading characters found in the URL's character set instead.
        link = _remove_prefix(link, 'https://www.nike.sk')
        if link and not link.startswith('/'):
            link = f'/{link}'
        if link:
            # Second path segment after '/live-stavky/' (or the only one)
            mId = link.split('/live-stavky/', 1)[-1].split('/')[:2][-1]
            if mId.isdigit():
                mDets['id'] = mId

            # The "more" anchor is looked up page-wide by its matching href
            moreTxt = soup.select_one(f'a.match-more[href="{link}"]')
            if moreTxt:
                mDets['more'] = moreTxt.get_text(' ')

            mDets['link'] = f'https://www.nike.sk{link}'

        ## Match Timer ##
        mTimer = m.select_one('div[data-atid="match-timer"][title]')
        if mTimer:
            mDets['timer'] = mTimer.get_text(' ').strip()
            mTimer = mTimer.select_one('span.ellipsis+span')
        if mTimer and mTimer.get('class'):
            # Remove the 'match-timer-' prefix from each class name. The old
            # lstrip('match-timer-') also ate leading letters of the status
            # itself (e.g. 'running' -> 'unning').
            mDets['timer_status'] = ' '.join(
                _remove_prefix(cls, 'match-timer-')
                for cls in mTimer.get('class')
            ).strip()

        ## teams + other simpler selections ##
        selRef = {
            'home_team': 'span[data-atid="match-opponents-home"][title]',
            'away_team': 'span[data-atid="match-opponents-away"][title]',
            'noOdd_msg': 'span[data-atid="no-odds-at-the-moment"]',
            'bet_label': 'span.bet-label',
        }
        for k, sel in selRef.items():
            # Selectors ending in [title] carry their value in that attribute
            ta = 'title' if sel.endswith('[title]') else ''
            el = m.select_one(sel)
            if k == 'noOdd_msg':
                # Debug trace. It used to call `miniStr`, which is not defined
                # in this file (NameError); print a short preview instead.
                elPreview = None if el is None else ' '.join(str(el).split())[:100]
                print([f"m.select_one('{sel}')", elPreview])
            if el:
                mDets[k] = el.get(ta) if ta else el.get_text(' ')

        ## scores and icons ##
        for score in m.select('span[data-atid^="tlv-overview-"]'):
            scKey = score.get('data-atid').replace('tlv-overview-', '')
            scKey = scKey.replace(' ', '_').replace('-', '_').strip()
            mDets[scKey] = score.get_text(' ')
        miSel = 'button.match-icon-btn[title]'
        mIcons = ', '.join([f'"{mi.get("title")}"' for mi in m.select(miSel)])
        if mIcons:
            mDets['icons'] = mIcons

        # odds ## data-atid="tl-bet-lock"
        bSel = '.bet:is(button,div)'
        for gi, betGroup in enumerate(m.select(f'div:has(>{bSel})'), 1):
            for ci, bet in enumerate(betGroup.select(bSel), 1):
                bKey = f'odds-g_{gi}-c_{ci}'
                bTxt = bet.get_text(' ').strip()
                atid = bet.get('data-atid', '').replace('tl-bet-', '')
                # NOTE(review): strip('odd') removes any leading/trailing 'o'
                # and 'd' characters, not the word 'odd' - kept as-is because
                # the intended tag format is not visible here; confirm.
                mDets[bKey] = f"[{atid.strip('odd')}] {bTxt}".replace('[]', '')

        ## reduce multi-lines before adding to matchList ##
        mDets = {
            k: ' '.join(v.split()) if isinstance(v, str) else v
            for k, v in mDets.items()
        }

        # Key by the link-derived id; matches without a link get a composite
        # key so they neither crash nor merge into an unrelated match.
        if mId is None:
            mKey = ('no-link', mDets.get('header'),
                    mDets.get('home_team'), mDets.get('away_team'))
        else:
            mKey = mId

        known = matchList.setdefault(mKey, mDets)
        if known is not mDets:
            # Seen on an earlier pass: only fill in fields still missing/empty
            for k, v in mDets.items():
                if not known.get(k):
                    known[k] = v

# --- Export: one CSV row per collected match, then release the browser ------
opFilename = 'nike_futbal.csv'
matchFrame = pd.DataFrame(matchList.values())
matchFrame.to_csv(opFilename, index=False)


# Close the WebDriver session and drop the module-level reference to it.
browser.quit()
del browser