Not a member of Pastebin yet?
                        Sign Up,
                        it unlocks many cool features!                    
                - # for so_q_75057281
 - from selenium import webdriver
 - from selenium.webdriver.common.by import By
 - from selenium.webdriver.support.ui import WebDriverWait
 - from selenium.webdriver.support import expected_conditions as EC
 - from selenium.webdriver.chrome.service import Service
 - import time
 - from urllib.parse import urljoin
 - from bs4 import BeautifulSoup
 - import pandas as pd
 - import os
 - url = 'https://www.nike.sk/live-stavky/futbal'
 - browser = webdriver.Chrome()
 - wait = WebDriverWait(browser, max_wait).until
 - browser.get(url)
 - input('load and enter')
 - soup = BeautifulSoup(browser.page_source.encode('utf-8'), 'html.parser')
 - with open('x.html', 'wb') as f:
 - f.write(soup.prettify('utf-8'))
 - # .bet:is(button,div)
 - # div.accordion-header span.ellipsis # header
 - scrapeX = 1
 - matchSel = 'div.accordion-header+div.accordion-view>div.match'
 - matchList = {} # []
 - # for m in soup.select(matchSel):
 - for rescrape in range(scrapeX):
 - print(rescrape+1, 'of', scrapeX)
 - soup = BeautifulSoup(browser.page_source.encode('utf-8'), 'html.parser')
 - for m in soup.select(matchSel):
 - ## Match [Group?] Header ##
 - mHead = m.parent.find_previous_sibling().select_one('span.ellipsis')
 - mDets = {'id': None, 'header': mHead.get_text(' ').strip()}
 - ## link + id + match-more ##
 - link = ''.join([l.get('href') for l in m.select('a[href]')[:1]])
 - link = link.lstrip('https://www.nike.sk')
 - if link and not link[:1] == '/': link = f'/{link}'
 - if link:
 - mId = link.split('/live-stavky/', 1)[-1].split('/')[:2][-1]
 - if mId.isdigit(): mDets['id'] = mId
 - moreTxt = soup.select_one(f'a.match-more[href="{link}"]')
 - if moreTxt: mDets['more'] = moreTxt.get_text(' ')
 - mDets['link'] = f'https://www.nike.sk{link}'
 - ## Match Timer ##
 - mTimer = m.select_one('div[data-atid="match-timer"][title]')
 - if mTimer:
 - mDets['timer'] = mTimer.get_text(' ').strip()
 - mTimer = mTimer.select_one('span.ellipsis+span')
 - if mTimer and mTimer.get('class'):
 - mTimer = ' '.join(mTimer.get('class')).strip()
 - mDets['timer_status'] = mTimer.lstrip('match-timer-')
 - ## teams + other simpler selections ##
 - selRef = {
 - 'home_team': 'span[data-atid="match-opponents-home"][title]',
 - 'away_team': 'span[data-atid="match-opponents-away"][title]',
 - 'noOdd_msg': 'span[data-atid="no-odds-at-the-moment"]',
 - 'bet_label': 'span.bet-label'
 - }
 - for k, sel in selRef.items():
 - ta = 'title' if sel.endswith('[title]') else ''
 - el = m.select_one(sel)
 - if k == 'noOdd_msg': print([f"m.select_one('{sel}')", miniStr(el)])
 - if el: mDets[k] = el.get(ta) if ta else el.get_text(' ')
 - ## scores and icons ##
 - for score in m.select('span[data-atid^="tlv-overview-"]'):
 - scKey = score.get('data-atid').replace('tlv-overview-', '')
 - scKey = scKey.replace(' ', '_').replace('-', '_').strip()
 - mDets[scKey] = score.get_text(' ')
 - miSel = 'button.match-icon-btn[title]'
 - mIcons = ', '.join([f'"{mi.get("title")}"' for mi in m.select(miSel)])
 - if mIcons: mDets['icons'] = mIcons
 - # odds ## data-atid="tl-bet-lock"
 - bSel = '.bet:is(button,div)'
 - for gi, betGroup in enumerate(m.select(f'div:has(>{bSel})'),1):
 - for ci, bet in enumerate(betGroup.select(bSel), 1):
 - bKey = f'odds-g_{gi}-c_{ci}'
 - bTxt = bet.get_text(' ').strip()
 - atid = bet.get('data-atid', '').replace('tl-bet-', '')
 - mDets[bKey] = f"[{atid.strip('odd')}] {bTxt}".replace('[]', '')
 - ## reduce multi-lines before adding to matchList ##
 - for k, v in mDets.items():
 - if isinstance(v,str):mDets[k] = ' '.join(w for w in v.split() if w)
 - if mId not in matchList:
 - matchList[mId] = mDets
 - continue
 - for k, v in mDets.items():
 - if not matchList[mId].get(k): matchList[mId][k] = v
 - # matchList.append(mDets)
 - opFilename = 'nike_futbal.csv'
 - # pd.DataFrame(matchList).to_csv(opFilename, index=False)
 - pd.DataFrame(matchList.values()).to_csv(opFilename, index=False)
 - browser.quit()
 - del browser
 
Advertisement
 
                    Add Comment                
                
                        Please, Sign In to add comment