Try95th

nike_futbal so_q_75057281

Jan 10th, 2023 (edited)
91
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.34 KB | None | 0 0
  1. # for so_q_75057281
  2.  
  3. from selenium import webdriver
  4. from selenium.webdriver.common.by import By
  5. from selenium.webdriver.support.ui import WebDriverWait
  6. from selenium.webdriver.support import expected_conditions as EC
  7.  
  8. from selenium.webdriver.chrome.service import Service
  9.  
  10. import time
  11. from urllib.parse import urljoin
  12. from bs4 import BeautifulSoup
  13. import pandas as pd
  14. import os
  15.  
  16. url = 'https://www.nike.sk/live-stavky/futbal'
  17.  
  18. browser = webdriver.Chrome()
  19. wait = WebDriverWait(browser, max_wait).until
  20. browser.get(url)
  21.  
  22. input('load and enter')
  23. soup = BeautifulSoup(browser.page_source.encode('utf-8'), 'html.parser')
  24. with open('x.html', 'wb') as f:
  25.     f.write(soup.prettify('utf-8'))
  26.  
  27.  
  28. # .bet:is(button,div)
  29. # div.accordion-header span.ellipsis # header
  30. scrapeX = 1
  31. matchSel = 'div.accordion-header+div.accordion-view>div.match'
  32. matchList = {}  # []
  33.  
  34. # for m in soup.select(matchSel):
  35. for rescrape in range(scrapeX):
  36.     print(rescrape+1, 'of', scrapeX)
  37.     soup = BeautifulSoup(browser.page_source.encode('utf-8'), 'html.parser')
  38.     for m in soup.select(matchSel):
  39.         ## Match [Group?] Header ##
  40.         mHead = m.parent.find_previous_sibling().select_one('span.ellipsis')
  41.         mDets = {'id': None, 'header': mHead.get_text(' ').strip()}
  42.        
  43.         ## link + id + match-more ##
  44.         link = ''.join([l.get('href') for l in m.select('a[href]')[:1]])
  45.         link = link.lstrip('https://www.nike.sk')
  46.         if link and not link[:1] == '/': link = f'/{link}'
  47.         if link:  
  48.             mId = link.split('/live-stavky/', 1)[-1].split('/')[:2][-1]
  49.             if mId.isdigit(): mDets['id'] = mId
  50.  
  51.             moreTxt = soup.select_one(f'a.match-more[href="{link}"]')
  52.             if moreTxt: mDets['more'] = moreTxt.get_text(' ')
  53.  
  54.             mDets['link'] = f'https://www.nike.sk{link}'
  55.  
  56.         ## Match Timer ##
  57.         mTimer = m.select_one('div[data-atid="match-timer"][title]')
  58.         if mTimer:
  59.             mDets['timer'] = mTimer.get_text(' ').strip()
  60.             mTimer = mTimer.select_one('span.ellipsis+span')
  61.         if mTimer and mTimer.get('class'):
  62.             mTimer = ' '.join(mTimer.get('class')).strip()
  63.             mDets['timer_status'] = mTimer.lstrip('match-timer-')
  64.  
  65.         ## teams + other simpler selections ##
  66.         selRef = {
  67.             'home_team': 'span[data-atid="match-opponents-home"][title]',
  68.             'away_team': 'span[data-atid="match-opponents-away"][title]',
  69.             'noOdd_msg': 'span[data-atid="no-odds-at-the-moment"]',
  70.             'bet_label': 'span.bet-label'
  71.         }
  72.         for k, sel in selRef.items():
  73.             ta = 'title' if sel.endswith('[title]') else ''
  74.             el = m.select_one(sel)
  75.             if k == 'noOdd_msg': print([f"m.select_one('{sel}')", miniStr(el)])
  76.             if el: mDets[k] = el.get(ta) if ta else el.get_text(' ')
  77.  
  78.         ## scores and icons ##
  79.         for score in m.select('span[data-atid^="tlv-overview-"]'):
  80.             scKey = score.get('data-atid').replace('tlv-overview-', '')
  81.             scKey = scKey.replace(' ', '_').replace('-', '_').strip()
  82.             mDets[scKey] = score.get_text(' ')
  83.         miSel = 'button.match-icon-btn[title]'
  84.         mIcons = ', '.join([f'"{mi.get("title")}"' for mi in m.select(miSel)])
  85.         if mIcons: mDets['icons'] = mIcons
  86.  
  87.         # odds ## data-atid="tl-bet-lock"
  88.         bSel = '.bet:is(button,div)'
  89.         for gi, betGroup in enumerate(m.select(f'div:has(>{bSel})'),1):
  90.             for ci, bet in enumerate(betGroup.select(bSel), 1):
  91.                 bKey = f'odds-g_{gi}-c_{ci}'
  92.                 bTxt = bet.get_text(' ').strip()
  93.                 atid = bet.get('data-atid', '').replace('tl-bet-', '')
  94.                 mDets[bKey] = f"[{atid.strip('odd')}] {bTxt}".replace('[]', '')
  95.  
  96.         ## reduce multi-lines before adding to matchList ##
  97.         for k, v in mDets.items():
  98.             if isinstance(v,str):mDets[k] = ' '.join(w for w in v.split() if w)
  99.         if mId not in matchList:
  100.             matchList[mId] = mDets
  101.             continue
  102.         for k, v in mDets.items():
  103.             if not matchList[mId].get(k): matchList[mId][k] = v
  104.         # matchList.append(mDets)
  105.        
  106. opFilename = 'nike_futbal.csv'
  107. # pd.DataFrame(matchList).to_csv(opFilename, index=False)
  108. pd.DataFrame(matchList.values()).to_csv(opFilename, index=False)
  109.  
  110.  
  111. browser.quit()
  112. del browser
  113.  
Advertisement
Add Comment
Please, Sign In to add comment