Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import time
- import requests
- from bs4 import BeautifulSoup
- import lxml
- from MatchScrape import MatchScrape
- # TODO: add queries for regions other than just Europe West.
class Main:
    """Scrape the match listing table from a Dotabuff matches page.

    The page is downloaded when the object is constructed and kept as a
    parsed ``BeautifulSoup`` tree in ``self.soup``.  ``matchscrape`` turns
    the rows of the first table in that tree into ``MatchScrape`` objects,
    which are stored in ``self.matches``.
    """

    # Query: all-pick, ranked matchmaking, Europe West, very high skill.
    # The region parameter must be spelled "&region="; an HTML-entity
    # mangled "(R)ion=" form silently drops the region filter.
    MATCHES_URL = (
        "https://www.dotabuff.com/matches"
        "?game_mode=all_pick"
        "&lobby_type=ranked_matchmaking"
        "&region=europe_west"
        "&skill_bracket=very_high_skill"
    )
    REQUEST_HEADERS = {'User-agent': 'your bot 0.1'}
    # time.sleep() takes seconds, so this matches the "waiting 1 second" log.
    RETRY_DELAY_SECONDS = 1

    def __init__(self):
        """Download the listing page, parse it and start with no matches."""
        self.soup = BeautifulSoup(self.getpage(), 'lxml')
        self.matches = []

    def main(self):
        """Entry point: scrape the matches from the already loaded page."""
        self.matchscrape()

    def getpage(self):
        """Download the match listing and return the response body.

        Keeps requesting until the server answers with status 200, pausing
        ``RETRY_DELAY_SECONDS`` between attempts.  A loop is used instead of
        recursion so that a long outage cannot exhaust the call stack and
        the body of a retried request is actually returned to the caller.

        Returns:
            The ``content`` of the first response whose status code is 200.
        """
        while True:
            result = requests.get(self.MATCHES_URL,
                                  headers=self.REQUEST_HEADERS)
            if result.status_code == 200:
                print("found page")
                return result.content
            print("did not find page, waiting 1 second")
            time.sleep(self.RETRY_DELAY_SECONDS)

    def refreshpage(self):
        """Download the listing again and replace ``self.soup`` with it."""
        self.soup = BeautifulSoup(self.getpage(), 'lxml')

    def matchscrape(self):
        """Rebuild ``self.matches`` from the currently loaded page.

        Reads the first ``<table>`` of ``self.soup``; for each ``<tr>`` in
        its ``<tbody>`` the first cell supplies the match id (text of its
        ``<a>``) and the match date (``datetime`` attribute of its
        ``<time>``).  Each match id is stored at most once.  If the page
        has no table body, ``self.matches`` is left empty; rows lacking the
        expected cell, link or time element are skipped.
        """
        self.matches = []

        table = self.soup.find("table")
        table_body = table.find("tbody") if table is not None else None
        if table_body is None:
            # Layout changed or an error page was served: nothing to scrape.
            print("did not find match table")
            return

        seen_ids = set()  # O(1) duplicate check instead of rescanning the list
        for row in table_body.find_all("tr"):
            cols = row.find_all('td')
            if not cols:
                continue
            # Due to the website layout, only the first column is needed.
            match_column = cols[0]
            link = match_column.find('a')
            timestamp = match_column.find('time')
            if link is None or timestamp is None:
                continue

            match_id = link.text
            # Just in case, never add the same match id twice.
            if match_id in seen_ids:
                continue
            seen_ids.add(match_id)
            self.matches.append(MatchScrape(match_id, timestamp['datetime']))

    def getmatchtable(self):
        """Return the list of matches collected by the last ``matchscrape``."""
        return self.matches
# Script entry point: build the scraper (which downloads the page) and run it.
if __name__ == "__main__":
    scraper = Main()
    scraper.main()
class MatchScrape:
    """Record describing one scraped match.

    Args:
        matchid: identifier of the match, stored as ``matchId``.
        date: date of the match, stored as ``date``.
    """

    # Class-level defaults; ``__init__`` overrides ``matchId`` and ``date``
    # per instance, while ``region`` and ``downloaded`` keep these values
    # until something assigns them.
    date = ""
    matchId = ""
    region = ""
    # NOTE(review): presumably flags whether the match data has been
    # fetched already -- nothing in this class sets it; confirm with callers.
    downloaded = False

    def __init__(self, matchid, date):
        self.matchId = matchid
        self.date = date

    def __repr__(self):
        """Return a readable summary of all four fields for debugging."""
        return (
            f"{type(self).__name__}(matchId={self.matchId!r}, "
            f"date={self.date!r}, region={self.region!r}, "
            f"downloaded={self.downloaded!r})"
        )
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement