Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import csv
import sys
import urllib

from bs4 import BeautifulSoup
def get_soup(url):
    """Fetch ``url`` and return its HTML parsed into a BeautifulSoup document.

    The response object is closed even when reading it raises.
    """
    # The script was written against Python 2's urllib.urlopen; fall back to
    # the Python 3 location so the same function runs on either interpreter.
    try:
        from urllib import urlopen
    except ImportError:
        from urllib.request import urlopen

    page = urlopen(url)
    try:
        html = page.read()
    finally:
        page.close()
    return BeautifulSoup(html)
def get_teams(teams_url):
    """Scrape the tournament page at ``teams_url`` for its teams.

    Returns a dictionary keyed by team name:
        {'team_name': {'url': url}, ...}
    """
    page = get_soup(teams_url)
    listing = page.find('table', {"class": "table table-bordered table-striped"})
    team_rows = listing.findAll('tr', {"class": "success"})

    teams = {}
    # The first matching row is skipped; in every other row the second child
    # node holds the link that carries the team's name and URL.
    for row in team_rows[1:]:
        link = row.contents[1].a
        name = link.string.strip()
        teams[name] = {'url': link['href']}

    # add YoloSwag by hand. Beautiful soup doesn't like the hashtag in their name.
    teams["YOLOSwag"] = {'url': "/tournaments/attack-of-the-cores/teams/yoloswag"}

    # Joined with a space so the output matches printing the two items
    # separately, while staying a single-argument print.
    print(" ".join(["len(teams): ", str(len(teams))]))
    return teams
def get_roster(teams, base_url):
    """Attach a roster to every team in ``teams``.

    Each team's page (``base_url`` + the team's 'url') is fetched and the
    players found there are stored under the team's 'roster' key. ``teams``
    is updated in place and also returned.
    Structure:
    {'team_name': {'url': url, 'roster': {'player_name': {'url': url}, ...}}, ...}
    """
    for team_info in teams.values():
        page = get_soup(base_url + team_info['url'])
        listing = page.find('table', {"class": "table table-bordered table-striped"})

        players = {}
        # Skip the first row (presumably the table header); the second child
        # node of each remaining row holds the link with the player's name
        # and profile URL.
        for row in listing.findAll('tr')[1:]:
            link = row.contents[1].a
            name = link.string.strip()
            players[name] = {'url': link['href']}

        team_info['roster'] = players
    return teams
def get_player_stats(player_url):
    """Scrape the profile page at ``player_url`` and return the player's stats.

    Returns a dictionary with these keys:
        'donor'       True when the first label span styled
                      "background-color: #0A0" reads "Donor", else False
        'kills', 'deaths', 'wools', 'cores', 'monuments'
                      integers read from the page
        'objectives'  wools + cores + monuments
        'kd', 'kk'    floats read from the page
        'cd', 'wd', 'md', 'od'
                      cores / wools / monuments / objectives per death
    The six ratio entries are None when the player has zero deaths.
    """
    soup = get_soup(player_url)
    stats = {}

    # Always record donor status so every consumer can rely on the key being
    # present, whether the label is missing or reads something else.
    labels = soup.findAll('span', {"class": "label", "style": "background-color: #0A0"})
    stats['donor'] = bool(labels) and labels[0].contents[0] == "Donor"

    # get kills, deaths
    counters = soup.findAll('div', {"class": "span2"})
    stats['kills'] = int(counters[0].h2.contents[0])
    deaths = int(counters[1].h2.contents[0])
    stats['deaths'] = deaths

    # get objectives: only the "span4" divs that contain an <h2> carry a
    # value, in the order wools, cores, monuments.
    headings = [div.h2 for div in soup.findAll('div', {"class": "span4"})]
    objs = [heading.contents[0] for heading in headings if heading is not None]
    wools = int(objs[0])
    cores = int(objs[1])
    monuments = int(objs[2])
    objectives = wools + cores + monuments
    stats['wools'] = wools
    stats['cores'] = cores
    stats['monuments'] = monuments
    stats['objectives'] = objectives

    # avoid division by 0
    if deaths != 0:
        # kd and kk are read from the <h2> elements of the second "span3" div
        ratio_headings = soup.findAll('div', {"class": "span3"})[1].findAll('h2')
        stats['kd'] = float(ratio_headings[0].contents[0])
        stats['kk'] = float(ratio_headings[1].contents[0])
        # per-death ratios computed locally
        stats['cd'] = float(cores) / deaths
        stats['wd'] = float(wools) / deaths
        stats['md'] = float(monuments) / deaths
        stats['od'] = float(objectives) / deaths
    else:
        for key in ('kd', 'kk', 'cd', 'wd', 'md', 'od'):
            stats[key] = None
    return stats
def get_team_stats(teams, site_url=None):
    """Fetch stats for every rostered player by calling get_player_stats().

    ``site_url`` is prefixed to each player's 'url'. When it is omitted the
    module-level ``base_url`` is used, so the one-argument call keeps working.
    ``teams`` is updated in place and also returned.
    Structure:
    {'team_name': {'url': url, 'roster': {'player_name': {'url': url, 'stats': {'stat': value, ...}} ...}}, ...}
    """
    if site_url is None:
        site_url = base_url
    for team_info in teams.values():
        for player_info in team_info['roster'].values():
            player_info['stats'] = get_player_stats(site_url + player_info['url'])
    return teams
def compute_stats(teams):
    """Compute total and average stats for each team.

    For every stat category a 'total_<stat>' and an 'avg_<stat>' entry is
    stored; players whose value is None are left out of both. The average is
    None when no player has a value (empty roster, or every value is None)
    instead of dividing by zero. 'donors' counts the players whose stats mark
    them as donor; a missing 'donor' entry counts as not a donor.

    ``teams`` is updated in place and also returned.
    Structure:
    {'team_name': {'url': url, 'roster': {'player_name': {'url': url, 'stats': {'stat': value, ...}} ...}, 'stats':{'stat1': value, ...}}, ...}
    """
    stat_cat = ['kills', 'deaths', 'wools', 'cores', 'monuments', 'objectives',
                'kd', 'kk', 'wd', 'cd', 'md', 'od']
    for team_info in teams.values():
        roster = team_info['roster']
        team_stats = {}
        for stat in stat_cat:
            known = [
                player['stats'][stat]
                for player in roster.values()
                if player['stats'][stat] is not None
            ]
            total = sum(known)
            team_stats['total_' + stat] = total
            team_stats['avg_' + stat] = float(total) / len(known) if known else None
        team_stats['donors'] = sum(
            1 for player in roster.values() if player['stats'].get('donor')
        )
        team_info['stats'] = team_stats
    return teams
def stats_printer(teams, out_file):
    """Write a header row plus one summary row per team to the CSV ``out_file``.

    Each team row holds the team name followed by the team's 'avg_kd',
    'avg_cd', 'avg_md', 'total_kills', 'total_cores', 'total_monuments' and
    'donors' stats. The file is closed when writing finishes or fails.
    """
    header = ['Team', 'Average KD', 'Average CD', 'Average MD',
              'Total Kills', 'Total Cores', 'Total Monuments', 'Donors']
    # The csv module expects a binary file on Python 2 and a text file opened
    # with newline='' on Python 3.
    if sys.version_info[0] < 3:
        handle = open(out_file, "wb")
    else:
        handle = open(out_file, "w", newline="")
    with handle:
        writer = csv.writer(handle)
        writer.writerow(header)
        for team in teams:
            s = teams[team]['stats']
            writer.writerow([team, s['avg_kd'], s['avg_cd'], s['avg_md'],
                             s['total_kills'], s['total_cores'],
                             s['total_monuments'], s['donors']])
# Run the whole pipeline: scrape teams, rosters and player stats from the
# site, aggregate them per team and write the summary CSV.
base_url = "https://oc.tc"
tourny_url = base_url + "/tournaments/attack-of-the-cores"
out_file = "ares_stats_out2.csv"

print('getting teams')
teams = get_teams(tourny_url)

print('got teams. getting rosters')
teams = get_roster(teams, base_url)

print('got rosters. getting stats data')
teams = get_team_stats(teams)

print('got stats data. computing stats')
teams = compute_stats(teams)

print('stats computed. printing to file')
stats_printer(teams, out_file)

print('GG?')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement