Advertisement
bcbwilla

Project Ares Tournament Statistics Scraper

Feb 14th, 2013
91
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 6.49 KB | None | 0 0
  1. from bs4 import BeautifulSoup
  2. import urllib
  3. import csv
  4.  
  5. def get_soup(url):
  6.     """Generates beautiful soup from html page with url"""
  7.     #get the page
  8.     page = urllib.urlopen(url)
  9.     html = page.read()
  10.     page.close()    
  11.     #make the soup
  12.     return BeautifulSoup(html)
  13.  
  14. def get_teams(teams_url):
  15.     """Gets a dictionary of teams.
  16.    Structure:
  17.    {'team_name': {'url': url}, ...}
  18.    """  
  19.     soup = get_soup(teams_url)
  20.     table = soup.find('table', {"class" : "table table-bordered table-striped"})
  21.     #get the team name and url
  22.     teams = {}
  23.     rows = table.findAll('tr',{"class" : "success"})
  24.     for tr in rows[1:]:
  25.         team_info = {}          
  26.         team = tr.contents[1].a.string.strip()
  27.         team_url = tr.contents[1].a['href']
  28.         team_info['url'] = team_url
  29.         teams[team] = team_info
  30.     #add YoloSwag by hand.  Beautiful soup doesn't like the hashtag in their name.    
  31.     teams["YOLOSwag"] = {'url':"/tournaments/attack-of-the-cores/teams/yoloswag"}
  32.     print "len(teams): ", len(teams)
  33.     return teams
  34.  
  35. def get_roster(teams, base_url):  
  36.     """Get roster for each team in teams.
  37.    Returns new teams dictionary.
  38.    Structure:
  39.    {'team_name': {'url': url, 'roster': {'player_name': {'url': url}, ...}}, ...}
  40.    """  
  41.     for team in teams:
  42.         roster_url = base_url + teams[team]['url']
  43.         soup = get_soup(roster_url)
  44.         table = soup.find('table', {"class" : "table table-bordered table-striped"})
  45.         #get the team name and url
  46.         roster = {}
  47.         rows = table.findAll('tr')
  48.         for tr in rows[1:]:
  49.             player_info = {}
  50.             player = tr.contents[1].a.string.strip()
  51.             player_url = tr.contents[1].a['href']
  52.             player_info['url'] = player_url
  53.             roster[player] = player_info
  54.             teams[team]['roster'] = roster            
  55.     return teams
  56.  
  57. def get_player_stats(player_url):  
  58.     """Get stats for a given player. Returns them in dictionary
  59.    Structure:
  60.    {'stat': value, ...}
  61.    """    
  62.     stats = {}
  63.     soup = get_soup(player_url)
  64.     # see if donor
  65.     table = soup.findAll('span', {"class" : "label", "style" : "background-color: #0A0"})
  66.     if len(table) > 0:
  67.         if table[0].contents[0] == "Donor":
  68.             stats['donor'] = True
  69.     else:
  70.         stats['donor'] = False
  71.     #get kills, deaths
  72.     table = soup.findAll('div', {"class" : "span2"})
  73.     stats['kills'] = int(table[0].h2.contents[0])
  74.     deaths = int(table[1].h2.contents[0])
  75.     stats['deaths']  = deaths
  76.     #get objectives
  77.     table = soup.findAll('div', {"class" : "span4"})
  78.     objs_table = []
  79.     for i in range(len(table)):
  80.         if table[i].h2 != None:
  81.             objs_table.append(table[i].h2.contents[0])
  82.     wools = int(objs_table[0])
  83.     cores = int(objs_table[1])
  84.     monuments = int(objs_table[2])
  85.     objectives = wools + cores + monuments
  86.     stats['wools'] = wools
  87.     stats['cores'] = cores
  88.     stats['monuments'] = monuments
  89.     stats['objectives']= objectives
  90.     #avoid division by 0
  91.     if deaths != 0:
  92.         #get kd, kk
  93.         table = soup.findAll('div', {"class" : "span3"})
  94.         kdkk_table = table[1].findAll('h2')
  95.         stats['kd'] = float(kdkk_table[0].contents[0])
  96.         stats['kk'] = float(kdkk_table[1].contents[0])
  97.         #compute other stats
  98.         stats['cd'] = float(cores)/deaths
  99.         stats['wd'] = float(wools)/deaths
  100.         stats['md'] = float(monuments)/deaths
  101.         stats['od'] = float(objectives)/deaths
  102.     else:
  103.         stats['kd'] = None
  104.         stats['kk'] = None
  105.         stats['cd'] = None
  106.         stats['wd'] = None
  107.         stats['md'] = None
  108.         stats['od'] = None        
  109.     return stats
  110.  
  111. def get_team_stats(teams):
  112.     """Get stats for each team. Calls get_player_stats()
  113.    Returns new teams dictionary.
  114.    Structure:
  115.    {'team_name': {'url': url, 'roster': {'player_name': {'url': url, 'stats': {'stat': value, ...}} ...}}, ...}
  116.    """
  117.     stats = {}
  118.     for team in teams:
  119.         roster = teams[team]['roster']
  120.         for player in roster:
  121.             player_url = base_url + roster[player]['url']
  122.             stats = get_player_stats(player_url)
  123.             teams[team]['roster'][player]['stats'] = stats
  124.     return teams
  125.  
  126. def compute_stats(teams):
  127.     """Computes average and total stats for each team.
  128.    Returns new teams dictionary.
  129.    Structure:
  130.    {'team_name': {'url': url, 'roster': {'player_name': {'url': url, 'stats': {'stat': value, ...}} ...}, 'stats':{'stat1': value, ...}}, ...}
  131.    """
  132.     stat_cat = ['kills','deaths','wools','cores','monuments','objectives','kd','kk','wd','cd','md','od']  
  133.     for team in teams:
  134.         team_stats = {}
  135.         for stat in stat_cat:
  136.             stat_sum = 0
  137.             none_count = 0
  138.             roster = teams[team]['roster']
  139.             for player in roster:
  140.                 stats =  roster[player]['stats']
  141.                 if stats[stat] != None:
  142.                     stat_sum = stat_sum + stats[stat]
  143.                 else:
  144.                     none_count = none_count + 1
  145.                 average = float(stat_sum)/(len(roster) - none_count)
  146.             team_stats['total_'+stat] = stat_sum
  147.             team_stats['avg_'+stat] = average          
  148.         donors = 0
  149.         for player in roster:
  150.             if roster[player]['stats']['donor'] == True:
  151.                 donors = donors + 1
  152.         team_stats['donors'] = donors
  153.         teams[team]['stats'] = team_stats
  154.     return teams
  155.  
  156. def stats_printer(teams, out_file):
  157.     """Prints teams stats data to CSV file."""
  158.     f = open(out_file, "wb")
  159.     c = csv.writer(f)
  160.     #print header
  161.     c.writerow(['Team', 'Average KD', 'Average CD', 'Average MD',
  162.     'Total Kills', 'Total Cores', 'Total Monuments', 'Donors'])
  163.    
  164.     for team in teams:
  165.         s = teams[team]['stats']
  166.         #print stats
  167.         c.writerow([team, s['avg_kd'], s['avg_cd'], s['avg_md'], s['total_kills'],
  168.                    s['total_cores'], s['total_monuments'], s['donors']])
  169.     f.close
  170.  
  171. # Run
  172. base_url = "https://oc.tc"
  173. tourny_url = base_url + "/tournaments/attack-of-the-cores"
  174. out_file = "ares_stats_out2.csv"
  175.  
  176. print 'getting teams'
  177. teams = get_teams(tourny_url)
  178. print'got teams. getting rosters'
  179. teams = get_roster(teams, base_url)
  180. print 'got rosters. getting stats data'
  181. teams = get_team_stats(teams)
  182. print 'got stats data. computing stats'
  183. teams = compute_stats(teams)
  184. print 'stats computed. printing to file'
  185. stats_printer(teams, out_file)
  186. print 'GG?'
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement