Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import urllib2
- import re
- import csv
- from bs4 import BeautifulSoup
# Team codes appended to the schedule URL's ``team=`` query parameter.
teams = {'ana', 'bos', 'buf', 'cal', 'car', 'chi', 'col', 'cbj', 'dal', 'det', 'edm', 'flo', 'lak', 'min', 'mtl', 'nsh', 'njd', 'nyi', 'nyr', 'ott', 'phi', 'phx', 'pit', 'sjs', 'stl', 'tbl', 'tor', 'van', 'wpg', 'wsh'}

# Game IDs found on any schedule page; a set, so repeated IDs are stored once.
gameidset = set()

# Links starting with this prefix are recap links; whatever follows the
# prefix in the href is kept as the game ID.
recapPrefix = 'http://www.nhl.com/gamecenter/en/recap?id='

print("Collecting game IDs. This may take a while.")
for team in teams:
    response = urllib2.urlopen('http://www.nhl.com/ice/schedulebyseason.htm?team=' + team)
    try:
        html = response.read()
    finally:
        # Release the connection even when the read fails.
        response.close()

    # NOTE(review): no parser is named here, so bs4 chooses one itself;
    # consider naming one explicitly so results do not vary by machine.
    soup = BeautifulSoup(html)
    for link in soup.find_all('a'):
        href = link.get('href')
        # Anchors without an href yield None; skip them instead of
        # stringifying, which could also fail on non-ASCII hrefs.
        if href and href.startswith(recapPrefix):
            # Drop only the leading prefix; the ID is stored as a plain str.
            gameidset.add(str(href[len(recapPrefix):]))
print("Finished collecting game IDs.")
# Compiled once instead of being re-parsed for every game.
tagPattern = re.compile('<[^<]+?>')
# Captures the text between the "Referees:" label and the "Linesmen" label.
refereePattern = re.compile('Referees:(.*)Linesmen')


def _strip_tags(node):
    """Return ``str(node)`` with everything that looks like an HTML tag removed.

    A missing node (``None``) therefore comes back as the string ``'None'``.
    """
    return tagPattern.sub('', str(node))


def _parse_referees(text):
    """Return the referee names found in *text*, padded to at least two items.

    The names are the comma-separated, whitespace-stripped pieces between
    "Referees:" and "Linesmen" on the first line where both appear. Empty
    pieces are dropped (presumably the stringified <li> list leaves a
    trailing ", " before "Linesmen" -- verify against a live page). When
    fewer than two names are found the list is padded with '' so that each
    CSV row has at least the seven columns named in the header.
    """
    match = refereePattern.search(text)
    names = []
    if match:
        pieces = [piece.strip() for piece in match.group(1).split(',')]
        names = [piece for piece in pieces if piece]
    # A negative repeat count yields [], so longer lists are left untouched.
    names.extend([''] * (2 - len(names)))
    return names


# Fetch the box score page of every collected game and write one row per game
# to data.csv. 'wb' is the mode the Python 2 csv module expects.
with open('data.csv', 'wb') as datafile:
    # NOTE(review): QUOTE_NONE with no escapechar makes writerow raise
    # csv.Error if a field ever contains the delimiter or a line break.
    wr = csv.writer(datafile, quoting=csv.QUOTE_NONE)
    wr.writerow(['GameID', 'HomeTeam', 'AwayTeam', 'HomePIM', 'AwayPIM', 'Referee1', 'Referee2'])
    for i in gameidset:
        response = urllib2.urlopen('http://www.nhl.com/gamecenter/en/boxscore?id=' + i)
        try:
            html = response.read()
        finally:
            # Release the connection even when the read fails.
            response.close()
        soup = BeautifulSoup(html)

        gamePack = [
            i,
            _strip_tags(soup.find("th", "ht")),
            _strip_tags(soup.find("th", "at")),
            _strip_tags(soup.find("td", "hPIM")),
            _strip_tags(soup.find("td", "aPIM")),
        ]
        # The referee line is searched for in the tag-stripped text of
        # every <li> element on the page.
        extraInfo = _strip_tags(soup.find_all("li"))
        gamePack.extend(_parse_referees(extraInfo))

        print(gamePack)
        wr.writerow(gamePack)
# Every distinct value seen in the two referee columns of data.csv.
refList = set()
# Same team codes as defined earlier in the file; not read by the code below.
teams = {'ana', 'bos', 'buf', 'cal', 'car', 'chi', 'col', 'cbj', 'dal', 'det', 'edm', 'flo', 'lak', 'min', 'mtl', 'nsh', 'njd', 'nyi', 'nyr', 'ott', 'phi', 'phx', 'pit', 'sjs', 'stl', 'tbl', 'tor', 'van', 'wpg', 'wsh'}

# The with-block closes the file even if reading a row fails.
with open('data.csv', 'rb') as datafile:
    reader = csv.reader(datafile, delimiter=',')
    next(reader, None)  # skip the header row (no-op on an empty file)
    for row in reader:
        # Columns 5 and 6 hold Referee1 and Referee2. Slicing instead of
        # indexing means a row with fewer columns adds what it has rather
        # than raising IndexError.
        refList.update(row[5:7])
print(refList)
# Reshape data.csv (one row per game) into longdata.csv (one row per team per
# game): each input row yields a home-team row followed by an away-team row,
# both carrying that game's referee columns.
# The with-block closes both files even if a row fails part-way through.
with open('data.csv', 'rb') as datafile, open('longdata.csv', 'wb') as datafile2:
    wr = csv.writer(datafile2, quoting=csv.QUOTE_NONE)
    wr.writerow(['Team', 'PIM', 'Referee1', 'Referee2'])
    reader = csv.reader(datafile, delimiter=',')
    next(reader, None)  # skip the header row (no-op on an empty file)
    for row in reader:
        # Columns 5 and 6 are Referee1 and Referee2. Pad with '' so a row
        # that is missing either column still produces four output fields
        # instead of raising IndexError.
        referees = (row[5:7] + ['', ''])[:2]
        homeRow = [row[1], row[3]] + referees  # HomeTeam, HomePIM
        awayRow = [row[2], row[4]] + referees  # AwayTeam, AwayPIM
        print(homeRow)
        print(awayRow)
        wr.writerow(homeRow)
        wr.writerow(awayRow)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement