Advertisement
Guest User

Untitled

a guest
Feb 8th, 2014
71
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.61 KB | None | 0 0
  1. import urllib2
  2. import re
  3. import csv
  4. from bs4 import BeautifulSoup
  5.  
  6. teams = {'ana', 'bos', 'buf', 'cal', 'car', 'chi', 'col', 'cbj', 'dal', 'det', 'edm', 'flo', 'lak', 'min', 'mtl', 'nsh', 'njd', 'nyi', 'nyr', 'ott', 'phi', 'phx', 'pit', 'sjs', 'stl', 'tbl', 'tor', 'van', 'wpg', 'wsh'}
  7. gameidset = set()
  8.  
  9. print("Collecting game IDs. This may take a while.")
  10.  
  11. for i in teams:
  12. response = urllib2.urlopen('http://www.nhl.com/ice/schedulebyseason.htm?team=' + i)
  13. html = response.read()
  14. soup = BeautifulSoup(html)
  15. for link in soup.find_all('a'):
  16. linkCandidate = str(link.get('href'))
  17. if linkCandidate.startswith('http://www.nhl.com/gamecenter/en/recap?id='):
  18. gameidset.add(re.sub(re.escape('http://www.nhl.com/gamecenter/en/recap?id='), '', linkCandidate))
  19.  
  20. print("Finished collecting game IDs.")
  21.  
  22. datafile = open('data.csv', 'wb')
  23. wr = csv.writer(datafile, quoting=csv.QUOTE_NONE)
  24. wr.writerow(['GameID', 'HomeTeam', 'AwayTeam', 'HomePIM', 'AwayPIM', 'Referee1', 'Referee2'])
  25.  
  26. for i in gameidset:
  27. response = urllib2.urlopen('http://www.nhl.com/gamecenter/en/boxscore?id=' + i)
  28. html = response.read()
  29. soup = BeautifulSoup(html)
  30. gamePack = [i]
  31. gamePack.append(re.sub('<[^<]+?>', '', str(soup.find("th", "ht"))))
  32. gamePack.append(re.sub('<[^<]+?>', '', str(soup.find("th", "at"))))
  33. gamePack.append(re.sub('<[^<]+?>', '', str(soup.find("td", "hPIM"))))
  34. gamePack.append(re.sub('<[^<]+?>', '', str(soup.find("td", "aPIM"))))
  35. extraInfo = str(re.sub('<[^<]+?>', '', str(soup.find_all("li"))))
  36. refs = str(re.findall('Referees:.*Linesmen', extraInfo))
  37. refs = refs[12:]
  38. refs = refs[:-12]
  39. gamePack.extend([x.strip() for x in refs.split(',')])
  40. print(gamePack)
  41. wr.writerow(gamePack)
  42.  
  43. datafile.close()
  44.  
  45. refList = set()
  46. teams = {'ana', 'bos', 'buf', 'cal', 'car', 'chi', 'col', 'cbj', 'dal', 'det', 'edm', 'flo', 'lak', 'min', 'mtl', 'nsh', 'njd', 'nyi', 'nyr', 'ott', 'phi', 'phx', 'pit', 'sjs', 'stl', 'tbl', 'tor', 'van', 'wpg', 'wsh'}
  47.  
  48. datafile = open('data.csv', 'rb')
  49. reader = csv.reader(datafile, delimiter=',')
  50. next(reader, None)
  51. for row in reader:
  52. refList.add(row[5])
  53. refList.add(row[6])
  54. print(refList)
  55. datafile.close()
  56.  
  57. datafile = open('data.csv', 'rb')
  58. datafile2 = open('longdata.csv', 'wb')
  59. wr = csv.writer(datafile2, quoting=csv.QUOTE_NONE)
  60. wr.writerow(['Team', 'PIM', 'Referee1', 'Referee2'])
  61. reader = csv.reader(datafile, delimiter=',')
  62. next(reader, None)
  63. for row in reader:
  64. print([row[1], row[3], row[5], row[6]])
  65. print([row[2], row[4], row[5], row[6]])
  66. wr.writerow([row[1], row[3], row[5], row[6]])
  67. wr.writerow([row[2], row[4], row[5], row[6]])
  68. datafile.close()
  69. datafile2.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement