Advertisement
Guest User

Untitled

a guest
May 6th, 2016
53
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.46 KB | None | 0 0
  1. url = "http://www.dotabuff.com/players/68242248/heroes"
  2.  
  3. user_agent = 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36'
  4. headers = {'User-Agent': user_agent}
  5.  
  6. request = urllib2.Request(url, None, headers)
  7. response = urllib2.urlopen(request)
  8.  
  9.  
  10. class dotaBuffHTMLParser(HTMLParser):
  11.     def __init__(self):
  12.  
  13.         HTMLParser.__init__(self)
  14.         self.data = False
  15.         self.table_headings = []
  16.         self.player_stats_table = []
  17.         self.player_stats_row = []
  18.         self.player_stats_cell = ""
  19.  
  20.     def handle_starttag(self, tag, attrs):
  21.         if tag in ("tr", "td", "th"):
  22.             self.data = True
  23.  
  24.         elif tag == "img":
  25.             if attrs[0][1] == "image-hero image-icon":
  26.                 self.player_stats_cell = attrs[4][1]
  27.  
  28.     def handle_data(self, data):
  29.         if self.data:
  30.             self.player_stats_cell = data
  31.  
  32.     def handle_endtag(self, tag):
  33.  
  34.         if tag in ('td', 'th'):
  35.             cell = self.player_stats_cell
  36.             self.player_stats_row.append(cell)
  37.             self.player_stats_cell = []
  38.             self.data = False
  39.  
  40.         elif tag == 'tr':
  41.             row = self.player_stats_row
  42.             if row:
  43.                 self.player_stats_table.append(row)
  44.                 self.player_stats_row = []
  45.                 self.table_row = False
  46.  
  47. parser = dotaBuffHTMLParser()
  48. parser.feed(response.read())
  49.  
  50. print(parser.player_stats_table)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement