Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Page to download: the per-hero statistics listing for one player.
url = "http://www.dotabuff.com/players/68242248/heroes"

# Send a desktop Chrome User-Agent string with the request.
# NOTE(review): presumably needed because the site rejects the default
# urllib2 agent -- confirm.
user_agent = 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36'
headers = {'User-Agent': user_agent}

# No request body (data=None), so this is sent as a plain GET.
request = urllib2.Request(url, data=None, headers=headers)

# Open the connection; the body is read later from `response`.
response = urllib2.urlopen(request)
class dotaBuffHTMLParser(HTMLParser):
    """Collect the contents of HTML table cells into a list of rows.

    Feed markup with ``feed()``; afterwards ``player_stats_table`` holds one
    list per ``<tr>`` that closed with at least one cell in it.  Each cell
    value is a string: the most recent non-blank text captured before the
    cell's closing tag, or the value taken from a hero-icon ``<img>`` (see
    ``handle_starttag``).  A cell with no captured content yields ``""``.
    """

    # Value the first <img> attribute must have for the tag to count as a
    # hero icon.
    _HERO_ICON_CLASS = "image-hero image-icon"
    # Position of the <img> attribute whose value becomes the cell text.
    # NOTE(review): presumably the hero name (e.g. a title attribute) --
    # confirm against the page markup.
    _HERO_VALUE_INDEX = 4

    def __init__(self):
        """Initialise the parser with empty table, row and cell buffers."""
        # Explicit base-class call (rather than super()) is kept from the
        # original code.
        HTMLParser.__init__(self)
        # True while text nodes should be captured (set on <tr>/<td>/<th>).
        self.data = False
        # Not populated anywhere in this class; kept for compatibility.
        self.table_headings = []
        # Completed rows, in document order.
        self.player_stats_table = []
        # Cells of the row currently being read.
        self.player_stats_row = []
        # Content captured for the cell currently being read.
        self.player_stats_cell = ""
        # Only ever reset to False at </tr>; initialised here so the
        # attribute always exists.
        self.table_row = False

    def handle_starttag(self, tag, attrs):
        """Start capturing text on row/cell tags; pick up hero icons.

        ``attrs`` is the list of ``(name, value)`` pairs supplied by
        ``HTMLParser``.
        """
        if tag in ("tr", "td", "th"):
            self.data = True
        elif tag == "img":
            # Guard the positional lookups: an <img> with fewer attributes
            # than expected is skipped instead of raising IndexError.
            if (len(attrs) > self._HERO_VALUE_INDEX
                    and attrs[0][1] == self._HERO_ICON_CLASS):
                self.player_stats_cell = attrs[self._HERO_VALUE_INDEX][1]

    def handle_data(self, data):
        """Remember ``data`` as the current cell content while capturing."""
        # Whitespace-only nodes (indentation/newlines between tags) are
        # ignored so they cannot overwrite a value already captured, such as
        # one taken from a hero icon.
        if self.data and data.strip():
            self.player_stats_cell = data

    def handle_endtag(self, tag):
        """Close the current cell on </td>/</th> and the current row on </tr>."""
        if tag in ("td", "th"):
            self.player_stats_row.append(self.player_stats_cell)
            # Reset to the same type used in __init__ so empty cells are
            # reported as "" rather than as a list.
            self.player_stats_cell = ""
            self.data = False
        elif tag == "tr":
            # Rows that produced no cells are not recorded.
            if self.player_stats_row:
                self.player_stats_table.append(self.player_stats_row)
            self.player_stats_row = []
            self.table_row = False
# Parse the downloaded page and print the extracted table (a list of rows,
# each a list of cell values).
parser = dotaBuffHTMLParser()
try:
    parser.feed(response.read())
finally:
    # Release the HTTP connection even if reading or parsing fails.
    response.close()
print(parser.player_stats_table)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement