Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
class MyHTMLParser(HTMLParser):
    """Report each ``<tr>`` start tag whose ``class`` attribute is ``Table_row``."""

    def handle_starttag(self, tag, attrs):
        """Inspect one start tag and print a line for matching table rows.

        ``tag`` is the element name and ``attrs`` is the list of
        ``(name, value)`` attribute pairs supplied by ``HTMLParser``.
        """
        if tag != 'tr':
            return
        # ``class`` is a reserved word and cannot be used as a loop variable;
        # each attribute is a (name, value) pair, so unpack it and compare the
        # attribute name and its value separately.
        for name, value in attrs:
            if name == 'class' and value == 'Table_row':
                # A single pre-formatted argument prints the same text under
                # both the Python 2 print statement and the Python 3 function.
                print('Found class %s' % value)
- # Build the parser and run it over the markup held in `ht`. `ht` is not
- # defined in this snippet -- presumably the <table> HTML shown below; confirm.
- p = MyHTMLParser()
- p.feed(ht)
- <table class="Table_rows" cellspacing="0" rules="all" border="1" id="MyDataGrid" style="width:700px;border-collapse:collapse;">
- <tr class="Table_Heading">
- <td>STATION CODE</td><td>STATION NAME</td><td>SCHEDULED ARRIVAL</td><td>SCHEDULED DEPARTURE</td><td>ACTUAL/ EXPECTED ARRIVAL</td><td>ACTUAL/ EXPECTED DEPARTURE</td>
- </tr><tr class="Table_row">
- <td>TVC </td><td style="width:160px;">ORIGON</td><td>Starting Station </td><td>05:00, 07 May 2011</td><td>Starting Station</td><td>05:00, 07 May 2011</td>
- </tr><tr class="alternat_table_row">
- <td>TVP </td><td>NEY YORK</td><td>05:04, 07 May 2011</td><td>05:05, 07 May 2011</td><td>05:04, 07 May 2011</td><td>05:05, 07 May 2011</td>
- </tr>
- </table>
- from HTMLParser import HTMLParser
class MyHTMLParser(HTMLParser):
    """Print the ``class`` attribute of every ``<tr>`` start tag that is parsed."""

    def handle_starttag(self, tag, attrs):
        """Print ``Found class <value>`` for each ``class`` attribute of a ``<tr>``.

        ``tag`` is the element name and ``attrs`` is the list of
        ``(name, value)`` attribute pairs supplied by ``HTMLParser``.
        Start tags other than ``tr`` are ignored.
        """
        if tag != 'tr':
            return
        for name, value in attrs:
            if name == 'class':
                # A single pre-formatted argument keeps the output identical
                # whether ``print`` is the Python 2 statement or the Python 3
                # function (a two-argument call would print a tuple on 2.x).
                print('Found class %s' % value)
- # Build the parser and run it over the markup held in `ht`. `ht` is not
- # defined in this snippet -- presumably the <table> HTML shown earlier; confirm.
- p = MyHTMLParser()
- p.feed(ht)
- Found class Table_Heading
- Found class Table_row
- Found class alternat_table_row
- from BeautifulSoup import BeautifulSoup
# Sample markup: a header row of six cells followed by one data row of six
# cells (the opening <table>/<tr> tags are not part of this fragment).
html = '''
<td>STATION CODE</td><td>STATION NAME</td><td>SCHEDULED ARRIVAL</td><td>SCHEDULED DEPARTURE</td><td>ACTUAL/ EXPECTED ARRIVAL</td><td>ACTUAL/ EXPECTED DEPARTURE</td>
</tr><tr class="Table_row">
<td>TVC </td><td style="width:160px;">ORIGON</td><td>Starting Station </td><td>05:00, 07 May 2011</td><td>Starting Station</td><td>05:00, 07 May 2011</td>
'''
soup = BeautifulSoup(html)

# The first two <td> cells are the "STATION CODE" / "STATION NAME" headings.
tag = soup.findAll('td', limit=2)
# Cells 0-5 are the header row, so index 7 is the second cell of the data
# row: the station name.
tag_O = soup.findAll('td')[7]

# Iterate over the cells directly instead of indexing through range(len(...)).
# Single-argument print(...) behaves the same on Python 2 and Python 3.
for cell in tag:
    print(cell.string)
print(tag_O.string)

# Output-->
# STATION CODE
# STATION NAME
# ORIGON
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement