- Parse HTML in Python using lxml, xpath
- <table>
- <tr>
- <td class="u"><input class="wide" name="record[13][name]" value="exampledomain1.com"></td>
- <td class="u">
- <select name="record[13][type]">
- <option SELECTED value="A" >A</option>
- <option value="AAAA" >AAAA</option>
- <option value="CNAME" >CNAME</option>
- <option value="HINFO" >HINFO</option>
- <option value="MX" >MX</option>
- <option value="NAPTR" >NAPTR</option>
- <option value="NS" >NS</option>
- <option value="PTR" >PTR</option>
- <option value="SOA" >SOA</option>
- <option value="SPF" >SPF</option>
- <option value="SRV" >SRV</option>
- <option value="SSHFP" >SSHFP</option>
- <option value="TXT" >TXT</option>
- <option value="RP" >RP</option>
- <option value="URL" >URL</option>
- <option value="MBOXFW" >MBOXFW</option>
- <option value="CURL" >CURL</option>
- </select>
- </td>
- <td class="u"><input class="wide" name="record[13][content]" value='10.10.10.1'></td>
- <td class="u"><input class="wide" name="record[14][name]" value="exampledomain2.com"></td>
- <td class="u">
- <select name="record[14][type]">
- <option SELECTED value="CNAME" >A</option>
- <option value="AAAA" >AAAA</option>
- <option value="CNAME" >CNAME</option>
- <option value="HINFO" >HINFO</option>
- <option value="MX" >MX</option>
- <option value="NAPTR" >NAPTR</option>
- <option value="NS" >NS</option>
- <option value="PTR" >PTR</option>
- <option value="SOA" >SOA</option>
- <option value="SPF" >SPF</option>
- <option value="SRV" >SRV</option>
- <option value="SSHFP" >SSHFP</option>
- <option value="TXT" >TXT</option>
- <option value="RP" >RP</option>
- <option value="URL" >URL</option>
- <option value="MBOXFW" >MBOXFW</option>
- <option value="CURL" >CURL</option>
- </select>
- </td>
- <td class="u"><input class="wide" name="record[14][content]" value='exampledomain1.com'></td>
- <td class="u"><input class="wide" name="record[15][name]" value="exampledomain3.com"></td>
- <td class="u">
- <select name="record[15][type]">
- <option SELECTED value="A" >A</option>
- <option value="AAAA" >AAAA</option>
- <option value="CNAME" >CNAME</option>
- <option value="HINFO" >HINFO</option>
- <option value="MX" >MX</option>
- <option value="NAPTR" >NAPTR</option>
- <option value="NS" >NS</option>
- <option value="PTR" >PTR</option>
- <option value="SOA" >SOA</option>
- <option value="SPF" >SPF</option>
- <option value="SRV" >SRV</option>
- <option value="SSHFP" >SSHFP</option>
- <option value="TXT" >TXT</option>
- <option value="RP" >RP</option>
- <option value="URL" >URL</option>
- <option value="MBOXFW" >MBOXFW</option>
- <option value="CURL" >CURL</option>
- </select>
- </td>
- <td class="u"><input class="wide" name="record[15][content]" value='10.10.10.3'></td>
- </tr>
- </table>
- exampledomain1.com A 10.10.10.1
- exampledomain2.com CNAME exampledomain1.com
- exampledomain3.com A 10.10.10.3
- #!/usr/bin/python
- import lxml.html
- from lxml import etree
- doc = lxml.html.document_fromstring("""Here whole html data""")
- txt1 = doc.xpath('//*[@class="wide"]/@value')
- txt2 = doc.xpath('//@SELECTED/text()')
- print txt1
- print txt2
- (py26_default)[mpenning@Bucksnort ~]$ python parse.py
- exampledomain1.com 10.10.10.1
- exampledomain2.com exampledomain1.com
- exampledomain3.com 10.10.10.3
- (py26_default)[mpenning@Bucksnort ~]$