# Extract the cell text of every table row in test.html into a list of lists.
import lxml.html

# Parse the HTML file from disk; the result is queried with XPath below.
doc = lxml.html.parse('test.html')

# Every <tr> element anywhere in the document, regardless of which table
# it belongs to.
rows = doc.xpath('//tr')

# One inner list per row.  './*/text()' selects the text nodes that are
# direct children of each child element of the row, so text wrapped in
# nested markup inside a cell is not included and whitespace is left
# untouched.
data = [row.xpath('./*/text()') for row in rows]