Yandex (vesna.yandex.ru) parser example

#!/usr/bin/python -tt
# -*- coding: UTF-8 -*-
# vim: ts=4 sw=4 et ai si

from lxml import etree

# Topic identifiers understood by the vesna.yandex.ru text generator.
topics = [
    "astronomy",
    "geology",
    "gyroscope",
    "literature",
    "marketing",
    "mathematics",
    "music",
    "polit",
    "agrobiologia",
    "law",
    "psychology",
    "geography",
    "physics",
    "philosophy",
    "chemistry",
    "estetica",
]

# Every topic appears twice in the query string: once in the "mix" parameter
# (joined with URL-encoded commas, %2C) and once as its own "<topic>=on" flag.
url = (
    "http://vesna.yandex.ru/all.xml?mix=" + "%2C".join(topics)
    + "&" + "&".join(topic + "=on" for topic in topics)
)

# recover=True asks lxml to keep parsing through broken HTML instead of failing.
parser = etree.HTMLParser(recover=True)
doc = etree.parse(url, parser)

# <td colspan="9" class="text"><div style="min-height:333px; height:expression('333px');">
matches = doc.xpath('//td[@colspan="9"]/div')
if not matches:
    # Fail with a readable message rather than a bare IndexError when the
    # expected container is missing from the fetched page.
    raise SystemExit('no <div> found under <td colspan="9">; page layout may have changed')
e = matches[0]

# only text of the children
for x in e.iterchildren():
    # Elements without leading text (e.g. <br>) have text None; skip them
    # instead of printing the literal string "None".
    if x.text is not None:
        print(x.text)

# entire structure
print(etree.tounicode(e, pretty_print=True))