Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/python -tt
# -*- coding: UTF-8 -*-
# vim: ts=4 sw=4 et ai si
"""Fetch a generated page from vesna.yandex.ru and print its main text block.

The page is parsed with lxml's lenient HTML parser, the first
``<td colspan="9">/<div>`` element is located, and then two things are
printed: the ``.text`` of each direct child of that element, followed by
the pretty-printed markup of the element itself.
"""
# Makes print() behave identically on Python 2.6+ and Python 3.
from __future__ import print_function

from lxml import etree

url = "http://vesna.yandex.ru/all.xml?mix=astronomy%2Cgeology%2Cgyroscope%2Cliterature%2Cmarketing%2Cmathematics%2Cmusic%2Cpolit%2Cagrobiologia%2Claw%2Cpsychology%2Cgeography%2Cphysics%2Cphilosophy%2Cchemistry%2Cestetica&astronomy=on&geology=on&gyroscope=on&literature=on&marketing=on&mathematics=on&music=on&polit=on&agrobiologia=on&law=on&psychology=on&geography=on&physics=on&philosophy=on&chemistry=on&estetica=on"

# recover=True lets the parser keep going over malformed markup.
parser = etree.HTMLParser(recover=True)
# etree.parse() is handed the URL directly, so lxml performs the fetch.
doc = etree.parse(url, parser)

# Target markup:
# <td colspan="9" class="text"><div style="min-height:333px; height:expression('333px');">
matches = doc.xpath('//td[@colspan="9"]/div')
if not matches:
    # Fail with a readable message instead of a bare IndexError when the
    # page layout differs from what the XPath expects.
    raise SystemExit('no <td colspan="9">/<div> element found in ' + url)
e = matches[0]

# only text of the children (a child without leading text prints "None")
for x in e.iterchildren():
    print(x.text)

# entire structure
print(etree.tounicode(e, pretty_print=True))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement