Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # coding: utf-8
- import urllib
- import sys
- from BeautifulSoup import BeautifulSoup
class A(object):
    """Fake class for serialization.

    Instances are plain attribute holders; callers attach whatever
    fields they need after construction.
    """
def A2XML(A):
    """Render one timetable entry as a ``<train>`` XML fragment.

    Reads the ``time`` and ``note`` attributes of the given object. The
    ``<url>`` element always carries the placeholder text ``not realized``.
    Values are interpolated as-is; no XML escaping is applied.
    """
    template = u'<train><time>%s</time><url>%s</url><note>%s</note></train>'
    fields = (A.time, u'not realized', A.note)
    return template % fields
def dumpXML(url):
    """Fetch a timetable page and re-serialize it as prettified XML.

    The page at ``url`` is downloaded, decoded as UTF-8 and parsed with
    BeautifulSoup. The first child of the ``h2`` element with class
    ``b-holster b-title`` becomes the ``name`` attribute of the root
    ``<rasp>`` element, and every ``span`` with class ``time `` contributes
    one ``<train>`` child built by ``A2XML``.

    Returns whatever ``BeautifulSoup.prettify()`` produces for the
    assembled markup.

    Raises whatever ``urllib.urlopen`` raises on a failed fetch, and
    ``AttributeError`` / ``IndexError`` / ``KeyError`` / ``TypeError`` if
    the page does not contain the expected elements (no validation is done).
    """
    archives_html = urllib.urlopen(url)
    try:
        page_text = unicode(archives_html.read(), 'utf8')
    finally:
        # Release the connection even if reading or decoding fails.
        archives_html.close()
    soup = BeautifulSoup(page_text)

    # ''.join() over the node's characters yields a plain string copy.
    direction = ''.join(soup.find("h2", {"class": "b-holster b-title"}).contents[0])

    rasp = []
    # NOTE(review): the class value carries a trailing space; presumably it
    # mirrors the page markup exactly -- confirm against the live page.
    for span in soup.findAll("span", {"class": "time "}):
        entry = A()
        entry.href = span.a['href']
        entry.time = ''.join(span.a.contents[0])
        # The note text is taken from the element right after the span.
        entry.note = ''.join(span.nextSibling.contents[0])
        rasp.append(entry)

    # NOTE(review): ``direction`` is interpolated unescaped into an attribute.
    raspstring = [u'<rasp name="%s">' % (direction,)]
    raspstring.extend(A2XML(entry) for entry in rasp)
    # The closing tag previously lacked its '>' and produced broken markup.
    raspstring.append('</rasp>')

    resultSoup = BeautifulSoup(''.join(raspstring))
    return resultSoup.prettify()
# Script entry point: expects the timetable URL as the first argument and
# prints the generated XML to standard output.
if __name__ == "__main__":
    if len(sys.argv) < 2:
        # Message (Russian): "The path to the timetable must be given as the
        # script's only argument."
        print(u'Необходимо задать путь к расписанию в качестве единственного аргумента скрипта.')
        # sys.exit() instead of the site-injected exit() helper, which is
        # not available when the interpreter runs without the site module.
        sys.exit(1)
    print(dumpXML(sys.argv[1]))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement