Advertisement
Guest User

Yandex Python Parser

a guest
Jul 2nd, 2010
1,469
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.31 KB | None | 0 0
  1. # coding: utf-8
  2. import urllib
  3. import sys
  4. from BeautifulSoup import BeautifulSoup
  5.  
  6. #"""Fake class for serialization"""
  7. class A(object):pass
  8.    
  9. def A2XML(A):
  10.     result = u'<train><time>%s</time><url>%s</url><note>%s</note></train>' % (A.time, u'not realized', A.note, )
  11.     return result
  12.        
  13. def dumpXML(url):
  14.     archives_url = url
  15.     archives_html = urllib.urlopen(archives_url)
  16.     soup = BeautifulSoup(unicode(archives_html.read(),'utf8'))
  17.    
  18.     direction = ''.join(soup.find("h2", { "class" : "b-holster b-title"}).contents[0])
  19.    
  20.     rasp = []
  21.     for i in soup.findAll("span", { "class" : "time " }):
  22.         aaa = A()
  23.         aaa.href = i.a['href']
  24.         aaa.time = ''.join(i.a.contents[0])
  25.         aaa.note = ''.join(i.nextSibling.contents[0])
  26.         rasp.append(aaa)
  27.     raspstring = [ u'<rasp name="%s">' % (direction,) ,]
  28.     for aaa in rasp: raspstring.append(A2XML(aaa))
  29.     raspstring.append('</rasp')
  30.    
  31.    
  32.     resultSoup = BeautifulSoup(''.join(raspstring))
  33.     return resultSoup.prettify()
  34.  
  35. if __name__ == "__main__":
  36.     if (len(sys.argv) < 2):
  37.         print u'Необходимо задать путь к расписанию в качестве единственного аргумента скрипта.'
  38.         exit(1)
  39.     print dumpXML(str(sys.argv[1]))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement