Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #! /usr/bin/env python
- # -*- coding: utf-8 -*-
- from BeautifulSoup import BeautifulSoup
- import requests
- import codecs
- import io
- import json
- import collections
def convert(data):
    """Recursively convert text inside nested containers to native ``str``.

    Strings are returned as ``str``; on Python 2 a ``unicode`` value is
    encoded as UTF-8 rather than passed to ``str()``, which would use the
    ASCII codec and raise ``UnicodeEncodeError`` on non-ASCII text.
    Mappings come back as a plain ``dict`` with keys and values converted.
    Any other iterable is rebuilt as ``type(data)`` from its converted
    items, so the type must accept a single iterable argument (list, tuple,
    set, ...). Everything else is returned unchanged.
    """
    # Local import keeps the block self-contained; the ABC aliases that used
    # to live directly on ``collections`` were removed in Python 3.10.
    try:
        from collections.abc import Iterable, Mapping
    except ImportError:  # Python 2
        from collections import Iterable, Mapping

    if isinstance(data, str):
        return str(data)
    # ``str is bytes`` is only true on Python 2, so ``basestring`` is never
    # evaluated on Python 3 where that name does not exist.
    if str is bytes and isinstance(data, basestring):  # noqa: F821
        return data.encode('utf-8')
    if isinstance(data, Mapping):
        return dict(
            (convert(key), convert(value)) for key, value in data.items()
        )
    if isinstance(data, Iterable):
        return type(data)(convert(item) for item in data)
    return data
# Scrape the bus schedule for every stop listed on the index page and store
# the result as JSON.

SCHEDULE_PAGE_URL = 'http://ratadubna.ru/schedule/'
STOP_SCHEDULE_URL = 'http://ratadubna.ru/nav/schedule.php'
OUTPUT_PATH = "/Users/timurmuhortov/Desktop/schedule.json"
# requests applies no timeout unless asked to; bound every call so that a
# stalled connection cannot hang the scraper forever.
REQUEST_TIMEOUT = 30  # seconds


def _fetch_soup(url, params=None):
    """Download *url* and return the response text parsed by BeautifulSoup."""
    response = requests.get(url, params=params, timeout=REQUEST_TIMEOUT)
    return BeautifulSoup(response.text)


def _parse_buses(table):
    """Return the list of bus dicts described by one ``tableSched`` table.

    Each bus is ``{'id': int, 'hours': [{'hour': int, 'minutes': [int]}]}``.
    """
    rows = table.findAll('tr')
    if not rows:
        return []
    # Header row: the first cell is skipped, the remaining cells are parsed
    # as the hour values.
    hours = [int(cell.text) for cell in rows[0].findAll('td')[1:]]
    buses = []
    # After the header the rows are consumed in pairs: a row whose first cell
    # is the bus id, then a row of per-hour cells. Stopping at len(rows) - 1
    # skips an unpaired trailing row instead of raising IndexError.
    for i in range(1, len(rows) - 1, 2):
        bus = {'id': int(rows[i].findAll('td')[0].text), 'hours': []}
        cells = rows[i + 1].findAll('td')
        # zip pairs hour j with cell j; a row with fewer cells than hours is
        # truncated rather than failing on a missing index.
        for hour, cell in zip(hours, cells):
            minutes = [int(div.find('p').text) for div in cell.findAll('div')]
            bus['hours'].append({'hour': hour, 'minutes': minutes})
        buses.append(bus)
    return buses


data = []
# Values sent as the ``t`` query parameter of the schedule request.
# NOTE(review): presumably day-type codes -- confirm against the site.
days = [0, 5, 6]

index_page = _fetch_soup(SCHEDULE_PAGE_URL)
stops = (
    index_page.find("div", {"class": "schedule"})
    .find('div')
    .find('ul')
    .findAll('li')
)

for day in days:
    for stop_item in stops:
        link = stop_item.find('a')
        stop_id = link['data-stop-id']
        stop_name = link.text.encode('utf8')
        stop_page = _fetch_soup(
            STOP_SCHEDULE_URL, params={'o': 3, 's': stop_id, 't': day}
        )
        # Progress/debug output; prints the same text as the old
        # ``print type(name), name`` statement on Python 2.
        print("%s %s" % (type(stop_name), stop_name))
        tables = stop_page.findAll('table', {"class": "tableSched"})
        data.append({
            'id': stop_id,
            'day': day,
            'name': stop_name,
            # A stop without a schedule table is recorded with no buses.
            'buses': _parse_buses(tables[0]) if tables else [],
        })

# Serialize with json so the .json file actually contains JSON (the previous
# str(data) wrote a Python repr). ensure_ascii is left at its default: the
# output is pure ASCII with \uXXXX escapes, which avoids mixing the UTF-8
# byte-string names with unicode ids while encoding on Python 2.
with open(OUTPUT_PATH, "w") as out:
    json.dump(data, out)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement