Advertisement
elsemTim

schedule_parse_data_rata

Feb 19th, 2018
160
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.02 KB | None | 0 0
  1. #! /usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3.  
  4. from BeautifulSoup import BeautifulSoup
  5. import requests
  6. import codecs
  7. import io
  8. import json
  9. import collections
  10.  
  11.  
  12. def convert(data):
  13.     if isinstance(data, basestring):
  14.         return str(data)
  15.     elif isinstance(data, collections.Mapping):
  16.         return dict(map(convert, data.iteritems()))
  17.     elif isinstance(data, collections.Iterable):
  18.         return type(data)(map(convert, data))
  19.     else:
  20.         return data
  21.  
  22. r = BeautifulSoup(requests.get('http://ratadubna.ru/schedule/').text)
  23.  
  24. data = []
  25.  
  26. days = [0, 5, 6]
  27.  
  28. stops = r.find("div", { "class" : "schedule" }).find('div').find('ul').findAll('li')
  29.  
  30. for d in days:
  31.  
  32.     for s in stops:
  33.  
  34.         s_id = s.find('a')['data-stop-id']
  35.         s_name = s.find('a').text.encode('utf8')
  36.  
  37.         s = BeautifulSoup(requests.get('http://ratadubna.ru/nav/schedule.php', params={'o': 3, 's':s_id, 't':d}).text)
  38.  
  39.         stop = {'id' : s_id, 'day' : d, 'name' : s_name, 'buses' : []}
  40.  
  41.         print type(s_name), s_name
  42.  
  43.         body = s.findAll('table', {"class" : "tableSched"})
  44.        
  45.         if len(body) == 0:
  46.             data.append(stop)
  47.             continue
  48.  
  49.         trs = s.find('table', {"class" : "tableSched"}).findAll('tr')
  50.  
  51.         hours = list(map(lambda x: int(x.text), trs[0].findAll('td')[1:]))
  52.  
  53.         for i in range(1, len(trs), 2):
  54.  
  55.             id = trs[i].findAll('td')[0].text
  56.  
  57.             bus = {'id' : int(id), 'hours': []}    
  58.  
  59.             r = trs[i+1].findAll('td')
  60.             for j in range(0, len(hours)):
  61.                 mins = r[j].findAll('div')
  62.                 minutes = list(map(lambda x: int(x.find('p').text), list(mins)))
  63.                 hour = {'hour': hours[j], 'minutes': minutes}
  64.                 bus['hours'].append(hour)
  65.                
  66.             stop['buses'].append(bus)
  67.  
  68.         data.append(stop)
  69.    
  70.    
  71. file = open("/Users/timurmuhortov/Desktop/schedule.json", "w")
  72. file.write(str(data).encode('utf8'))
  73. file.close()
  74.  
  75. #print str(data).decode("utf-8")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement