Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #! /usr/bin/env python
- # -*- coding: utf-8 -*-
- import xmltodict
- import os
- import ujson
- from datetime import datetime
- from kw.bananabox.base.scraper_builder import ScraperBuilder
def _as_list(value):
    """Return ``value`` wrapped so that it can always be iterated/indexed as a list.

    xmltodict represents a repeated XML element as a list, but a single
    occurrence of the same element as a bare dict and an empty element as
    ``None``.  Normalising here lets callers treat "one or many" uniformly.
    """
    if value is None:
        return []
    if isinstance(value, list):
        return value
    return [value]


class Scraper(ScraperBuilder):
    """Scraper named 'BR' that builds flights from AirShoppingRS XML files on disk."""

    name = 'BR'
    max_layover_time = None
    nonseparable_roundtrip_prices = True

    # Directory scanned for ``*.xml`` response files; override in a subclass
    # or on an instance to read from somewhere else.
    xml_dir = './BR'

    def load_date(self, date, return_date):
        """Do nothing; the data is produced by ``after_load_date_check``."""
        pass

    def after_load_date_check(self, required_directions):
        """Yield one ``(direction, payload)`` pair per required direction.

        The first ``*.xml`` file (by sorted name) in ``xml_dir`` is parsed and
        shared by every yielded payload.  Nothing is yielded when the
        directory contains no XML file.

        Args:
            required_directions: iterable of ``'departing'`` and/or
                ``'returning'``.

        Yields:
            tuple: ``(direction, {'trips': parsed_document, 'direction': index})``
            where ``index`` is 0 for ``'departing'`` and 1 for ``'returning'``.

        Raises:
            KeyError: if a direction is neither ``'departing'`` nor
                ``'returning'``.
        """
        containers = {
            'departing': 0,
            'returning': 1,
        }
        # Sorted so that the file picked is deterministic; os.listdir returns
        # entries in arbitrary order.
        for filename in sorted(os.listdir(self.xml_dir)):
            if not filename.endswith('.xml'):
                continue
            fullname = os.path.join(self.xml_dir, filename)
            print(fullname)
            # Binary mode: let the XML parser honour the encoding declared in
            # the document instead of decoding with the platform default.
            with open(fullname, 'rb') as f:
                doc = xmltodict.parse(f.read())
            # JSON round-trip; presumably used to turn xmltodict's mapping
            # objects into plain dicts/lists - TODO confirm it is still needed.
            doc = ujson.loads(ujson.dumps(doc))
            for direction in required_directions:
                print('is here')
                yield (direction, {'trips': doc, 'direction': containers[direction]})
            # NOTE(review): the nesting of this ``break`` was reconstructed
            # from a paste that lost its indentation; it is taken to mean
            # "stop after the first XML file" - confirm against the original.
            break

    def parse_flights(self, data, date, scraping_date):
        """Yield one flight dict per airline offer in ``data``.

        Args:
            data: payload produced by ``after_load_date_check``: a dict with
                the parsed document under ``'trips'`` and the index of the
                wanted ``Associations`` entry under ``'direction'``.
            date: unused here.
            scraping_date: unused here.

        Yields:
            dict: ``{'segments': [...], 'lowest_fare': {'value': float,
            'currency': str}}``; the segment dicts come from
            ``parse_segments``.

        Raises:
            KeyError: if an offer references a segment key that is not in the
                document's segment list, or an expected element is missing.
            IndexError: if an offer has no ``Associations`` entry at the
                requested direction index.
        """
        segkey_to_segment = self.parse_segments(data)
        offers = data['trips']['AirShoppingRS']['OffersGroup']['AirlineOffers']['AirlineOffer']
        for flight in _as_list(offers):
            associations = _as_list(flight['PricedOffer']['Associations'])
            fl = associations[data['direction']]
            # A flight with one segment has a single reference (a dict), a
            # connecting flight has several (a list).
            references = _as_list(fl['ApplicableFlight']['FlightSegmentReference'])
            segments = [segkey_to_segment[ref['@ref']] for ref in references]
            print(fl)
            price = flight['TotalPrice']['SimpleCurrencyPrice']
            yield {
                'segments': segments,
                'lowest_fare': {
                    'value': float(price['#text']),
                    'currency': price['@Code'],
                },
            }

    @staticmethod
    def parse_segments(data):
        """Map every segment key in the document to a segment dict.

        Args:
            data: dict whose ``'trips'`` entry is the parsed AirShoppingRS
                document.

        Returns:
            dict: ``@SegmentKey`` -> dict with ``airline``, ``flight_number``,
            ``from_airport``, ``to_airport``, ``dept_time`` and ``arrv_time``
            (the last two as naive ``datetime`` objects).

        Raises:
            ValueError: if a date/time pair does not match
                ``YYYY-MM-DD`` + ``HH:MM``.
        """
        # Date and Time are concatenated without a separator before parsing.
        timestamp_format = '%Y-%m-%d%H:%M'
        flight_segments = data['trips']['AirShoppingRS']['DataLists']['FlightSegmentList']['FlightSegment']
        segkey_to_segment = {}
        for flight in _as_list(flight_segments):
            departure = flight['Departure']
            arrival = flight['Arrival']
            carrier = flight['MarketingCarrier']
            segkey_to_segment[flight['@SegmentKey']] = {
                'airline': carrier['AirlineID'],
                'flight_number': carrier['FlightNumber'],
                'from_airport': departure['AirportCode'],
                'to_airport': arrival['AirportCode'],
                'dept_time': datetime.strptime(departure['Date'] + departure['Time'], timestamp_format),
                'arrv_time': datetime.strptime(arrival['Date'] + arrival['Time'], timestamp_format),
            }
        return segkey_to_segment
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    scraper = Scraper()
    scraper.run()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement