Advertisement
tokarevms

routes_search

Aug 30th, 2017
199
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 37.18 KB | None | 0 0
  1. # coding: utf-8
  2. """
  3. This module implements searching for airline-routes on several web-sites.
  4.  
  5. To get routes use find_routes_on_multi_search().
  6. To print results in console use print_routes().
  7. """
  8. from datetime import datetime
  9. from abc import ABCMeta, abstractmethod
  10. import io
  11. from itertools import product
  12. from lxml import html
  13. from lxml.etree import XMLSyntaxError
  14. from requests.exceptions import RequestException
  15. import requests
  16.  
  17.  
  18. class Route(object):
  19.     """Forms route.
  20.  
  21.    :param departure_date: date departure :class:'datetime' object.
  22.    :param route_prices: tuple, first element - dictionary(faretype=price), faretype, price are strings, second element - currency
  23.     string
  24.    :param route_duration: route duration string
  25.    :param origin_city: city origin string
  26.    :param destination_city: city destination string
  27.    :param route_flights: list of :class:'Flight' objects
  28.    """
  29.     def __init__(self, departure_date, route_prices, route_duration, origin_city,
  30.                  destination_city, route_flights):
  31.         self.route_flights = route_flights
  32.         self.route_prices = route_prices
  33.         self.route_duration = route_duration
  34.         self.origin_city = origin_city
  35.         self.destination_city = destination_city
  36.         self.departure_date = departure_date
  37.         self.lowest_price = self.find_lowest_price()
  38.  
  39.     def find_lowest_price(self):
  40.         """Finds the lowest price of this route.
  41.  
  42.        :return: float
  43.        """
  44.         all_route_prices = self.route_prices[0].values()
  45.         lowest_price = min(map(float, all_route_prices))
  46.         return lowest_price
  47.  
  48.     def __repr__(self):
  49.         repr_string = u'_' * 40
  50.         repr_string += u'\n{0}\t{1}\t{2}\t{3}\t{4} {5}\n'\
  51.             .format(self.origin_city, self.departure_date.date(), self.route_duration,
  52.                     self.destination_city, self.lowest_price, self.route_prices[1])
  53.         repr_string += u'Prices[{}]:\n'.format(self.route_prices[1])
  54.         for fare_type in self.route_prices[0]:
  55.             repr_string += u'{0} - {1}\n'.format(fare_type, self.route_prices[0][fare_type])
  56.         for flight in self.route_flights:
  57.             repr_string += str(flight)
  58.         return repr_string.encode('utf-8')
  59.  
  60.  
  61. class Flight(object):
  62.     """Forms flight.
  63.  
  64.    :param flight_code: flight code string
  65.    :param departure_time: time departure :class:'datetime' object or string
  66.    :param arrival_time: time arrival :class:'datetime' object or string
  67.    :param origin_code_iata: origin IATA code string
  68.    :param destination_code_iata: destination IATA code string
  69.    """
  70.     def __init__(self, flight_code, departure_time, arrival_time, origin_code_iata,
  71.                  destination_code_iata):
  72.         self.departure_time = departure_time
  73.         self.arrival_time = arrival_time
  74.         self.flight_code = flight_code
  75.         self.origin_code_iata = origin_code_iata
  76.         self.destination_code_iata = destination_code_iata
  77.  
  78.     def __repr__(self):
  79.         repr_string = u'\n\t{0}\t{1}\t{2} ---> {3}\t{4}'\
  80.             .format(self.flight_code, self.departure_time, self.origin_code_iata,
  81.                     self.destination_code_iata, self.arrival_time)
  82.         return repr_string.encode('utf-8')
  83.  
  84.  
  85. class SearchForRoutes(object):
  86.     """Base class for searching on sites.
  87.  
  88.    :param origin_code_iata: origin IATA code string
  89.    :param destination_code_iata: destination code IATA string
  90.    :param route_dates: list of departure and return dates :class:'datetime' objects
  91.    """
  92.     __metaclass__ = ABCMeta
  93.  
  94.     def __init__(self, origin_code_iata, destination_code_iata, route_dates):
  95.         self.search_request_url = ''
  96.         self.validation_url = ''
  97.         self.route_dates = route_dates
  98.         self.origin_code_iata = origin_code_iata
  99.         self.destination_code_iata = destination_code_iata
  100.         self.routes_found = []
  101.         self.output_file_encoding = 'utf-8'
  102.  
  103.     def _decode_json(self, response_json):    # Decodes JSON
  104.         try:
  105.             decoded_json = response_json.json()
  106.         except ValueError:
  107.             raise ValueError('({})JSON decoding failed, routes not found'
  108.                              .format(self.__class__.__name__))
  109.         return decoded_json
  110.  
  111.     def _decode_html_string(self, html_string):    # Parses string and creates DOM
  112.         try:
  113.             page_html = html.fromstring(html_string)
  114.         except XMLSyntaxError:
  115.             raise ValueError('({})HTML string failed, routes not found'
  116.                              .format(self.__class__.__name__))
  117.         return page_html
  118.  
  119.     @abstractmethod
  120.     def make_request_params(self, session):
  121.         """Makes parameters for searching request.
  122.  
  123.        :param session: for current search-request on particular web-site :class:'Session'
  124.        :return tuple: HTTP-request method string and dictionary of special parameters
  125.        (headers, body, JSON etc.)
  126.        """
  127.         return None, None
  128.  
  129.     @abstractmethod
  130.     def validate_codes_iata(self, session):
  131.         """Validates codes IATA for search-request.
  132.  
  133.        :param session: for current search-request on particular web-site :class:'Session'
  134.        :return list of two valid codes IATA (or other parameters instead codes,
  135.        which needs for search)
  136.        if no valid dates then raises ValueError
  137.        """
  138.         return None, None
  139.  
  140.     def send_request_and_check(self, session, request_method, url, request_params):
  141.         """Sends request and checks server response.
  142.  
  143.        :param session: for current search-request on particular web-site :class:'Session'
  144.        :param request_method: HTTP-request method string
  145.        :param url: URL for request
  146.        :param request_params: of special parameters (headers, body, JSON etc.) for request
  147.        :return :class:'Response' object
  148.        if request failed then raises RequestException
  149.        """
  150.         try:
  151.             response = session.request(request_method, url, **request_params)
  152.             response.raise_for_status()
  153.         except RequestException, error:
  154.             print '({}) request failed'.format(self.__class__.__name__)
  155.             raise error
  156.         return response
  157.  
  158.     def search_request(self):
  159.         """Make search-request and finds routes."""
  160.         session = requests.Session()
  161.         self.origin_code_iata, self.destination_code_iata = self.validate_codes_iata(session)
  162.         request_method, request_params = self.make_request_params(session)
  163.         search_response = self.send_request_and_check(session, request_method,
  164.                                                       self.search_request_url, request_params)
  165.         self.routes_found = self.extract_data_from_response(search_response)
  166.  
  167.     @abstractmethod
  168.     def extract_data_from_response(self, response):
  169.         """Extracts useful information from server response.
  170.  
  171.        :param response: :class:'Response' object
  172.        :return list of :class:'Route" objects
  173.        """
  174.         pass
  175.  
  176.     def output_extracted_routes(self):
  177.         """Prints routes' information on console and writes in Result.txt file."""
  178.         with io.open('Result.txt', 'a', encoding=self.output_file_encoding) as output_file:
  179.             for route in self.routes_found:
  180.                 print route
  181.                 output_file.write(route)
  182.         print "Export of results is completed"
  183.  
  184.     def search(self):
  185.         """Search on particular web-site and output routes' information on console and file."""
  186.         self.search_request()
  187.         if self.routes_found:
  188.             self.output_extracted_routes()
  189.  
  190.  
  191. class AeroflotSearch(SearchForRoutes):
  192.     """Route-search on Aeroflot web-site."""
  193.     def __init__(self, origin_code_iata, destination_code_iata, route_dates):
  194.         SearchForRoutes.__init__(self, origin_code_iata, destination_code_iata, route_dates)
  195.         self.search_request_url = 'https://www.aeroflot.ru/sb/booking/api/app/search/v2'
  196.         self.validation_url = 'https://www.aeroflot.ru/sb/booking/api/app/cities/v1'
  197.  
  198.     @staticmethod
  199.     def _find_in_json(code, cities):    # Finds code IATA in JSON for validation
  200.         for city in cities['data']['cities']:
  201.             if code == city['code']:
  202.                 return code
  203.             else:
  204.                 for airport in city['airports']:
  205.                     if code == airport['code']:
  206.                         return code
  207.         return None
  208.  
  209.     @staticmethod
  210.     def _make_validation_request_params():    # Makes validation request parameters
  211.         validation_params = {'json': {'lang': 'ru'}}
  212.         return validation_params
  213.  
  214.     def validate_codes_iata(self, session):
  215.         """Validates codes IATA for search-request for Aeroflot web-site."""
  216.         valid_codes_iata = []
  217.         validation_params = self._make_validation_request_params()
  218.         cities_json = self.send_request_and_check(session, 'POST', self.validation_url,
  219.                                                   validation_params)
  220.         cities = self._decode_json(cities_json)
  221.         for code_iata in self.origin_code_iata, self.destination_code_iata:
  222.             code_found = self._find_in_json(code_iata, cities)
  223.             if not code_found:
  224.                 raise ValueError('(AeroflotSearch) invalid code IATA: {}'.format(code_iata))
  225.             valid_codes_iata.append(code_found)
  226.         print '(AeroflotSearch) valid codes IATA'
  227.         return valid_codes_iata
  228.  
  229.     def make_request_params(self, session):
  230.         """Makes HTTP-request method and parameters for Aeroflot."""
  231.         origin_code_iata = self.origin_code_iata
  232.         destination_code_iata = self.destination_code_iata
  233.         json_param_routes = []
  234.         for date in self.route_dates:
  235.             route = {
  236.                 "origin": origin_code_iata,
  237.                 "destination": destination_code_iata,
  238.                 "departure": date.strftime('%Y-%m-%d')
  239.             }
  240.             json_param_routes.append(route)
  241.             origin_code_iata, destination_code_iata = destination_code_iata, origin_code_iata
  242.         json_params = {
  243.             "routes": json_param_routes,
  244.             "cabin": "econom",
  245.             "country": "ru",
  246.             "adults": 1,
  247.             "combined": False,
  248.             "lang": "ru"
  249.         }
  250.         search_request_method = 'POST'
  251.         search_request_params = {'json': json_params}
  252.         return search_request_method, search_request_params
  253.  
  254.     @staticmethod
  255.     def _extract_route_prices(route):    # Extracts dictionary of prices from route info
  256.         route_prices = dict()
  257.         currency = route['prices'][0]['currency']
  258.         for price in route['prices']:
  259.             route_price = price['total_amount']
  260.             route_faretype = price['fare_group_name']
  261.             route_prices[route_faretype] = route_price
  262.         return route_prices, currency
  263.  
  264.     @staticmethod
  265.     def _extract_flights_from_route(route):    # Extracts flights from route info
  266.         route_flights = []
  267.         for leg in route['legs']:
  268.             for flight in leg['segments']:
  269.                 number = flight['flight_number']
  270.                 air_code = flight['airline_code']
  271.                 raw_departure_time = flight['departure']
  272.                 departure_time = datetime.strptime(raw_departure_time, '%Y-%m-%d %H:%M')
  273.                 arrival_time = flight['arrival'][-5:]
  274.                 origin_code_iata = flight['origin']['airport_code']
  275.                 destination_code_iata = flight['destination']['airport_code']
  276.                 one_flight = Flight(air_code + number, departure_time, arrival_time,
  277.                                     origin_code_iata, destination_code_iata)
  278.                 route_flights.append(one_flight)
  279.         return route_flights
  280.  
  281.     def _extract_routes(self, direction):    # Extracts routes from direction
  282.         routes_extracted = []
  283.         city_origin = direction[0]['legs'][0]['segments'][0]['origin']['city_name']
  284.         city_destination = direction[0]['legs'][0]['segments'][-1]['destination']['city_name']
  285.         raw_departure_date = direction[0]['legs'][0]['segments'][0]['departure'][:10]
  286.         departure_date = datetime.strptime(raw_departure_date, '%Y-%m-%d')
  287.         for route in direction:
  288.             route_prices = self._extract_route_prices(route)
  289.             route_flights = self._extract_flights_from_route(route)
  290.             route_duration = route['time_name']
  291.             one_route = Route(departure_date, route_prices, route_duration, city_origin,
  292.                               city_destination, route_flights)
  293.             routes_extracted.append(one_route)
  294.         return routes_extracted
  295.  
  296.     def extract_data_from_response(self, response):
  297.         """Extracts routes from Aeroflot JSON-response."""
  298.         routes_found = []
  299.         json_routes = self._decode_json(response)
  300.         if not json_routes['data']['itineraries']:
  301.             raise ValueError('(AeroflotSearch) routes not found')
  302.         for direction in json_routes['data']['itineraries']:
  303.             routes_found += self._extract_routes(direction)
  304.         return routes_found
  305.  
  306.  
  307. class NordwindSearch(SearchForRoutes):
  308.     """Route-search on Nordwind web-site."""
  309.     def __init__(self, origin_code_iata, destination_code_iata, route_dates):
  310.         SearchForRoutes.__init__(self, origin_code_iata, destination_code_iata, route_dates)
  311.         self.search_request_url = 'https://airbook.nordwindairlines.ru/online/json/' \
  312.                                   'search-variants-mono-brand-cartesian'
  313.         self.validation_url = 'https://airbook.nordwindairlines.ru/online/json/dependence-cities'
  314.  
  315.     @staticmethod
  316.     def _find_in_json(code, cities):    # Finds code IATA in JSON for validation
  317.         for city in cities['origin']:
  318.             if code == city['codeEn']:
  319.                 return code
  320.         return None
  321.  
  322.     def validate_codes_iata(self, session):
  323.         """Validates codes IATA for search-request for Nordwind web-site."""
  324.         valid_codes_iata = []
  325.         validation_params = {}
  326.         cities_json = self.send_request_and_check(session, 'GET', self.validation_url,
  327.                                                   validation_params)
  328.         cities = self._decode_json(cities_json)
  329.         for code_iata in self.origin_code_iata, self.destination_code_iata:
  330.             code_found = self._find_in_json(code_iata, cities)
  331.             if not code_found:
  332.                 raise ValueError('(NordWindSearch) invalid code IATA: {}'.format(code_iata))
  333.             valid_codes_iata.append(code_found)
  334.         print '(NordWindSearch) valid codes IATA'
  335.         return valid_codes_iata
  336.  
  337.     def make_request_params(self, session):
  338.         """Makes HTTP-request method and parameters for Nordwind."""
  339.         origin_code_iata = self.origin_code_iata
  340.         destination_code_iata = self.destination_code_iata
  341.         param_routes = {}
  342.         segment_count = 0
  343.         for date in self.route_dates:
  344.             route = {
  345.                 'date[%s]' % segment_count: date.strftime('%d.%m.%Y'),
  346.                 'origin-city-code[%s]' % segment_count: origin_code_iata,
  347.                 'destination-city-code[%s]' % segment_count: destination_code_iata
  348.             }
  349.             origin_code_iata, destination_code_iata = destination_code_iata, origin_code_iata
  350.             param_routes.update(route)
  351.             segment_count += 1
  352.         requst_params = {
  353.             'segmentsCount': segment_count,
  354.             'lang': 'ru',
  355.             'count-aaa': 1
  356.         }
  357.         requst_params.update(param_routes)
  358.         params = {'params': requst_params}
  359.         method = 'GET'
  360.         return method, params
  361.  
  362.     @staticmethod
  363.     def _extract_flights_from_route(route):    # Extracts flights from route info
  364.         route_flights = []
  365.         for flight in route['flights']:
  366.             number = flight['racenumber']
  367.             air_code = flight['carrier']
  368.             depart_time = flight['departuretime']
  369.             depart_date = flight['departuredate']
  370.             departure_time = datetime.strptime(depart_date + depart_time, '%d.%m.%Y%H:%M')
  371.             arrival_time = flight['arrivaltime']
  372.             origin_code_iata = flight['originport']
  373.             destination_code_iata = flight['destinationport']
  374.             one_flight = Flight(air_code + number, departure_time,
  375.                                 arrival_time, origin_code_iata, destination_code_iata)
  376.             route_flights.append(one_flight)
  377.         return route_flights
  378.  
  379.     @staticmethod
  380.     def _extract_route_prices(routes_price_list, chain_id):    # Extracts prices from route info
  381.         route_prices = dict()
  382.         currency = routes_price_list[chain_id][0]['currency']
  383.         for price in routes_price_list[chain_id]:
  384.             route_price = price['price']
  385.             route_faretype = price['brand'][:12]
  386.             route_prices[route_faretype] = route_price
  387.         return route_prices, currency
  388.  
  389.     def _extract_routes(self, json_routes, routes_price_list):    # Extracts routes from JSON
  390.         extracted_routes = []
  391.         for route in json_routes['flights']:
  392.             chain_id = route['chainId']
  393.             route_flights = self._extract_flights_from_route(route)
  394.             route_prices = self._extract_route_prices(routes_price_list, chain_id)
  395.             route_duration = route['flights'][0]['flighttime']
  396.             city_origin = route['flights'][0]['origincityName']
  397.             city_destination = route['flights'][0]['destinationcityName']
  398.             departure_date = datetime.strptime(route['flights'][0]['departuredate'], '%d.%m.%Y')
  399.             one_route = Route(departure_date, route_prices, route_duration, city_origin,
  400.                               city_destination, route_flights)
  401.             extracted_routes.append(one_route)
  402.         return extracted_routes
  403.  
  404.     def extract_data_from_response(self, response):
  405.         """Extracts routes from Nordwind JSON-response."""
  406.         json_routes = self._decode_json(response)
  407.         if 'error' in json_routes:
  408.             raise ValueError('(NordwindSearch) routes not found')
  409.         routes_price_list = {}
  410.         for prices in json_routes['prices']:
  411.             routes_price_list.update(prices)
  412.         routes_found = self._extract_routes(json_routes, routes_price_list)
  413.         return routes_found
  414.  
  415.  
  416. class FlynikiSearch(SearchForRoutes):
  417.     """Route-search on Flyniki web-site."""
  418.     def __init__(self, origin_code_iata, destination_code_iata, route_dates):
  419.         SearchForRoutes.__init__(self, origin_code_iata, destination_code_iata, route_dates)
  420.         self.search_request_url = 'https://www.flyniki.com/en/booking/flight/vacancy.php'
  421.         self.validation_url = 'https://www.flyniki.com/en/site/json/suggestAirport.php'
  422.  
  423.     @staticmethod
  424.     def _find_in_json(code, cities):    # Finds code IATA in JSON for validation, returns city name
  425.         for city in cities['suggestList']:
  426.             if code == city['code']:
  427.                 return city['name']
  428.         return None
  429.  
  430.     def validate_codes_iata(self, session):
  431.         """Validates codes IATA for search-request for Flyniki web-site.
  432.  
  433.        :param session: for current search-request on particular web-site :class:'Session'
  434.        :return list of two city names, which are used for creating search-request
  435.        """
  436.         valid_cities = []
  437.         payload = {'searchflightid': 0,
  438.                    'suggestsource[]': 'activeairports',
  439.                    'departures[]': self.origin_code_iata,
  440.                    'destinations[]': self.destination_code_iata,
  441.                    'routesource[0]': 'airberlin',
  442.                    'routesource[1]': 'partner'}
  443.         validation_params = {'params': payload}
  444.         for search_for in 'departures', 'destinations':
  445.             validation_params['params'].update(dict(searchfor=search_for))
  446.             key = '{}[]'.format(search_for)
  447.             code_iata = validation_params['params'][key]
  448.             cities_json = self.send_request_and_check(session, 'GET', self.validation_url,
  449.                                                       validation_params)
  450.             cities = self._decode_json(cities_json)
  451.             if not cities['suggestList']:
  452.                 raise ValueError('(FlyNikiSearch) invalid code IATA: {}'.format(code_iata))
  453.             city_found = self._find_in_json(code_iata, cities)
  454.             if not city_found:
  455.                 raise ValueError('(FlyNikiSearch) invalid code IATA: {}'.format(code_iata))
  456.             valid_cities.append(city_found)
  457.         print '(FlynikiSearch) valid codes IATA'
  458.         return valid_cities
  459.  
  460.     def get_sid(self, session):
  461.         """Makes simple request and returns SID number for future search.
  462.  
  463.        :param session: for current search-request on particular web-site :class:'Session'
  464.        :return sid string
  465.        """
  466.         sid_request_headers = {'Content-Type': 'application/x-www-form-urlencoded'}
  467.         sid_request_body = r'market=RU&language=en&bookingmask_' \
  468.                            r'widget_dateformat=dd/mm/yy&returnDate={}'\
  469.             .format(datetime.now().strftime('%d/%m/%Y'))
  470.         sid_request_params = {'headers': sid_request_headers, 'data': sid_request_body}
  471.         sid_request_url = 'https://www.flyniki.com/en/start.php'
  472.         response = self.send_request_and_check(session, 'POST', sid_request_url, sid_request_params)
  473.         sid = response.url.split('=')[1]
  474.         return sid
  475.  
  476.     def make_request_params(self, session):
  477.         """Makes HTTP-request method and parameters for Flyniki."""
  478.         origin_code_iata = self.origin_code_iata
  479.         destination_code_iata = self.destination_code_iata
  480.         sid = self.get_sid(session)
  481.         request_data = r'_ajax[requestParams][adultCount]=1&_ajax[templates][]=main&' \
  482.                        r'_ajax[templates][]=priceoverview&_ajax[templates][]=infos&' \
  483.                        r'_ajax[templates][]=flightinfo&_ajax[requestParams][departure]={0}&' \
  484.                        r'_ajax[requestParams][destination]={1}'\
  485.             .format(origin_code_iata, destination_code_iata)
  486.         request_data += r'&_ajax[requestParams][outboundDate]={0}&' \
  487.                         r'_ajax[requestParams][returnDate]={1}'\
  488.             .format(self.route_dates[0].strftime('%Y-%m-%d'),
  489.                     self.route_dates[-1].strftime('%Y-%m-%d'))
  490.         if len(self.route_dates) == 1:
  491.             request_data += r'&_ajax[requestParams][oneway]=on'
  492.         request_headers = {'Content-Type': 'application/x-www-form-urlencoded'}
  493.         request_payload = {'sid': sid}
  494.         search_request_params = {'headers': request_headers, 'data': request_data,
  495.                                  'params': request_payload}
  496.         search_request_method = 'POST'
  497.         return search_request_method, search_request_params
  498.  
  499.     @staticmethod
  500.     def _extract_flights_from_route(route):    # Extracts flights from route info
  501.         route_flights = []
  502.         for flight in route.xpath('./following-sibling::tr[1]//tbody/tr'):
  503.             flight_number = flight.xpath('./td[4]/text()')[0]
  504.             departure_time = flight.xpath('./td[2]/span/time/text()')[0]
  505.             arrival_time = flight.xpath('./td[3]/span/time/text()')[0]
  506.             origin = flight.xpath('./td[2]/span/text()')[1]
  507.             destination = flight.xpath('./td[3]/span/text()')[1]
  508.             origin_code_iata = origin.split(',')[1].strip()
  509.             destination_code_iata = destination.split(',')[1].strip()
  510.             one_flight = Flight(flight_number, departure_time, arrival_time, origin_code_iata,
  511.                                 destination_code_iata)
  512.             route_flights.append(one_flight)
  513.         return route_flights
  514.  
  515.     @staticmethod
  516.     def _extract_route_prices(route):    # Extracts prices from route info
  517.         route_prices = dict()
  518.         for price in route.xpath('./td[position() > 4][label]'):
  519.             route_price = price.xpath('./label/div/span/text()')[0]
  520.             faretype = price.xpath('.//input[@name="faretype"]/@value')[0]
  521.             faregroup = price.xpath('.//input[@name="faregroup"]/@value')[0]
  522.             route_prices[faregroup + faretype] = route_price
  523.         return route_prices
  524.  
  525.     def _extract_routes(self, direction, currency):    # Extracts routes from direction
  526.         routes_extracted = []
  527.         route_title = direction.xpath('.//div[@class="vacancy_route"]/text()')[0]
  528.         cities = route_title.split(',')[0]
  529.         city_origin = cities.split(u'–')[0]
  530.         city_destination = cities.split(u'–')[1]
  531.         raw_departure_date = route_title.split(',')[-1]
  532.         departure_date = datetime.strptime(raw_departure_date.strip(), '%d/%m/%y')
  533.         for route in direction.xpath('.//tr[@role="group"]'):
  534.             route_flights = self._extract_flights_from_route(route)
  535.             route_prices = self._extract_route_prices(route), currency
  536.             route_duration = route.xpath('./td[4]/span/text()')[0]
  537.             one_route = Route(departure_date, route_prices, route_duration, city_origin,
  538.                               city_destination, route_flights)
  539.             routes_extracted.append(one_route)
  540.         return routes_extracted
  541.  
  542.     def extract_data_from_response(self, response):
  543.         """Extracts routes from Flyniki mixed JSON/HTML-response."""
  544.         routes_found = []
  545.         json_html = self._decode_json(response)
  546.         html_page = self._decode_html_string(json_html['templates']['main'])
  547.         currency_list = html_page.xpath('.//th/@aria-label')
  548.         if not currency_list:
  549.             raise ValueError('(FlynikiSearch) routes not found')
  550.         currency = currency_list[0].split()[0]
  551.         directions = html_page.xpath('//div[@id="flighttables"]/div[div[@class="row"]]')
  552.         for direction in directions:
  553.             routes_found += self._extract_routes(direction, currency)
  554.         return routes_found
  555.  
  556.  
  557. class S7Search(SearchForRoutes):
  558.     """Route-search on S7 web-site."""
  559.     def __init__(self, origin_code_iata, destination_code_iata, route_dates):
  560.         SearchForRoutes.__init__(self, origin_code_iata, destination_code_iata, route_dates)
  561.         self.search_request_url = 'https://travelwith.s7.ru/ajax/actions/updateFlightsSearch.action'
  562.         self.validation_url = 'https://www.s7.ru/app/LocationService'
  563.  
  564.     @staticmethod
  565.     def _find_in_json(code, cities):    # Finds code IATA in JSON for validation, returns
  566.         for city in cities['c']:        # special code
  567.             if code == city['iata']:
  568.                 return city['code']
  569.         return None
  570.  
  571.     def validate_codes_iata(self, session):
  572.         """Validates codes IATA for search-request for S7 web-site
  573.  
  574.        :return list of two special search-parameters
  575.        """
  576.         valid_codes_iata = []
  577.         payload = {'action': 'get_locations',
  578.                    'searchType': 'avia'}
  579.         validation_params = {'params': payload}
  580.         for code_iata in self.origin_code_iata, self.destination_code_iata:
  581.             validation_params['params'].update({'str': code_iata})
  582.             cities_json = self.send_request_and_check(session, 'GET', self.validation_url,
  583.                                                       validation_params)
  584.             cities = self._decode_json(cities_json)
  585.             if not cities['c']:
  586.                 raise ValueError('(S7Search) invalid code IATA: {}'.format(code_iata))
  587.             code_found = self._find_in_json(code_iata, cities)
  588.             if not code_found:
  589.                 raise ValueError('(S7Search) invalid code IATA: {}'.format(code_iata))
  590.             valid_codes_iata.append(code_found)
  591.         print '(S7Search) valid codes IATA'
  592.         return valid_codes_iata
  593.  
  594.     def make_request_params(self, session):
  595.         """Makes HTTP-request method and parameters for S7"""
  596.         origin_code_iata = self.origin_code_iata
  597.         destination_code_iata = self.destination_code_iata
  598.         request_data = r'model.page=FLIGHTS_SELECT_PAGE&model.milesEnabled=true&' \
  599.                        r'model.directFlightsOnly=false&model.flexible=false&' \
  600.                        r'model.redemption=false&model.currencyType=RUB&' \
  601.                        r'model.multiCitySearchType=PRICE&model.departurePoint={0}&' \
  602.                        r'model.arrivalPoint={1}&model.departureDate={2}&' \
  603.                        r'model.adultsCount=1'.format(origin_code_iata, destination_code_iata,
  604.                                                      self.route_dates[0].strftime('%d.%m.%Y'))
  605.         if len(self.route_dates) == 2:
  606.             request_data += r'&model.routeType=ROUND_TRIP&model.arrivalDate={}'\
  607.                 .format(self.route_dates[1].strftime('%d.%m.%Y'))
  608.         else:
  609.             request_data += r'&model.routeType=ONE_WAY'
  610.         request_headers = {'Content-Type': 'application/x-www-form-urlencoded'}
  611.         search_request_params = {'headers': request_headers, 'data': request_data}
  612.         search_request_method = 'POST'
  613.         return search_request_method, search_request_params
  614.  
  615.     @staticmethod
  616.     def _extract_route_prices(route):    # Extracts prices from route info
  617.         route_prices = dict()
  618.         for price in route.xpath('.//div[div[@class="radiobutton"]]'):
  619.             route_price = price.xpath('.//span[@data-qa="amount"]/text()')[0].replace(u'\xa0', '')
  620.             route_faretype = price.xpath('./@data-tariff-type')[0]
  621.             route_prices[route_faretype] = route_price
  622.         return route_prices
  623.  
  624.     @staticmethod
  625.     def _extract_flights_from_route(route):    # Extracts flights from route info
  626.         flights = route.xpath('.//div[@data-qa="extended_info"]')
  627.         route_flights = []
  628.         if len(flights) == 1:
  629.             flights_info = [route]
  630.         else:
  631.             flights_info = flights
  632.         for flight in flights_info:
  633.             flight_number = flight.xpath('.//span[@data-qa="number_flightItem"]/text()')[0]
  634.             departure_time = flight.xpath('.//time[@data-qa="timeDeparture_flightItem"]/text()')[0]
  635.             arrival_time = flight.xpath('.//time[@data-qa="timeArrived_flightItem"]/text()')[0]
  636.             origin_code_iata = \
  637.                 flight.xpath('.//span[@data-qa="airportDeparture_flightItem"]/text()')[0]
  638.             destination_code_iata = \
  639.                 flight.xpath('.//span[@data-qa="airportArrived_flightItem"]/text()')[0]
  640.             one_flight = Flight(flight_number, departure_time, arrival_time, origin_code_iata,
  641.                                 destination_code_iata)
  642.             route_flights.append(one_flight)
  643.         return route_flights
  644.  
  645.     def _extract_routes(self, index, direction, currency):    # Extracts routes from direction
  646.         routes_extracted = []
  647.         cities = direction.xpath('.//div[@class="route"]/text()')
  648.         city_origin = cities[0].strip()
  649.         city_destination = cities[2].strip()
  650.         departure_date = self.route_dates[index]
  651.         for route in direction.xpath('.//div[@data-qa="listFlight"]/div[@data-qa]'):
  652.             route_prices = self._extract_route_prices(route), currency
  653.             route_flights = self._extract_flights_from_route(route)
  654.             route_duration = \
  655.                 route.xpath('.//div[@data-qa="durationTotal_flightItemShort"]/text()')[0]
  656.             one_route = Route(departure_date, route_prices, route_duration, city_origin,
  657.                               city_destination, route_flights)
  658.             routes_extracted.append(one_route)
  659.         return routes_extracted
  660.  
  661.     def extract_data_from_response(self, response):
  662.         """Extracts routes from S7 HTML-response."""
  663.         routes_found = []
  664.         html_page = self._decode_html_string(response.text)
  665.         currency_list = html_page.xpath('//span[@data-qa="currency"][1]/text()')
  666.         if not currency_list:
  667.             raise ValueError('(S7Search) routes not found')
  668.         currency = currency_list[0]
  669.         directions = html_page.xpath('//div[@data-qa="selectFlight_block"]/div[@data-qa]')
  670.         for index, direction in enumerate(directions):
  671.             routes_found += self._extract_routes(index, direction, currency)
  672.         return routes_found
  673.  
  674.  
  675. def validate_date(date):
  676.     """Validates date and converts it to datetime object.
  677.  
  678.    :param date: date string
  679.    :return :class:'datetime' object if date string in format 'dd-mm-yyyy' and date not in the past
  680.    """
  681.     date_object = datetime.strptime(date.strip(), '%d-%m-%Y')
  682.     if date_object.date() >= datetime.now().date():
  683.         return date_object
  684.     else:
  685.         raise ValueError('invalid date: {}'.format(date_object.date()))
  686.  
  687.  
  688. def input_date(date_message, required_date=True):
  689.     """Requests date input.
  690.  
  691.    :param date_message:  message string for user
  692.    :param required_date: (optional)flag that shows mandatory date (default=True)
  693.    :return :class:'datetime' object
  694.    """
  695.     valid_date = None
  696.     while not valid_date:
  697.         try:
  698.             raw_date = raw_input(date_message).strip()
  699.             if required_date:
  700.                 valid_date = validate_date(raw_date)
  701.             else:
  702.                 if raw_date:
  703.                     valid_date = validate_date(raw_date)
  704.             return valid_date
  705.         except ValueError, error:
  706.             print error
  707.  
  708.  
  709. def input_codes_of_sites():
  710.     """Request web-sites codes input and checks it.
  711.  
  712.    :return list of web-sites for search
  713.    """
  714.     sites = []
  715.     while not sites:
  716.         input_string = raw_input('{0}Code of company: '.format(air_companies_info()))
  717.         if input_string:
  718.             try:
  719.                 codes = map(int, input_string.split())
  720.             except ValueError, error:
  721.                 print error
  722.                 continue
  723.             sites = [SITES_DICT[code] for code in codes if code in SITES_DICT]
  724.         else:
  725.             sites = SITES_DICT.values()
  726.     return sites
  727.  
  728.  
  729. def input_code_iata(message):
  730.     """Request code IATA input and performs simple check.
  731.  
  732.    :param message: message string for user
  733.    :return string
  734.    """
  735.     code_iata = ''
  736.     while not code_iata:
  737.         code_iata = raw_input(message).upper().strip()
  738.         if code_iata.isalpha() and len(code_iata) == 3:
  739.             return code_iata
  740.         else:
  741.             print 'Invalid code IATA'
  742.             code_iata = ''
  743.  
  744.  
  745. def air_companies_info():
  746.     """Constructs string of information about web-sites and their codes
  747.  
  748.    :return string
  749.    """
  750.     info = '\n'
  751.     for code in sorted(SITES_DICT.keys()):
  752.         info += '{1:<15} - {0}\n'.format(code, SITES_DICT[code].__name__)
  753.     return info
  754.  
  755.  
  756. def find_routes_in_multi_search():
  757.     """ Request input of all necessary data, executes search on several web-sites
  758.    and collects all routes.
  759.    Repeats action until it gets routes.
  760.  
  761.    :return list of :class:'Route' objects
  762.    """
  763.     routes_found = []
  764.     sites = input_codes_of_sites()
  765.     while not routes_found:
  766.         origin_code_iata = input_code_iata('Origin code IATA: ')
  767.         destination_code_iata = input_code_iata('Destination code IATA: ')
  768.         departure_date = input_date('Departure date (dd-mm-yyyy): ')
  769.         return_date = input_date('Return date (dd-mm-yyyy): ', required_date=False)
  770.         dates = sorted([date for date in (departure_date, return_date) if date])
  771.         for site in sites:
  772.             one_searcher = site(origin_code_iata, destination_code_iata, dates)
  773.             try:
  774.                 one_searcher.search_request()
  775.             except (ValueError, RequestException), error:
  776.                 print error
  777.                 continue
  778.             routes_found += one_searcher.routes_found
  779.     result_routes = _process_routes(routes_found)
  780.     return result_routes
  781.  
  782.  
  783. def _process_routes(routes):
  784.     """Constructs sorted by price list of routes/return routes
  785.  
  786.    :param routes: list of :class:'Route' objects
  787.    :return list of :class:'Route' objects or list of return-routes (pair of Routes in tuple)
  788.    """
  789.     key_date = routes[0].departure_date
  790.     one_direction = [route for route in routes if route.departure_date == key_date]
  791.     return_direction = [route for route in routes if route.departure_date != key_date]
  792.     if return_direction:
  793.         return_routes = product(one_direction, return_direction)
  794.         prepared_routes = sorted(return_routes, key=lambda x: x[0].lowest_price + x[1].lowest_price)
  795.     else:
  796.         prepared_routes = sorted(routes, key=lambda x: x.lowest_price)
  797.     return prepared_routes
  798.  
  799.  
  800. def print_routes(routes, maximum_routes_number):
  801.     """Prints routes/return routes information and prices on console
  802.  
  803.    :param routes: list of :class:'Route' objects or list of return-routes
  804.    :param maximum_routes_number: number of routes to be printed
  805.    """
  806.     quantity_of_routes = min((len(routes), maximum_routes_number))
  807.     routes = routes[:quantity_of_routes]
  808.     if isinstance(routes[0], Route):
  809.         for route in routes:
  810.             print route
  811.     elif isinstance(routes[0], tuple):
  812.         for route in routes:
  813.             full_price = '{0}\nFull price: {1} {2}\n'\
  814.                 .format('*' * 40, (route[0].lowest_price + route[1].lowest_price),
  815.                         route[0].route_prices[1].encode('utf-8'))
  816.             print '{0}\n{1}\n{2}'.format(route[0], route[1], full_price)
  817.     else:
  818.         print 'No routes'
  819.  
  820.  
  821. SITES_DICT = {1: AeroflotSearch, 2: S7Search, 3: NordwindSearch, 4: FlynikiSearch}
  822.  
  823. if __name__ == '__main__':
  824.     ROUTES = find_routes_in_multi_search()
  825.     print_routes(ROUTES, 25)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement