Advertisement
tokarevms

AirSearch v2.0

Aug 22nd, 2017
161
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 36.14 KB | None | 0 0
  1. ПОСЛЕДНЯЯ ВЕРСИЯ КОДА ТУТ  --->    https://pastebin.com/ruh4f9qQ    
  2. на 9.81/10 в pylint
  3. # coding: utf-8
  4. """
  5. This module implements searching for airline-routes on several web-sites.
  6.  
  7. To get routes use find_routes_on_multi_search().
  8. To print results in console use print_routes().
  9. """
  10. from datetime import datetime
  11. from abc import ABCMeta, abstractmethod
  12. import io
  13. from itertools import product
  14. from lxml import html
  15. from lxml.etree import XMLSyntaxError
  16. from requests.exceptions import RequestException
  17. import requests
  18.  
  19.  
  20. class Route(object):
  21.     """Forms route.
  22.  
  23.    Contains information about route: date departure (datetime object), dictionary of route prices, route duration,
  24.    city origin, city destination, currency, list of flights(list of Flight objects)
  25.    """
  26.     def __init__(self, departure_date, route_prices, route_duration,
  27.                  origin_city, destination_city, currency, route_flights):
  28.         self.route_flights = route_flights
  29.         self.route_prices = route_prices
  30.         self.route_duration = route_duration
  31.         self.origin_city = origin_city
  32.         self.destination_city = destination_city
  33.         self.currency = currency
  34.         self.departure_date = departure_date
  35.         all_route_prices = (price.encode('utf-8') for price in self.route_prices.values())
  36.         self.lowest_price = min(map(float, all_route_prices))
  37.  
  38.     def __repr__(self):
  39.         repr_string = u'_' * 40
  40.         repr_string += u'\n{0}\t{1}\t{2}\t{3}\t{4} {5}\n'\
  41.             .format(self.origin_city, self.departure_date.date(), self.route_duration,
  42.                     self.destination_city, self.lowest_price, self.currency)
  43.         repr_string += u'Prices[{}]:\n'.format(self.currency)
  44.         for fare_type in self.route_prices:
  45.             repr_string += u'{0} - {1}\n'.format(fare_type, self.route_prices[fare_type])
  46.         for flight in self.route_flights:
  47.             repr_string += str(flight)
  48.         return repr_string.encode('utf-8')
  49.  
  50.  
  51. class Flight(object):
  52.     """Forms flight.
  53.  
  54.    Contains information: flight number, time departure, time arrival, origin code IATA,
  55.    destination code IATA
  56.    """
  57.     def __init__(self, flight_code, departure_time, arrival_time, origin_code_iata,
  58.                  destination_code_iata):
  59.         self.departure_time = departure_time
  60.         self.arrival_time = arrival_time
  61.         self.flight_code = flight_code
  62.         self.origin_code_iata = origin_code_iata
  63.         self.destination_code_iata = destination_code_iata
  64.  
  65.     def __repr__(self):
  66.         repr_string = u'\n\t{0}\t{1}\t{2} ---> {3}\t{4}'\
  67.             .format(self.flight_code, self.departure_time, self.origin_code_iata,
  68.                     self.destination_code_iata, self.arrival_time)
  69.         return repr_string.encode('utf-8')
  70.  
  71.  
  72. class SearchForRoutes(object):
  73.     """Base class for searching on sites.
  74.  
  75.    arguments: origin code IATA
  76.    destination code IATA
  77.    list of dates (datetime objects)
  78.    """
  79.     __metaclass__ = ABCMeta
  80.  
  81.     def __init__(self, origin_code_iata, destination_code_iata, route_dates):
  82.         self.search_request_url = ''
  83.         self.validation_url = ''
  84.         self.route_dates = route_dates
  85.         self.origin_code_iata = origin_code_iata
  86.         self.destination_code_iata = destination_code_iata
  87.         self.routes_found = []
  88.         self.output_file_encoding = 'utf-8'
  89.  
  90.     def _decode_json(self, response_json):    # Decodes JSON
  91.         try:
  92.             decoded_json = response_json.json()
  93.         except ValueError:
  94.             raise ValueError('({})JSON decoding failed, routes not found'
  95.                              .format(self.__class__.__name__))
  96.         return decoded_json
  97.  
  98.     def _decode_html_string(self, html_string):    # Parse string and return DOM
  99.         try:
  100.             page_html = html.fromstring(html_string)
  101.         except XMLSyntaxError:
  102.             raise ValueError('({})HTML string failed, routes not found'
  103.                              .format(self.__class__.__name__))
  104.         return page_html
  105.  
  106.     @abstractmethod
  107.     def make_request_params(self, session):
  108.         """Makes parameters for searching request.
  109.  
  110.        arguments: session for current search-request on particular web-site
  111.  
  112.        return: HTTP-request method(string) and dictionary of special parameters
  113.        (headers, body, JSON etc.)
  114.        """
  115.         pass
  116.  
  117.     @abstractmethod
  118.     def validate_codes_iata(self, session):
  119.         """Validates codes IATA for search-request.
  120.  
  121.        arguments: session for current search-request on particular web-site
  122.  
  123.        return: valid codes IATA (or other parameters instead codes, which needs for search)
  124.        if no valid dates then raises ValueError
  125.        """
  126.         pass
  127.  
  128.     def send_request_and_check(self, session, request_method, url, request_params):
  129.         """Sends request and checks server response.
  130.  
  131.        arguments: session for current search-request on particular web-site
  132.        HTTP-request method
  133.        URL
  134.        dictionary of special parameters (headers, body, JSON etc.)
  135.  
  136.        return: response object
  137.        if request failed then raises RequestException
  138.        """
  139.         try:
  140.             response = session.request(request_method, url, **request_params)
  141.             response.raise_for_status()
  142.         except RequestException, error:
  143.             print '({}) request failed'.format(self.__class__.__name__)
  144.             raise error
  145.         return response
  146.  
  147.     def search_request(self):
  148.         """Make search-request and finds routes."""
  149.         session = requests.Session()
  150.         self.origin_code_iata, self.destination_code_iata = self.validate_codes_iata(session)
  151.         request_method, request_params = self.make_request_params(session)
  152.         search_response = self.send_request_and_check(session, request_method,
  153.                                                       self.search_request_url, request_params)
  154.         self.routes_found = self.extract_data_from_response(search_response)
  155.  
  156.     @abstractmethod
  157.     def extract_data_from_response(self, response):
  158.         """Extracts useful information from server response.
  159.  
  160.        arguments: response object
  161.  
  162.        return: list of Routes
  163.        """
  164.         pass
  165.  
  166.     def output_extracted_routes(self):
  167.         """Prints routes' information on console and writes in Result.txt file."""
  168.         with io.open('Result.txt', 'a', encoding=self.output_file_encoding) as output_file:
  169.             for route in self.routes_found:
  170.                 print route
  171.                 output_file.write(route)
  172.         print "Export of results is completed"
  173.  
  174.     def search(self):
  175.         """Search on particular web-site and output routes' information on console and file."""
  176.         self.search_request()
  177.         if self.routes_found:
  178.             self.output_extracted_routes()
  179.  
  180.  
  181. class AeroflotSearch(SearchForRoutes):
  182.     """Route-search on Aeroflot web-site."""
  183.     def __init__(self, origin_code_iata, destination_code_iata, route_dates):
  184.         SearchForRoutes.__init__(self, origin_code_iata, destination_code_iata, route_dates)
  185.         self.search_request_url = 'https://www.aeroflot.ru/sb/booking/api/app/search/v2'
  186.         self.validation_url = 'https://www.aeroflot.ru/sb/booking/api/app/cities/v1'
  187.  
  188.     @staticmethod
  189.     def _find_in_json(code, cities):    # Finds code IATA in JSON for validation
  190.         for city in cities['data']['cities']:
  191.             if code == city['code']:
  192.                 return code
  193.             else:
  194.                 for airport in city['airports']:
  195.                     if code == airport['code']:
  196.                         return code
  197.         return None
  198.  
  199.     @staticmethod
  200.     def _make_validation_request_params():    # Makes validation request parameters
  201.         validation_params = {'json': {'lang': 'ru'}}
  202.         return validation_params
  203.  
  204.     def validate_codes_iata(self, session):
  205.         """Validates codes IATA for search-request for Aeroflot web-site."""
  206.         valid_codes_iata = []
  207.         validation_params = self._make_validation_request_params()
  208.         cities_json = self.send_request_and_check(session, 'POST', self.validation_url,
  209.                                                   validation_params)
  210.         cities = self._decode_json(cities_json)
  211.         for code_iata in self.origin_code_iata, self.destination_code_iata:
  212.             code_found = self._find_in_json(code_iata, cities)
  213.             if not code_found:
  214.                 raise ValueError('(AeroflotSearch) invalid code IATA: {}'.format(code_iata))
  215.             valid_codes_iata.append(code_found)
  216.         print '(AeroflotSerach) valid codes IATA'
  217.         return valid_codes_iata[0], valid_codes_iata[1]
  218.  
  219.     def make_request_params(self, session):
  220.         """Makes HTTP-request method and parameters for Aeroflot."""
  221.         origin_code_iata = self.origin_code_iata
  222.         destination_code_iata = self.destination_code_iata
  223.         json_param_routes = []
  224.         for date in self.route_dates:
  225.             route = {
  226.                 "origin": origin_code_iata,
  227.                 "destination": destination_code_iata,
  228.                 "departure": date.strftime('%Y-%m-%d')
  229.             }
  230.             json_param_routes.append(route)
  231.             origin_code_iata, destination_code_iata = destination_code_iata, origin_code_iata
  232.         json_params = {
  233.             "routes": json_param_routes,
  234.             "cabin": "econom",
  235.             "country": "ru",
  236.             "adults": 1,
  237.             "combined": False,
  238.             "lang": "ru"
  239.         }
  240.         search_request_method = 'POST'
  241.         search_request_params = {'json': json_params}
  242.         return search_request_method, search_request_params
  243.  
  244.     @staticmethod
  245.     def _extract_route_prices(route):    # Extracts dictionary of prices from route info
  246.         route_prices = {}
  247.         for price in route['prices']:
  248.             route_price = price['total_amount']
  249.             route_faretype = price['fare_group_name']
  250.             route_prices[route_faretype] = route_price
  251.         return route_prices
  252.  
  253.     @staticmethod
  254.     def _extract_flights_from_route(route):    # Extracts flights from route info
  255.         route_flights = []
  256.         for leg in route['legs']:
  257.             for flight in leg['segments']:
  258.                 number = flight['flight_number']
  259.                 air_code = flight['airline_code']
  260.                 raw_departure_time = flight['departure']
  261.                 departure_time = datetime.strptime(raw_departure_time, '%Y-%m-%d %H:%M')
  262.                 arrival_time = flight['arrival'][-5:]
  263.                 origin_code_iata = flight['origin']['airport_code']
  264.                 destination_code_iata = flight['destination']['airport_code']
  265.                 one_flight = Flight(air_code + number, departure_time, arrival_time,
  266.                                     origin_code_iata, destination_code_iata)
  267.                 route_flights.append(one_flight)
  268.         return route_flights
  269.  
  270.     def _extract_routes(self, direction):    # Extracts routes from direction
  271.         routes_extracted = []
  272.         city_origin = direction[0]['legs'][0]['segments'][0]['origin']['city_name']
  273.         city_destination = direction[0]['legs'][0]['segments'][-1]['destination']['city_name']
  274.         currency = direction[0]['prices'][0]['currency']
  275.         raw_departure_date = direction[0]['legs'][0]['segments'][0]['departure'][:10]
  276.         departure_date = datetime.strptime(raw_departure_date, '%Y-%m-%d')
  277.         for route in direction:
  278.             route_prices = self._extract_route_prices(route)
  279.             route_flights = self._extract_flights_from_route(route)
  280.             route_duration = route['time_name']
  281.             one_route = Route(departure_date, route_prices, route_duration, city_origin,
  282.                               city_destination, currency, route_flights)
  283.             routes_extracted.append(one_route)
  284.         return routes_extracted
  285.  
  286.     def extract_data_from_response(self, response):
  287.         """Extracts routes from Aeroflot JSON-response."""
  288.         routes_found = []
  289.         json_routes = self._decode_json(response)
  290.         if not json_routes['data']['itineraries']:
  291.             raise ValueError('(AeroflotSearch) routes not found')
  292.         for direction in json_routes['data']['itineraries']:
  293.             routes_found += self._extract_routes(direction)
  294.         return routes_found
  295.  
  296.  
  297. class NordwindSearch(SearchForRoutes):
  298.     """Route-search on Nordwind web-site."""
  299.     def __init__(self, origin_code_iata, destination_code_iata, route_dates):
  300.         SearchForRoutes.__init__(self, origin_code_iata, destination_code_iata, route_dates)
  301.         self.search_request_url = 'https://airbook.nordwindairlines.ru/online/json/' \
  302.                                   'search-variants-mono-brand-cartesian'
  303.         self.validation_url = 'https://airbook.nordwindairlines.ru/online/json/dependence-cities'
  304.  
  305.     @staticmethod
  306.     def _find_in_json(code, cities):    # Finds code IATA in JSON for validation
  307.         for city in cities['origin']:
  308.             if code == city['codeEn']:
  309.                 return code
  310.         return None
  311.  
  312.     def validate_codes_iata(self, session):
  313.         """Validates codes IATA for search-request for Nordwind web-site."""
  314.         valid_codes_iata = []
  315.         validation_params = {}
  316.         cities_json = self.send_request_and_check(session, 'GET', self.validation_url,
  317.                                                   validation_params)
  318.         cities = self._decode_json(cities_json)
  319.         for code_iata in self.origin_code_iata, self.destination_code_iata:
  320.             code_found = self._find_in_json(code_iata, cities)
  321.             if not code_found:
  322.                 raise ValueError('(NordWindSearch) invalid code IATA: {}'.format(code_iata))
  323.             valid_codes_iata.append(code_found)
  324.         print '(NordWindSearch) valid codes IATA'
  325.         return valid_codes_iata[0], valid_codes_iata[1]
  326.  
  327.     def make_request_params(self, session):
  328.         """Makes HTTP-request method and parameters for Nordwind."""
  329.         origin_code_iata = self.origin_code_iata
  330.         destination_code_iata = self.destination_code_iata
  331.         param_routes = {}
  332.         segment_count = 0
  333.         for date in self.route_dates:
  334.             route = {
  335.                 'date[%s]' % segment_count: date.strftime('%d.%m.%Y'),
  336.                 'origin-city-code[%s]' % segment_count: origin_code_iata,
  337.                 'destination-city-code[%s]' % segment_count: destination_code_iata
  338.             }
  339.             origin_code_iata, destination_code_iata = destination_code_iata, origin_code_iata
  340.             param_routes.update(route)
  341.             segment_count += 1
  342.         requst_params = {
  343.             'segmentsCount': segment_count,
  344.             'lang': 'ru',
  345.             'count-aaa': 1
  346.         }
  347.         requst_params.update(param_routes)
  348.         params = {'params': requst_params}
  349.         method = 'GET'
  350.         return method, params
  351.  
  352.     @staticmethod
  353.     def _extract_flights_from_route(route):    # Extracts flights from route info
  354.         route_flights = []
  355.         for flight in route['flights']:
  356.             number = flight['racenumber']
  357.             air_code = flight['carrier']
  358.             depart_time = flight['departuretime']
  359.             depart_date = flight['departuredate']
  360.             departure_time = datetime.strptime(depart_date + depart_time, '%d.%m.%Y%H:%M')
  361.             arrival_time = flight['arrivaltime']
  362.             origin_code_iata = flight['originport']
  363.             destination_code_iata = flight['destinationport']
  364.             one_flight = Flight(air_code + number, departure_time,
  365.                                 arrival_time, origin_code_iata, destination_code_iata)
  366.             route_flights.append(one_flight)
  367.         return route_flights
  368.  
  369.     @staticmethod
  370.     def _extract_route_prices(routes_price_list, chain_id):    # Extracts prices from route info
  371.         route_prices = {}
  372.         for price in routes_price_list[chain_id]:
  373.             route_price = price['price']
  374.             route_faretype = price['brand'][:12]
  375.             route_prices[route_faretype] = route_price
  376.         return route_prices
  377.  
  378.     def _extract_routes(self, json_routes, routes_price_list):    # Extracts routes from JSON
  379.         extracted_routes = []
  380.         for route in json_routes['flights']:
  381.             chain_id = route['chainId']
  382.             route_flights = self._extract_flights_from_route(route)
  383.             route_prices = self._extract_route_prices(routes_price_list, chain_id)
  384.             route_duration = route['flights'][0]['flighttime']
  385.             city_origin = route['flights'][0]['origincityName']
  386.             city_destination = route['flights'][0]['destinationcityName']
  387.             currency = routes_price_list[chain_id][0]['currency']
  388.             departure_date = datetime.strptime(route['flights'][0]['departuredate'], '%d.%m.%Y')
  389.             one_route = Route(departure_date, route_prices, route_duration, city_origin,
  390.                               city_destination, currency, route_flights)
  391.             extracted_routes.append(one_route)
  392.         return extracted_routes
  393.  
  394.     def extract_data_from_response(self, response):
  395.         """Extracts routes from Nordwind JSON-response."""
  396.         json_routes = self._decode_json(response)
  397.         if 'error' in json_routes:
  398.             raise ValueError('(NordwindSearch) routes not found')
  399.         routes_price_list = {}
  400.         for prices in json_routes['prices']:
  401.             routes_price_list.update(prices)
  402.         routes_found = self._extract_routes(json_routes, routes_price_list)
  403.         return routes_found
  404.  
  405.  
  406. class FlynikiSearch(SearchForRoutes):
  407.     """Route-search on Flyniki web-site."""
  408.     def __init__(self, origin_code_iata, destination_code_iata, route_dates):
  409.         SearchForRoutes.__init__(self, origin_code_iata, destination_code_iata, route_dates)
  410.         self.search_request_url = 'https://www.flyniki.com/en/booking/flight/vacancy.php'
  411.         self.validation_url = 'https://www.flyniki.com/en/site/json/suggestAirport.php'
  412.  
  413.     @staticmethod
  414.     def _find_in_json(code, cities):    # Finds code IATA in JSON for validation, returns city name
  415.         for city in cities['suggestList']:
  416.             if code == city['code']:
  417.                 return city['name']
  418.         return None
  419.  
  420.     def validate_codes_iata(self, session):
  421.         """Validates codes IATA for search-request for Flyniki web-site.
  422.  
  423.        return: city names, which are used for creating search-request
  424.        """
  425.         valid_cities = []
  426.         payload = {'searchflightid': 0,
  427.                    'suggestsource[]': 'activeairports',
  428.                    'departures[]': self.origin_code_iata,
  429.                    'destinations[]': self.destination_code_iata,
  430.                    'routesource[0]': 'airberlin',
  431.                    'routesource[1]': 'partner'}
  432.         validation_params = {'params': payload}
  433.         for search_for in 'departures', 'destinations':
  434.             validation_params['params'].update({'searchfor': search_for})
  435.             key = '{}[]'.format(search_for)
  436.             code_iata = validation_params['params'][key]
  437.             cities_json = self.send_request_and_check(session, 'GET', self.validation_url,
  438.                                                       validation_params)
  439.             cities = self._decode_json(cities_json)
  440.             if not cities['suggestList']:
  441.                 raise ValueError('(FlyNikiSearch) invalid code IATA: {}'.format(code_iata))
  442.             city_found = self._find_in_json(code_iata, cities)
  443.             if not city_found:
  444.                 raise ValueError('(FlyNikiSearch) invalid code IATA: {}'.format(code_iata))
  445.             valid_cities.append(city_found)
  446.         print '(FlynikiSearch) valid codes IATA'
  447.         return valid_cities[0], valid_cities[1]
  448.  
  449.     def get_sid(self, session):
  450.         """Makes simple request and gets SID number for future search.
  451.  
  452.        arguments: session
  453.  
  454.        return: sid number(string)
  455.        """
  456.         sid_request_headers = {'Content-Type': 'application/x-www-form-urlencoded'}
  457.         sid_request_body = r'market=RU&language=en&bookingmask_' \
  458.                            r'widget_dateformat=dd/mm/yy&returnDate={}'\
  459.             .format(datetime.now().strftime('%d/%m/%Y'))
  460.         sid_request_params = {'headers': sid_request_headers, 'data': sid_request_body}
  461.         sid_request_url = 'https://www.flyniki.com/en/start.php'
  462.         response = self.send_request_and_check(session, 'POST', sid_request_url, sid_request_params)
  463.         sid = response.url.split('=')[1]
  464.         return sid
  465.  
  466.     def make_request_params(self, session):
  467.         """Makes HTTP-request method and parameters for Flyniki."""
  468.         origin_code_iata = self.origin_code_iata
  469.         destination_code_iata = self.destination_code_iata
  470.         sid = self.get_sid(session)
  471.         request_data = r'_ajax[requestParams][adultCount]=1&_ajax[templates][]=main&' \
  472.                        r'_ajax[templates][]=priceoverview&_ajax[templates][]=infos&' \
  473.                        r'_ajax[templates][]=flightinfo&_ajax[requestParams][departure]={0}&' \
  474.                        r'_ajax[requestParams][destination]={1}'\
  475.             .format(origin_code_iata, destination_code_iata)
  476.         request_data += r'&_ajax[requestParams][outboundDate]={0}&' \
  477.                         r'_ajax[requestParams][returnDate]={1}'\
  478.             .format(self.route_dates[0].strftime('%Y-%m-%d'),
  479.                     self.route_dates[-1].strftime('%Y-%m-%d'))
  480.         if len(self.route_dates) == 1:
  481.             request_data += r'&_ajax[requestParams][oneway]=on'
  482.         request_headers = {'Content-Type': 'application/x-www-form-urlencoded'}
  483.         request_payload = {'sid': sid}
  484.         search_request_params = {'headers': request_headers, 'data': request_data,
  485.                                  'params': request_payload}
  486.         search_request_method = 'POST'
  487.         return search_request_method, search_request_params
  488.  
  489.     @staticmethod
  490.     def _extract_flights_from_route(route):    # Extracts flights from route info
  491.         route_flights = []
  492.         for flight in route.xpath('./following-sibling::tr[1]//tbody/tr'):
  493.             flight_number = flight.xpath('./td[4]/text()')[0]
  494.             departure_time = flight.xpath('./td[2]/span/time/text()')[0]
  495.             arrival_time = flight.xpath('./td[3]/span/time/text()')[0]
  496.             origin = flight.xpath('./td[2]/span/text()')[1]
  497.             destination = flight.xpath('./td[3]/span/text()')[1]
  498.             origin_code_iata = origin.split(',')[1].strip()
  499.             destination_code_iata = destination.split(',')[1].strip()
  500.             one_flight = Flight(flight_number, departure_time, arrival_time, origin_code_iata,
  501.                                 destination_code_iata)
  502.             route_flights.append(one_flight)
  503.         return route_flights
  504.  
  505.     @staticmethod
  506.     def _extract_route_prices(route):    # Extracts prices from route info
  507.         route_prices = {}
  508.         for price in route.xpath('./td[position() > 4][label]'):
  509.             route_price = price.xpath('./label/div/span/text()')[0]
  510.             faretype = price.xpath('.//input[@name="faretype"]/@value')[0]
  511.             faregroup = price.xpath('.//input[@name="faregroup"]/@value')[0]
  512.             route_prices[faregroup + faretype] = route_price
  513.         return route_prices
  514.  
  515.     def _extract_routes(self, direction, currency):    # Extracts routes from direction
  516.         routes_extracted = []
  517.         route_title = direction.xpath('.//div[@class="vacancy_route"]/text()')[0]
  518.         cities = route_title.split(',')[0]
  519.         city_origin = cities.split(u'–')[0]
  520.         city_destination = cities.split(u'–')[1]
  521.         raw_departure_date = route_title.split(',')[-1]
  522.         departure_date = datetime.strptime(raw_departure_date.strip(), '%d/%m/%y')
  523.         for route in direction.xpath('.//tr[@role="group"]'):
  524.             route_flights = self._extract_flights_from_route(route)
  525.             route_prices = self._extract_route_prices(route)
  526.             route_duration = route.xpath('./td[4]/span/text()')[0]
  527.             one_route = Route(departure_date, route_prices, route_duration, city_origin,
  528.                               city_destination, currency, route_flights)
  529.             routes_extracted.append(one_route)
  530.         return routes_extracted
  531.  
  532.     def extract_data_from_response(self, response):
  533.         """Extracts routes from Flyniki mixed JSON/HTML-response."""
  534.         routes_found = []
  535.         json_html = self._decode_json(response)
  536.         html_page = self._decode_html_string(json_html['templates']['main'])
  537.         currency_list = html_page.xpath('.//th/@aria-label')
  538.         if not currency_list:
  539.             raise ValueError('(FlynikiSearch) routes not found')
  540.         currency = currency_list[0].split()[0]
  541.         directions = html_page.xpath('//div[@id="flighttables"]/div[div[@class="row"]]')
  542.         for direction in directions:
  543.             routes_found += self._extract_routes(direction, currency)
  544.         return routes_found
  545.  
  546.  
  547. class S7Search(SearchForRoutes):
  548.     """Route-search on S7 web-site."""
  549.     def __init__(self, origin_code_iata, destination_code_iata, route_dates):
  550.         SearchForRoutes.__init__(self, origin_code_iata, destination_code_iata, route_dates)
  551.         self.search_request_url = 'https://travelwith.s7.ru/ajax/actions/updateFlightsSearch.action'
  552.         self.validation_url = 'https://www.s7.ru/app/LocationService'
  553.  
  554.     @staticmethod
  555.     def _find_in_json(code, cities):    # Finds code IATA in JSON for validation, returns
  556.         for city in cities['c']:        # special code
  557.             if code == city['iata']:
  558.                 return city['code']
  559.         return None
  560.  
  561.     def validate_codes_iata(self, session):
  562.         """Validates codes IATA for search-request for S7 web-site
  563.  
  564.        return: special search-parameters
  565.        """
  566.         valid_codes_iata = []
  567.         payload = {'action': 'get_locations',
  568.                    'searchType': 'avia'}
  569.         validation_params = {'params': payload}
  570.         for code_iata in self.origin_code_iata, self.destination_code_iata:
  571.             validation_params['params'].update({'str': code_iata})
  572.             cities_json = self.send_request_and_check(session, 'GET', self.validation_url,
  573.                                                       validation_params)
  574.             cities = self._decode_json(cities_json)
  575.             if not cities['c']:
  576.                 raise ValueError('(S7Search) invalid code IATA: {}'.format(code_iata))
  577.             code_found = self._find_in_json(code_iata, cities)
  578.             if not code_found:
  579.                 raise ValueError('(S7Search) invalid code IATA: {}'.format(code_iata))
  580.             valid_codes_iata.append(code_found)
  581.         print '(S7Search) valid codes IATA'
  582.         return valid_codes_iata[0], valid_codes_iata[1]
  583.  
  584.     def make_request_params(self, session):
  585.         """Makes HTTP-request method and parameters for S7"""
  586.         origin_code_iata = self.origin_code_iata
  587.         destination_code_iata = self.destination_code_iata
  588.         request_data = r'model.page=FLIGHTS_SELECT_PAGE&model.milesEnabled=true&' \
  589.                        r'model.directFlightsOnly=false&model.flexible=false&' \
  590.                        r'model.redemption=false&model.currencyType=RUB&' \
  591.                        r'model.multiCitySearchType=PRICE&model.departurePoint={0}&' \
  592.                        r'model.arrivalPoint={1}&model.departureDate={2}&' \
  593.                        r'model.adultsCount=1'.format(origin_code_iata, destination_code_iata,
  594.                                                      self.route_dates[0].strftime('%d.%m.%Y'))
  595.         if len(self.route_dates) == 2:
  596.             request_data += r'&model.routeType=ROUND_TRIP&model.arrivalDate={}'\
  597.                 .format(self.route_dates[1].strftime('%d.%m.%Y'))
  598.         else:
  599.             request_data += r'&model.routeType=ONE_WAY'
  600.         request_headers = {'Content-Type': 'application/x-www-form-urlencoded'}
  601.         search_request_params = {'headers': request_headers, 'data': request_data}
  602.         search_request_method = 'POST'
  603.         return search_request_method, search_request_params
  604.  
  605.     @staticmethod
  606.     def _extract_route_prices(route):    # Extracts prices from route info
  607.         route_prices = {}
  608.         for price in route.xpath('.//div[div[@class="radiobutton"]]'):
  609.             route_price = price.xpath('.//span[@data-qa="amount"]/text()')[0].replace(u'\xa0', '')
  610.             route_faretype = price.xpath('./@data-tariff-type')[0]
  611.             route_prices[route_faretype] = route_price
  612.         return route_prices
  613.  
  614.     @staticmethod
  615.     def _extract_flights_from_route(route):    # Extracts flights from route info
  616.         flights = route.xpath('.//div[@data-qa="extended_info"]')
  617.         route_flights = []
  618.         if len(flights) == 1:
  619.             flights_info = [route]
  620.         else:
  621.             flights_info = flights
  622.         for flight in flights_info:
  623.             flight_number = flight.xpath('.//span[@data-qa="number_flightItem"]/text()')[0]
  624.             departure_time = flight.xpath('.//time[@data-qa="timeDeparture_flightItem"]/text()')[0]
  625.             arrival_time = flight.xpath('.//time[@data-qa="timeArrived_flightItem"]/text()')[0]
  626.             origin_code_iata = flight.xpath('.//span[@data-qa="airportDeparture_flightItem"]/text()')[0]
  627.             destination_code_iata = flight.xpath('.//span[@data-qa="airportArrived_flightItem"]/text()')[0]
  628.             one_flight = Flight(flight_number, departure_time, arrival_time, origin_code_iata,
  629.                                 destination_code_iata)
  630.             route_flights.append(one_flight)
  631.         return route_flights
  632.  
  633.     def _extract_routes(self, index, direction, currency):    # Extracts routes from direction
  634.         routes_extracted = []
  635.         cities = direction.xpath('.//div[@class="route"]/text()')
  636.         city_origin = cities[0].strip()
  637.         city_destination = cities[2].strip()
  638.         departure_date = self.route_dates[index]
  639.         for route in direction.xpath('.//div[@data-qa="listFlight"]/div[@data-qa]'):
  640.             route_prices = self._extract_route_prices(route)
  641.             route_flights = self._extract_flights_from_route(route)
  642.             route_duration = route.xpath('.//div[@data-qa="durationTotal_flightItemShort"]/text()')[0]
  643.             one_route = Route(departure_date, route_prices, route_duration,
  644.                               city_origin, city_destination, currency, route_flights)
  645.             routes_extracted.append(one_route)
  646.         return routes_extracted
  647.  
  648.     def extract_data_from_response(self, response):
  649.         """Extracts routes from S7 HTML-response."""
  650.         routes_found = []
  651.         html_page = self._decode_html_string(response.text)
  652.         if not html_page.xpath('//span[@data-qa="currency"][1]/text()'):
  653.             raise ValueError('(S7Search) routes not found')
  654.         currency = html_page.xpath('//span[@data-qa="currency"][1]/text()')[0]
  655.         directions = html_page.xpath('//div[@data-qa="selectFlight_block"]/div[@data-qa]')
  656.         for index, direction in enumerate(directions):
  657.             routes_found += self._extract_routes(index, direction, currency)
  658.         return routes_found
  659.  
  660.  
  661. def validate_date(date):
  662.     """Validates date and converts it to datetime object.
  663.  
  664.    arguments: date(string) format: dd-mm-yyyy
  665.  
  666.    return: datetime object
  667.    """
  668.     date_object = datetime.strptime(date.strip(), '%d-%m-%Y')
  669.     if date_object.date() >= datetime.now().date():
  670.         return date_object
  671.     else:
  672.         raise ValueError('invalid date: {}'.format(date_object.date()))
  673.  
  674.  
  675. def input_date(date_message, required_date=True):
  676.     """Request date input.
  677.  
  678.    arguments: message(string) for user
  679.    flag required_date(boolean) shows mandatory date (default=True)
  680.  
  681.    return: datetime object
  682.    """
  683.     valid_date = None
  684.     while not valid_date:
  685.         try:
  686.             raw_date = raw_input(date_message).strip()
  687.             if required_date:
  688.                 valid_date = validate_date(raw_date)
  689.             else:
  690.                 if raw_date:
  691.                     valid_date = validate_date(raw_date)
  692.             return valid_date
  693.         except ValueError, error:
  694.             print error
  695.  
  696.  
  697. def input_codes_of_sites():
  698.     """Request web-sites codes input and checks it.
  699.  
  700.    return: list of web-sites for search
  701.    """
  702.     sites = []
  703.     while not sites:
  704.         input_string = raw_input('{0}Code of company: '.format(air_companies_info()))
  705.         if input_string:
  706.             try:
  707.                 codes = map(int, input_string.split())
  708.             except ValueError, error:
  709.                 print error
  710.                 continue
  711.             sites = [SITES_DICT[code] for code in codes if code in SITES_DICT]
  712.         else:
  713.             sites = SITES_DICT.values()
  714.     return sites
  715.  
  716.  
  717. def input_code_iata(message):
  718.     """Request code IATA input and performs simple check.
  719.  
  720.    arguments: message for user
  721.  
  722.    return: inputted code IATA(string)
  723.    """
  724.     code_iata = ''
  725.     while not code_iata:
  726.         code_iata = raw_input(message).upper().strip()
  727.         if code_iata.isalpha() and len(code_iata) == 3:
  728.             return code_iata
  729.         else:
  730.             print 'Invalid code IATA'
  731.             code_iata = ''
  732.  
  733.  
  734. def air_companies_info():
  735.     """Constructs string of information about web-sites and their codes
  736.  
  737.    return: information(string)
  738.    """
  739.     info = '\n'
  740.     for code in sorted(SITES_DICT.keys()):
  741.         info += '{1:<15} - {0}\n'.format(code, SITES_DICT[code].__name__)
  742.     return info
  743.  
  744.  
  745. def find_routes_in_multi_search():
  746.     """ Request input of all necessary data, executes search on several web-sites
  747.    and collects all routes.
  748.    Repeats action until it gets routes.
  749.  
  750.    return: list of Routes
  751.    """
  752.     routes_found = []
  753.     sites = input_codes_of_sites()
  754.     while not routes_found:
  755.         origin_code_iata = input_code_iata('Origin code IATA: ')
  756.         destination_code_iata = input_code_iata('Destination code IATA: ')
  757.         departure_date = input_date('Departure date (dd-mm-yyyy): ')
  758.         return_date = input_date('Return date (dd-mm-yyyy): ', required_date=False)
  759.         dates = sorted([date for date in (departure_date, return_date) if date])
  760.         for site in sites:
  761.             one_searcher = site(origin_code_iata, destination_code_iata, dates)
  762.             try:
  763.                 one_searcher.search_request()
  764.             except (ValueError, requests.exceptions.RequestException), error:
  765.                 print error
  766.                 continue
  767.             routes_found += one_searcher.routes_found
  768.     result_routes = _process_routes(routes_found)
  769.     return result_routes
  770.  
  771.  
  772. def _process_routes(routes):
  773.     """Constructs sorted by price list of routes/return routes
  774.  
  775.    arguments: list of Routes
  776.  
  777.    return: sorted list of Routes
  778.    """
  779.     key_date = routes[0].departure_date
  780.     one_direction = [route for route in routes if route.departure_date == key_date]
  781.     return_direction = [route for route in routes if route.departure_date != key_date]
  782.     if return_direction:
  783.         return_routes = product(one_direction, return_direction)
  784.         prepared_routes = sorted(return_routes, key=lambda x: x[0].lowest_price + x[1].lowest_price)
  785.     else:
  786.         prepared_routes = sorted(routes, key=lambda x: x.lowest_price)
  787.     return prepared_routes
  788.  
  789.  
  790. def print_routes(routes, maximum_routes_number):
  791.     """Prints routes/return routes information and prices on console
  792.  
  793.    arguments: list of Routes
  794.    maximum number of outputted routes from list of routes
  795.    """
  796.     quantity_of_routes = min((len(routes), maximum_routes_number)) - 1
  797.     routes = routes[:quantity_of_routes]
  798.     if isinstance(routes[0], Route):
  799.         for route in routes:
  800.             print route
  801.     elif isinstance(routes[0], tuple):
  802.         for route in routes:
  803.             full_price = '{0}\nFull price: {1} {2}\n'\
  804.                 .format('*' * 40, (route[0].lowest_price +
  805.                                    route[1].lowest_price), route[0].currency.encode('utf-8'))
  806.             print '{0}\n{1}\n{2}'.format(route[0], route[1], full_price)
  807.     else:
  808.         print 'No routes to print'
  809.  
  810.  
  811. SITES_DICT = {1: AeroflotSearch, 2: S7Search, 3: NordwindSearch, 4: FlynikiSearch}
  812.  
  813. if __name__ == '__main__':
  814.     ROUTES = find_routes_in_multi_search()
  815.     print_routes(ROUTES, 25)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement