Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import pandas as pd
- import json
- import reverse_geocoder as rg
- import math
class Subway:
    """Nearest-subway-station lookup for Moscow, Saint Petersburg,
    Yekaterinburg and Novosibirsk.

    On construction the station coordinates of every supported city are read
    from JSON files under ``additional_files/`` (paths are relative to the
    current working directory) and kept as ``{station_name: (lat, lng)}``
    dictionaries.
    """

    # Earth radius in kilometres used by the haversine formula.
    _EARTH_RADIUS_KM = 6378.1

    # City name (as compared against the first element of the input row)
    # -> name of the instance attribute holding that city's stations.
    _CITY_TO_ATTRIBUTE = {
        'Moscow': 'msk_subway',
        'Saint Petersburg': 'stptr_subway',
        'Yekaterinburg': 'ektb_subway',
        'Novosibirsk': 'novosib_subway',
    }

    def __init__(self):
        """Load the station tables of all supported cities from disk."""
        self.ektb_subway = self._get_underground_dict(r'additional_files/ektb_subway.json')
        self.msk_subway = self._get_underground_dict(r'additional_files/msk_subway.json')
        self.novosib_subway = self._get_underground_dict(r'additional_files/novosib_subway.json')
        self.stptr_subway = self._get_underground_dict(r'additional_files/stptr_subway.json')

    def _get_underground_dict(self, json_underground_date):
        """
        Read a subway description file (JSON obtained from the hh.ru website).

        The file must contain a top-level ``lines`` list; every line has a
        ``stations`` list whose items carry ``name``, ``lat`` and ``lng``.
        If a station name occurs more than once, the last occurrence wins.

        :param json_underground_date: path to the JSON file
        :return: dictionary where key is the station name and value is a
                 ``(latitude, longitude)`` tuple
        """
        underground_dict = {}
        # JSON is UTF-8 by specification; do not depend on the locale default
        # encoding, which may mis-decode non-ASCII station names.
        with open(json_underground_date, encoding='utf-8') as f:
            json_data = json.load(f)
        for line in json_data['lines']:
            for station in line['stations']:
                underground_dict[station['name']] = (station['lat'], station['lng'])
        return underground_dict

    def _get_distance_km(self, point_1, point_2):
        """
        Great-circle (haversine) distance between two points.

        :param point_1: tuple where first element - latitude, second element - longitude (degrees)
        :param point_2: tuple where first element - latitude, second element - longitude (degrees)
        :return: distance in km between two points
        """
        lat_1, lon_1 = point_1[0], point_1[1]
        lat_2, lon_2 = point_2[0], point_2[1]

        # In the haversine formula phi is the latitude and lambda the
        # longitude; the cosine factors must be taken from the latitudes.
        fi_1 = math.radians(lat_1)
        fi_2 = math.radians(lat_2)
        delta_fi = math.radians(lat_2 - lat_1)
        delta_lambda = math.radians(lon_2 - lon_1)

        a = (math.sin(delta_fi / 2) ** 2
             + math.cos(fi_1) * math.cos(fi_2) * math.sin(delta_lambda / 2) ** 2)
        # Rounding can push `a` marginally above 1 for antipodal points, which
        # would make sqrt(1 - a) fail. NaN is deliberately left untouched.
        if a > 1.0:
            a = 1.0
        c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
        return self._EARTH_RADIUS_KM * c

    # Split into two separate methods, although in principle a single one
    # could be used.
    def get_nearest_subway_distance(self, data_from_csv):
        """
        Find the subway station nearest to a point in a supported city.

        :param data_from_csv: sequence (list, tuple or pandas row) whose first
               element is the city name and whose second element is a
               ``(latitude, longitude)`` tuple
        :return: tuple ``(station_name, distance_km)`` of the nearest station;
                 the list ``['no_station', -1]`` when the city is not
                 supported or has no stations loaded
        """
        # Iterate instead of using data_from_csv[0] / [1]: integer keys on a
        # label-indexed pandas Series are not a reliable positional lookup.
        city, coordinates = tuple(data_from_csv)[:2]

        attribute_name = self._CITY_TO_ATTRIBUTE.get(city)
        stations = getattr(self, attribute_name) if attribute_name else None
        if not stations:
            return ['no_station', -1]

        distances = (
            (name, self._get_distance_km(position, coordinates))
            for name, position in stations.items()
        )
        # min() keeps the first of equally distant stations, exactly like
        # taking element 0 of a stable sort, but without sorting everything.
        return min(distances, key=lambda name_distance: name_distance[1])
def get_region_city(coordinates):
    """
    Reverse-geocode coordinates into a region and a city name.

    :param coordinates: value passed unchanged to ``rg.search``
    :return: tuple ``(region, city)`` taken from the ``"admin1"`` and
             ``"name"`` fields of the first search result
    """
    first_match = rg.search(coordinates)[0]
    return first_match["admin1"], first_match["name"]
- # def nearest_underground(city, point):
- # """
- # :param city: name of the city the point belongs to
- # :param point: users coordinates as tuple where first element - longitude, second element - latitude
- # :return: sorted dictionary by value, where key is underground name and value is distance
- # """
- # underground_dict = {}
- # if city == 'Moscow':
- # pass
- # all_underground = get_underground_dict(
- # r'/Users/a.eryomin/PycharmProjects/flat_price_prediction/list_metro_underground.json')
- # for key, coordinate in all_underground.items():
- # underground_dict[key] = get_distance_km(coordinate, point)
- # return sorted(underground_dict.items(), key=lambda kv: kv[1])[0]
if __name__ == '__main__':
    # Column names assigned to the input CSV, in file order.
    csv_columns = [
        'price',
        'latitude',
        'longtitude',
        'balcony',
        'subway_station',
        'realty_god_postroyki',
        'nearest_subway_distance_km',
        'rooms_number',
        'orders_owner',
        'elevator',
        'total_area',
        'kitchen_area',
        'maintenance',
        'bathroom',
        'occupancy',
        'building_type',
        'floor',
        'total_floor',
    ]

    subway_obj = Subway()
    mongo_db = pd.read_csv('data_from_mongo__.csv', sep=',', names=csv_columns)

    # One (latitude, longitude) tuple per row.
    mongo_db['latitude_longtitude'] = tuple(zip(mongo_db.latitude, mongo_db.longtitude))

    # Reverse-geocode every coordinate pair, then split the resulting
    # (region, city) pairs into two columns.
    region_city_pairs = mongo_db["latitude_longtitude"].map(get_region_city)
    regions, cities = zip(*region_city_pairs)
    mongo_db['region'] = regions
    mongo_db['city'] = cities

    # Overwrite the subway columns with the nearest station and its distance,
    # computed row by row from the city and the coordinate pair.
    nearest_per_row = mongo_db[['city', 'latitude_longtitude']].apply(
        subway_obj.get_nearest_subway_distance, axis=1)
    station_names, station_distances = zip(*nearest_per_row)
    mongo_db['subway_station'] = station_names
    mongo_db['nearest_subway_distance_km'] = station_distances

    mongo_db.to_csv('data_from_mongo_with_city_region.csv', sep=',', header=True)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement