Advertisement
Guest User

Untitled

a guest
Aug 16th, 2018
71
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.23 KB | None | 0 0
  1. import pandas as pd
  2. import json
  3. import reverse_geocoder as rg
  4. import math
  5.  
  6. class Subway:
  7. def __init__(self):
  8. self.ektb_subway = self._get_underground_dict(r'additional_files/ektb_subway.json')
  9. self.msk_subway = self._get_underground_dict(r'additional_files/msk_subway.json')
  10. self.novosib_subway = self._get_underground_dict(r'additional_files/novosib_subway.json')
  11. self.stptr_subway = self._get_underground_dict(r'additional_files/stptr_subway.json')
  12.  
  13. def _get_underground_dict(self, json_underground_date):
  14. """
  15. Json was get from hh.ru website.
  16. :param path to json_underground_date
  17. :return: dictionary where key is underground name and value is coordinate tuple.
  18. """
  19. underground_dict = {}
  20. with open(json_underground_date) as f:
  21. json_data = json.load(f)
  22. for line in json_data['lines']:
  23. for station in line['stations']:
  24. latitude = station['lat']
  25. longtitude = station['lng']
  26. station_name = station['name']
  27. underground_dict[station_name] = (latitude, longtitude)
  28. return underground_dict
  29.  
  30. def _get_distance_km(self, point_1, point_2):
  31. """
  32. :param point_1: tuple where first element - latitude, second element - longitude
  33. :param point_2: tuple where first element - latitude, second element - longitude
  34. :return: distance in km between two points
  35. """
  36. radius_meters = 6378.1
  37. fi_1 = math.radians(point_1[1])
  38. fi_2 = math.radians(point_2[1])
  39. delta_fi = math.radians(point_2[1] - point_1[1])
  40. delta_lambda = math.radians(point_2[0] - point_1[0])
  41. a = math.sin(delta_fi / 2) * math.sin(delta_fi / 2) + math.cos(fi_1) * math.cos(fi_2) * math.sin(
  42. delta_lambda / 2) * \
  43. math.sin(delta_lambda / 2)
  44. c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
  45. return radius_meters * c
  46.  
  47. #Разбито на 2 разныхм метода, хотя по идее можно использовать один
  48. def get_nearest_subway_distance(self, data_from_csv):
  49. """
  50. Getting naearest
  51. :param city:
  52. :param coordinates:
  53. :return: list where first element is name of the nearest station and the second element is the distance (km)
  54. """
  55. city, coordinates = data_from_csv[0], data_from_csv[1]
  56. underground_dict = {}
  57. if city == 'Moscow':
  58. for key, coordinate in self.msk_subway.items():
  59. underground_dict[key] = self._get_distance_km(coordinate, coordinates)
  60. return sorted(underground_dict.items(), key=lambda kv: kv[1])[0]
  61. elif city == 'Saint Petersburg':
  62. for key, coordinate in self.stptr_subway.items():
  63. underground_dict[key] = self._get_distance_km(coordinate, coordinates)
  64. return sorted(underground_dict.items(), key=lambda kv: kv[1])[0]
  65. elif city == 'Yekaterinburg':
  66. for key, coordinate in self.ektb_subway.items():
  67. underground_dict[key] = self._get_distance_km(coordinate, coordinates)
  68. return sorted(underground_dict.items(), key=lambda kv: kv[1])[0]
  69. elif city == 'Novosibirsk':
  70. for key, coordinate in self.novosib_subway.items():
  71. underground_dict[key] = self._get_distance_km(coordinate, coordinates)
  72. return sorted(underground_dict.items(), key=lambda kv: kv[1])[0]
  73. else:
  74. return ['no_station', -1]
  75.  
  76.  
  77.  
  78. def get_region_city(coordinates):
  79. geo_info = rg.search(coordinates)
  80. region, city = geo_info[0]["admin1"], geo_info[0]["name"]
  81. return region, city
  82.  
  83.  
  84.  
  85. # def nearest_underground(city, point):
  86. # """
  87. # :param city: city name (the code below only compares it with 'Moscow')
  88. # :param point: users coordinates as tuple where first element - longitude, second element - latitude
  89. # :return: sorted dictionary by value, where key is underground name and value is distance
  90. # """
  91. # underground_dict = {}
  92. # if city == 'Moscow':
  93. # pass
  94. # all_underground = get_underground_dict(
  95. # r'/Users/a.eryomin/PycharmProjects/flat_price_prediction/list_metro_underground.json')
  96. # for key, coordinate in all_underground.items():
  97. # underground_dict[key] = get_distance_km(coordinate, point)
  98. # return sorted(underground_dict.items(), key=lambda kv: kv[1])[0]
  99.  
  100.  
  101. if __name__ == '__main__':
  102. subway_obj = Subway()
  103. mongo_db = pd.read_csv('data_from_mongo__.csv',
  104. sep=',',
  105. names=['price',
  106. 'latitude',
  107. 'longtitude',
  108. 'balcony',
  109. 'subway_station',
  110. 'realty_god_postroyki',
  111. 'nearest_subway_distance_km',
  112. 'rooms_number',
  113. 'orders_owner',
  114. 'elevator',
  115. 'total_area',
  116. 'kitchen_area',
  117. 'maintenance',
  118. 'bathroom',
  119. 'occupancy',
  120. 'building_type',
  121. 'floor',
  122. 'total_floor'])
  123.  
  124. mongo_db['latitude_longtitude'] = tuple(zip(mongo_db.latitude, mongo_db.longtitude))
  125. mongo_db['region'], mongo_db['city'] = zip(*mongo_db["latitude_longtitude"].map(get_region_city))
  126. mongo_db['subway_station'], mongo_db['nearest_subway_distance_km'] = zip(*mongo_db[['city', 'latitude_longtitude']].apply(subway_obj.get_nearest_subway_distance, axis=1))
  127. mongo_db.to_csv('data_from_mongo_with_city_region.csv', sep=',', header=True)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement