Advertisement
Guest User

Untitled

a guest
Nov 16th, 2019
128
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.75 KB | None | 0 0
  1. from dataclasses import dataclass
  2. from typing import Dict, List, Any
  3. import requests
  4. from bs4 import BeautifulSoup
  5. import urllib.parse
  6. import re
  7.  
  8.  
  9. @dataclass
  10. class OsmResult:
  11. name: str
  12. type_: str
  13. lat: float
  14. lon: float
  15. extra_tags: Dict[str, str]
  16. address_tags: Dict[str, str]
  17. admin_level: int
  18.  
  19.  
  20. def get_place_data(place: str) -> List[OsmResult]:
  21. """
  22. Extracts place search results from OpenStreetMap.
  23. :param place: place to search
  24. :return: list of parsed results as OsmResult structure
  25. """
  26. url = urllib.parse.quote(place)
  27. r = requests.get('https://nominatim.openstreetmap.org/search.php?q=' + url + '&polygon_geojson=1&viewbox=',
  28. headers={'accept-language': 'ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7'}) # params lang ru-en
  29. soup = BeautifulSoup(r.text, "lxml")
  30. content = soup.find('div', id='searchresults')
  31. k: int = 0
  32. answ: Any = []
  33. for i in content:
  34. if k == 5:
  35. break
  36. try:
  37. if i.name == 'div':
  38. rez = OsmResult('', '', 0, 0, {}, {}, 0)
  39. rez.name = i.find('span', {'class': 'name'}).text
  40. rez.type_ = i.find('span', {'class': 'type'}).text[1:-1]
  41. details = requests.get('https://nominatim.openstreetmap.org/' + str(i.find('a')['href']),
  42. headers={'accept-language': 'ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7'})
  43. detsoap = BeautifulSoup(details.text, "lxml")
  44. rez.lat, rez.lon = detsoap.find('table', {'id': 'locationdetails'}).find('td',
  45. text=re.compile(
  46. 'Centre Point')). \
  47. nextSibling.text.split(
  48. ',')
  49. rez.lat = float(rez.lat)
  50. rez.lon = float(rez.lon)
  51. adresstags = {}
  52. extratags = {}
  53. detsoap.find('table', {'id': 'locationdetails'}).find('td',
  54. text=re.compile(
  55. 'Address Tags')).nextSibling.findAll('div')
  56. for q in detsoap.find('table', {'id': 'locationdetails'}).find('td',
  57. text=re.compile(
  58. 'Address Tags'))\
  59. .nextSibling.findAll('div'):
  60. try:
  61. key = q.find('span').next.next.lstrip()[1:-1]
  62. print(type(key))
  63. adresstags[key] = q.find('span').next
  64. except Exception:
  65. break
  66. else:
  67. '''print(k)
  68. print(q.find('span').next.next[2:-1])'''
  69. for j in detsoap.find('table', {'id': 'locationdetails'}).find('td', text=re.compile(
  70. 'Extra Tags')).nextSibling.findAll('div'):
  71. extratags[j.text.split()[-1][1:-1]] = ' '.join(j.text.split()[:-1])
  72. rez.extra_tags = extratags
  73. rez.address_tags = adresstags
  74. rez.admin_level = detsoap.find('table', {'id': 'locationdetails'}).findAll('tr')[3].findAll('td')[
  75. 1].text
  76. rez.admin_level = int(rez.admin_level)
  77. answ.append(rez)
  78. k += 1
  79. except Exception:
  80. break
  81. return answ
  82.  
  83.  
  84. res = get_place_data('Colombo')
  85. for x in res:
  86. print(x)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement