Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from dataclasses import dataclass
- from typing import Dict, List, Any
- import requests
- from bs4 import BeautifulSoup
- import urllib.parse
- import re
@dataclass
class OsmResult:
    """A single place search result scraped from OpenStreetMap (Nominatim).

    Instances are mutable; all seven fields are required and positional, in
    the order declared below.
    """

    # Display name taken from the search result entry.
    name: str
    # Result type label (trailing underscore avoids shadowing the builtin).
    type_: str
    # Latitude of the place's centre point.
    lat: float
    # Longitude of the place's centre point.
    lon: float
    # Mapping of the "Extra Tags" listed on the details page (key -> value).
    extra_tags: Dict[str, str]
    # Mapping of the "Address Tags" listed on the details page (key -> value).
    address_tags: Dict[str, str]
    # Administrative level read from the details page.
    admin_level: int
_NOMINATIM_BASE_URL = 'https://nominatim.openstreetmap.org/'
_NOMINATIM_SEARCH_URL = _NOMINATIM_BASE_URL + 'search.php'
# Prefer Russian names, falling back to English.
_REQUEST_HEADERS = {'accept-language': 'ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7'}


def _row_value_cell(table: Any, label: str) -> Any:
    """Return the ``<td>`` that follows the cell whose text matches *label*.

    Raises AttributeError when the label cell is not present in *table*.
    """
    label_cell = table.find('td', string=re.compile(label))
    # find_next_sibling skips whitespace text nodes between the two cells.
    return label_cell.find_next_sibling('td')


def _parse_address_tags(cell: Any) -> Dict[str, str]:
    """Collect address tags from the ``<div>`` entries inside *cell*.

    Each entry is read as ``<span>value</span> (key)``. Parsing stops at the
    first entry that does not have this shape; tags read so far are kept.
    """
    tags: Dict[str, str] = {}
    for entry in cell.find_all('div'):
        try:
            value = entry.find('span').next_element
            # The text node after the span's content holds " (key)".
            key = value.next_element.lstrip()[1:-1]
        except (AttributeError, TypeError):
            break
        # Store a plain str so the result does not keep the parse tree alive.
        tags[key] = str(value)
    return tags


def _parse_extra_tags(cell: Any) -> Dict[str, str]:
    """Collect extra tags from the ``<div>`` entries inside *cell*.

    The last whitespace-separated word of each entry (minus its first and
    last character) is the key; the preceding words form the value.
    """
    tags: Dict[str, str] = {}
    for entry in cell.find_all('div'):
        *value_words, raw_key = entry.text.split()
        tags[raw_key[1:-1]] = ' '.join(value_words)
    return tags


def _parse_search_result(result_div: Any, timeout: float) -> OsmResult:
    """Build an OsmResult from one search entry and its linked details page.

    Issues one HTTP request for the details page. Raises
    AttributeError/TypeError/KeyError/IndexError/ValueError when the markup
    does not have the expected shape, or a requests exception on network
    failure.
    """
    name = result_div.find('span', {'class': 'name'}).text
    # Strip the first and last character surrounding the type label.
    type_ = result_div.find('span', {'class': 'type'}).text[1:-1]

    # urljoin copes with relative, root-relative and absolute hrefs alike.
    details_url = urllib.parse.urljoin(_NOMINATIM_BASE_URL, str(result_div.find('a')['href']))
    details = requests.get(details_url, headers=_REQUEST_HEADERS, timeout=timeout)
    table = BeautifulSoup(details.text, "lxml").find('table', {'id': 'locationdetails'})

    lat_text, lon_text = _row_value_cell(table, 'Centre Point').text.split(',')
    lat, lon = float(lat_text), float(lon_text)
    address_tags = _parse_address_tags(_row_value_cell(table, 'Address Tags'))
    extra_tags = _parse_extra_tags(_row_value_cell(table, 'Extra Tags'))
    # NOTE(review): positional lookup (4th row, 2nd cell) — presumably the
    # admin-level row; verify against the current details page layout.
    admin_level = int(table.find_all('tr')[3].find_all('td')[1].text)

    return OsmResult(name, type_, lat, lon, extra_tags, address_tags, admin_level)


def get_place_data(place: str, max_results: int = 5, timeout: float = 10.0) -> List[OsmResult]:
    """
    Extracts place search results from OpenStreetMap.

    Fetches the search page, then one details page per result. Processing
    stops at the first result that cannot be fetched or parsed; results
    collected before that point are still returned.

    :param place: place to search
    :param max_results: maximum number of results to parse (default 5)
    :param timeout: per-request timeout in seconds (default 10.0)
    :return: list of parsed results as OsmResult structure; empty when the
        page contains no results container
    :raises requests.RequestException: if the initial search request fails
    """
    response = requests.get(
        _NOMINATIM_SEARCH_URL,
        # Let requests do the query-string encoding.
        params={'q': place, 'polygon_geojson': 1, 'viewbox': ''},
        headers=_REQUEST_HEADERS,
        timeout=timeout,
    )
    container = BeautifulSoup(response.text, "lxml").find('div', id='searchresults')
    if container is None:
        # No results container on the page: nothing to parse.
        return []

    results: List[OsmResult] = []
    # Only direct <div> children are result entries.
    for result_div in container.find_all('div', recursive=False):
        if len(results) >= max_results:
            break
        try:
            results.append(_parse_search_result(result_div, timeout))
        except (requests.RequestException, AttributeError, IndexError,
                KeyError, TypeError, ValueError):
            # Best effort: keep what was parsed so far and stop.
            break
    return results
# Demo run: search for "Colombo" and print every parsed result, one per line.
res = get_place_data('Colombo')
for x in res:
    print(x)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement