Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import bs4
- import requests
class Parser:
    """Extract article links from the HTML of a Wikipedia page."""

    # Title prefixes that link_checker rejects ("Википедия:" and "Портал:").
    _BAD_PREFIX_SET = {'Википедия:', 'Портал:'}

    def link_checker(self, link):
        """Return True if *link* has a title that passes the prefix filter.

        Args:
            link: Any object supporting ``.get('title')`` -- a tag returned
                by ``BeautifulSoup.select`` or a plain mapping.

        Returns:
            False when the title is missing or starts with one of the
            prefixes in ``_BAD_PREFIX_SET``; True otherwise.
        """
        title = link.get('title')
        if title is None:
            # An anchor without a title cannot be matched by title later on,
            # so drop it here instead of raising KeyError.
            return False
        # str.startswith accepts a tuple, which checks every prefix at once.
        return not title.startswith(tuple(self._BAD_PREFIX_SET))

    def all_links(self, html):
        """Return the anchors in the parsed page that pass ``link_checker``.

        Args:
            html: HTML source of the page as a string.

        Returns:
            A list of the elements matched by the CSS selector below for
            which ``link_checker`` returned True, in document order.
        """
        soup = bs4.BeautifulSoup(html, features="html.parser")
        # Anchors inside div.mw-parser-output whose href starts with "/wiki"
        # and which carry no class attribute. The attribute value is quoted
        # so that no backslash escape is needed in the Python literal.
        css_selector = 'div.mw-parser-output a[href^="/wiki"]:not([class])'
        return [link for link in soup.select(css_selector)
                if self.link_checker(link)]
class WebGraph:
    """Check whether a start page links directly to a target article."""

    # Seconds to wait for the HTTP response; without a timeout requests.get
    # may block indefinitely on an unresponsive server.
    REQUEST_TIMEOUT_SECONDS = 10

    def __init__(self, start_link, target):
        """Store the search endpoints and create the HTML parser.

        Args:
            start_link: URL of the page whose outgoing links are inspected.
            target: Title that is compared against each link's ``title``
                attribute.
        """
        self.start_link = start_link
        self.target = target
        self.parser = Parser()
        # Initialised empty; not read by any method in this class.
        self.used = set()

    def is_neighbours(self):
        """Return True if the start page has a link titled ``self.target``.

        Downloads ``self.start_link``, extracts its links with the parser and
        compares every link's ``title`` attribute with ``self.target``.

        Raises:
            requests.RequestException: if the request fails, times out, or
                the server answers with an HTTP error status.
        """
        response = requests.get(self.start_link,
                                timeout=self.REQUEST_TIMEOUT_SECONDS)
        # Fail loudly on 4xx/5xx instead of parsing an error page and
        # reporting a misleading False.
        response.raise_for_status()
        links = self.parser.all_links(response.text)
        # .get() tolerates anchors that have no title attribute.
        return any(link.get('title') == self.target for link in links)
# Demo run: report whether the start article links directly to the target.
start_url = 'https://ru.wikipedia.org/wiki/Соединённые_Штаты_Америки'
target_title = 'Великобритания'
graph = WebGraph(start_url, target_title)
is_direct_neighbour = graph.is_neighbours()
print(is_direct_neighbour)
Add Comment
Please, Sign In to add comment