Advertisement
mfgnik

Untitled

Oct 9th, 2020 (edited)
126
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.08 KB | None | 0 0
  1. import bs4
  2. import requests
  3.  
  4.  
  5. class Parser:
  6.     _BAD_PREFIX_SET = {'Википедия:', 'Портал:'}
  7.  
  8.     def link_checker(self, link):
  9.         for prefix in self._BAD_PREFIX_SET:
  10.             if link['title'][:len(prefix)] == prefix:
  11.                 return False
  12.         return True
  13.  
  14.     def all_links(self, html):
  15.         soup = bs4.BeautifulSoup(html, features="html.parser")
  16.         css_selector = 'div.mw-parser-output a[href^=\/wiki]:not([class])'
  17.         links = soup.select(css_selector)
  18.         return list(filter(lambda x: self.link_checker(x), links))
  19.  
  20.  
  21. class WebGraph:
  22.     def __init__(self, start, target):
  23.         self.start = start
  24.         self.target = target
  25.         self.parser = Parser()
  26.         self.used = {start: 0}
  27.  
  28.     def is_neighbours(self):
  29.         links = self.parser.all_links(requests.get(f'https://ru.wikipedia.org/wiki/{self.start}').text)
  30.         return self.target in [link['title'] for link in links]
  31.  
  32.  
  33. graph = WebGraph('Соединённые Штаты Америки', 'Великобритания')
  34. print(graph.is_neighbours())
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement