mfgnik

Untitled

Sep 27th, 2020
196
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.09 KB | None | 0 0
  1. import bs4
  2. import requests
  3.  
  4.  
  5. class Parser:
  6.     _BAD_PREFIX_SET = {'Википедия:', 'Портал:'}
  7.  
  8.     def link_checker(self, link):
  9.         for prefix in self._BAD_PREFIX_SET:
  10.             if link['title'][:len(prefix)] == prefix:
  11.                 return False
  12.         return True
  13.  
  14.     def all_links(self, html):
  15.         soup = bs4.BeautifulSoup(html, features="html.parser")
  16.         css_selector = 'div.mw-parser-output a[href^=\/wiki]:not([class])'
  17.         links = soup.select(css_selector)
  18.         return list(filter(lambda x: self.link_checker(x), links))g
  19.  
  20.  
  21.  
  22. class WebGraph:
  23.     def __init__(self, start_link, target):
  24.         self.start_link = start_link
  25.         self.target = target
  26.         self.parser = Parser()
  27.         self.used = set()
  28.  
  29.     def is_neighbours(self):
  30.         links = self.parser.all_links(requests.get(self.start_link).text)
  31.         return self.target in [link['title'] for link in links]
  32.  
  33.  
  34. graph = WebGraph('https://ru.wikipedia.org/wiki/Соединённые_Штаты_Америки', 'Великобритания')
  35. print(graph.is_neighbours())
  36.  
Add Comment
Please, Sign In to add comment