Advertisement
mfgnik

Untitled

Oct 11th, 2020
121
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.83 KB | None | 0 0
  1. import bs4
  2. import requests
  3. from collections import deque
  4.  
  5.  
  6. class Parser:
  7.     _BAD_PREFIX_SET = {'Википедия:', 'Портал:'}
  8.  
  9.     def link_checker(self, link):
  10.         for prefix in self._BAD_PREFIX_SET:
  11.             if link['title'][:len(prefix)] == prefix:
  12.                 return False
  13.         return True
  14.  
  15.     def get_links(self, html):
  16.         soup = bs4.BeautifulSoup(html, features="html.parser")
  17.         css_selector = 'div.mw-parser-output a[href^=\/wiki]:not([class])'
  18.         links = soup.select(css_selector)
  19.         return list(map(lambda link: link['title'], filter(lambda x: self.link_checker(x), links)))
  20.  
  21.     def get_target_title(self, html):
  22.         soup = bs4.BeautifulSoup(html, features="html.parser")
  23.         return soup.select_one('h1').text
  24.  
  25. class WebGraph:
  26.     def __init__(self, start, target_link):
  27.         self.start = start
  28.         self.parser = Parser()
  29.         self.target = self.parser.get_target_title(requests.get(target_link).text)
  30.         self.used = {start: 0}
  31.  
  32.     @staticmethod
  33.     def build_link(title):
  34.         return f'https://ru.wikipedia.org/wiki/{title}'
  35.  
  36.     def bfs(self):
  37.         deq = deque()
  38.         deq.append(self.start)
  39.         while deq:
  40.             link = deq.popleft()
  41.             for other_link in self.parser.get_links(requests.get(self.build_link(link)).text):
  42.                 print(link, other_link)
  43.                 if other_link in self.used:
  44.                     continue
  45.                 if other_link == self.target:
  46.                     return self.used[link] + 1
  47.                 self.used[other_link] = self.used[link] + 1
  48.                 if self.used[other_link] > 5:
  49.                     return -1
  50.                 deq.append(other_link)
  51.  
  52.  
  53. graph = WebGraph('Трамп', 'https://ru.wikipedia.org/wiki/Тэтчер,_Маргарет')
  54. print(graph.bfs())
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement