Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import functools
import logging
import threading
import urllib.parse
import urllib.request

from bs4 import BeautifulSoup
def error_in_news(fn):
    """Decorate *fn* so that exceptions are logged instead of propagated.

    The returned wrapper forwards all positional and keyword arguments to
    *fn* and returns its result. If *fn* raises any ``Exception``, the error
    is logged (with traceback) and the wrapper returns ``None``.

    Args:
        fn: the callable to protect.

    Returns:
        A wrapper with the same name and docstring as *fn*.
    """
    @functools.wraps(fn)  # keep fn's __name__/__doc__ visible on the wrapper
    def aux(*args, **kwargs):
        try:
            return fn(*args, **kwargs)
        except Exception:
            # logging.exception records the traceback, so the failing line
            # can be found from the log alone.
            logging.exception('%s failed', getattr(fn, '__name__', fn))
            return None
    return aux
class News:
    """Scrape headline/link pairs from a fixed set of news sites.

    Constructing an instance immediately starts one thread per site scraper.
    Each scraper builds a ``{title: url}`` dict and passes it to
    ``object_queue_news.set_news``.
    """

    # Seconds to wait for a site to answer. The scraper threads are started
    # as non-daemon threads, so a request that never returns would keep the
    # process from exiting.
    REQUEST_TIMEOUT = 30

    def __init__(self, object_queue_news):
        """Store the news sink and start every site scraper in its own thread.

        Args:
            object_queue_news: object whose ``set_news(dict)`` method receives
                each site's ``{title: url}`` mapping.
        """
        self.object_queue_news = object_queue_news
        scrapers = (
            self.dev,
            self.tut,
            self.onliner,
            self.kyky,
            self.probusiness,
            self.belbiz,
        )
        for scraper in scrapers:
            self.start_thread(scraper)

    def publicParse(self, site, name_class, tag='div'):
        """Download *site* and return the elements that hold its posts.

        Args:
            site: URL of the page to download.
            name_class: value of the ``class`` attribute to search for.
            tag: element name to search for; defaults to ``'div'``.

        Returns:
            The BeautifulSoup result set of matching elements.

        Raises:
            urllib.error.URLError: if the page cannot be fetched.
        """
        # The context manager closes the HTTP response once it has been read.
        with urllib.request.urlopen(site, timeout=self.REQUEST_TIMEOUT) as page:
            soup = BeautifulSoup(page.read(), 'lxml')
        return soup.find_all(tag, {'class': name_class})

    @staticmethod
    def _add_entry(dict_news, title_tag, link_tag, base='', href_attr='href',
                   first_child=False):
        """Record one headline in *dict_news*, skipping incomplete posts.

        Args:
            dict_news: ``{title: url}`` mapping to update in place.
            title_tag: element holding the headline, or ``None`` if not found.
            link_tag: element holding the link, or ``None`` if not found.
            base: URL that the link is resolved against; an empty string
                leaves the link unchanged.
            href_attr: name of the attribute on *link_tag* that holds the URL.
            first_child: when true, use the first child of *title_tag* as the
                title instead of its full text.
        """
        if title_tag is None or link_tag is None:
            logging.warning('Skipping post without a title or link element')
            return
        href = link_tag.get(href_attr)
        if not href:
            logging.warning('Skipping post whose link has no %s attribute',
                            href_attr)
            return
        if first_child:
            if not title_tag.contents:
                logging.warning('Skipping post with an empty title element')
                return
            title = title_tag.contents[0]
        else:
            title = title_tag.text
        # urljoin copes with absolute, root-relative and relative links alike,
        # which plain string concatenation does not.
        dict_news[title] = urllib.parse.urljoin(base, href)

    def dev(self):
        """Publish the headlines found on dev.by."""
        site = 'https://dev.by/'
        dict_news = {}
        for post in self.publicParse(site, 'article-preview__info'):
            self._add_entry(
                dict_news,
                post.find('h3', {'class': 'article-preview__title'}),
                post.find('a', {'class': 'article-preview__link'}),
                base=site,
            )
        self.object_queue_news.set_news(dict_news)

    def tut(self):
        """Publish the headlines found on news.tut.by."""
        dict_news = {}
        for post in self.publicParse('https://news.tut.by/', 'news-section'):
            self._add_entry(
                dict_news,
                post.find('span', {'class': 'entry-head'}),
                post.find('a', {'class': 'entry__link'}),
                first_child=True,
            )
        self.object_queue_news.set_news(dict_news)

    def onliner(self):
        """Publish the headlines found on tech.onliner.by."""
        site = 'https://tech.onliner.by/'
        # NOTE(review): the trailing space is kept from the original value;
        # it may stop BeautifulSoup from matching any element - verify
        # against the live page.
        name_class = 'news-tidings__item news-tidings__item_1of3 news-tidings__item_condensed '
        dict_news = {}
        for post in self.publicParse(site, name_class):
            self._add_entry(
                dict_news,
                post.find('span', {'class': 'news-helpers_hide_mobile-small'}),
                post.find('a', {'class': 'news-tidings__link'}),
                base=site,
                first_child=True,
            )
        self.object_queue_news.set_news(dict_news)

    def kyky(self):
        """Publish the headlines found on kyky.org."""
        site = 'http://kyky.org/'
        dict_news = {}
        for post in self.publicParse(site, 'articles-list__item'):
            self._add_entry(
                dict_news,
                post.find('h2', {'class': 'article-preview__title'}),
                post.find('a', {'class': 'article-preview__body'}),
                base=site,
            )
        self.object_queue_news.set_news(dict_news)

    def probusiness(self):
        """Publish the headlines found on probusiness.io."""
        dict_news = {}
        posts = self.publicParse('https://probusiness.io/articles/',
                                 'news_item', tag='li')
        for post in posts:
            self._add_entry(
                dict_news,
                post.find('h2', {'class': 'title'}),
                post.find('a', {'class': 'media'}),
                href_attr='data-href',  # this site keeps the URL in data-href
            )
        self.object_queue_news.set_news(dict_news)

    def belbiz(self):
        """Publish the headlines found on bel.biz."""
        dict_news = {}
        posts = self.publicParse('https://bel.biz/news/', 'news-card',
                                 tag='article')
        for post in posts:
            header = post.find('a', {'class': 'news-card__header'})
            # The helper fills dict_news in place and returns None for a
            # malformed post, so its return value is not needed here.
            self.search_title_and_link(header, dict_news)
        self.object_queue_news.set_news(dict_news)

    @error_in_news
    def search_title_and_link(self, object, dict_news):
        """Add the text and ``href`` of *object* to *dict_news*.

        The method is wrapped by ``error_in_news``: if *object* is ``None``
        or has no ``href``, the error is logged and ``None`` is returned.

        Args:
            object: element whose text is the title and whose ``href`` is the
                link (the name shadows the builtin but is kept for callers).
            dict_news: ``{title: url}`` mapping to update in place.

        Returns:
            *dict_news* after the update, or ``None`` on failure.
        """
        dict_news[object.text] = object['href']
        return dict_news

    def start_thread(self, methods):
        """Run the callable *methods* in a new non-daemon thread."""
        thread = threading.Thread(target=methods, daemon=False)
        thread.start()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement