Advertisement
Guest User

Untitled

a guest
Dec 18th, 2018
73
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.90 KB | None | 0 0
  1. from bs4 import BeautifulSoup
  2. import urllib.request
  3. import threading
  4. import logging
  5.  
  6. def error_in_news(fn):
  7. def aux(*args, **kwargs):
  8. try:
  9. return fn(*args, **kwargs)
  10. except Exception as e:
  11. logging.error(e)
  12. return None
  13. return aux
  14.  
  15. class News():
  16. def __init__(self, object_queue_news):
  17. self.object_queue_news = object_queue_news
  18. get_any_news = [self.dev, self.tut, self.onliner, self.kyky, self.probusiness, self.belbiz]
  19. for news in get_any_news:
  20. self.start_thread(news)
  21.  
  22. def publicParse(self,site,name_class):
  23. page = urllib.request.urlopen(site)
  24. soup = BeautifulSoup(page.read(),'lxml')
  25. posts = soup.findAll('div', {'class': name_class})
  26. return posts
  27.  
  28. def dev(self):
  29. posts = self.publicParse('https://dev.by/','article-preview__info')
  30. dict_news = {}
  31. for post in posts:
  32. title = post.find('h3', {'class': 'article-preview__title'})
  33. link = post.find('a', {'class': 'article-preview__link'})
  34. dict_news[title.text] = 'https://dev.by'+link['href']
  35. self.object_queue_news.set_news(dict_news)
  36.  
  37. def tut(self):
  38. posts = self.publicParse('https://news.tut.by/', 'news-section')
  39. dict_news = {}
  40. for post in posts:
  41. print("")
  42. title = post.find('span', {'class': 'entry-head'})
  43. link = post.find('a', {'class': 'entry__link'})
  44. dict_news[title.contents[0]] = link['href']
  45. self.object_queue_news.set_news(dict_news)
  46.  
  47. def onliner(self):
  48. name_class = 'news-tidings__item news-tidings__item_1of3 news-tidings__item_condensed '
  49. posts = self.publicParse('https://tech.onliner.by/', name_class)
  50. dict_news = {}
  51. for post in posts:
  52. title = post.find('span', {'class': 'news-helpers_hide_mobile-small'})
  53. link = post.find('a', {'class': 'news-tidings__link'})
  54. dict_news[title.contents[0]] = 'https://tech.onliner.by/'+link['href']
  55. self.object_queue_news.set_news(dict_news)
  56.  
  57. def kyky(self):
  58. posts = self.publicParse('http://kyky.org/', 'articles-list__item')
  59. dict_news = {}
  60. for post in posts:
  61. title = post.find('h2', {'class': 'article-preview__title'})
  62. link = post.find('a', {'class': 'article-preview__body'})
  63. dict_news[title.text] = 'http://kyky.org/' + link['href']
  64. self.object_queue_news.set_news(dict_news)
  65.  
  66. def probusiness(self):
  67. page = urllib.request.urlopen('https://probusiness.io/articles/')
  68. soup = BeautifulSoup(page.read(),'lxml')
  69. posts = soup.findAll('li', {'news_item'})
  70. dict_news = {}
  71. for post in posts:
  72. title = post.find('h2', {'class': 'title'})
  73. print(title.text)
  74. link = post.find('a', {'class': 'media'})
  75. print(link['data-href'])
  76. dict_news[title.text] = link['data-href']
  77. self.object_queue_news.set_news(dict_news)
  78.  
  79. def belbiz(self):
  80. page = urllib.request.urlopen('https://bel.biz/news/')
  81. soup = BeautifulSoup(page.read(),'lxml')
  82. posts = soup.findAll('article', {'class':'news-card'})
  83. dict_news = {}
  84. for post in posts:
  85. object = post.find('a', {'class': 'news-card__header'})
  86. callback_dict = self.search_title_and_link(object, object)
  87. if callback_dict is not None:
  88. dict_news = callback_dict
  89. self.object_queue_news.set_news(dict_news)
  90.  
  91. @error_in_news
  92. def search_title_and_link(self, object, dict_news):
  93. title = object.text
  94. link = object['href']
  95. dict_news[title] = link
  96. return dict_news
  97.  
  98. def start_thread(self, methods):
  99. thread = threading.Thread(target=methods, daemon=False)
  100. thread.start()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement