Advertisement
renix1

g1 scraper

Sep 17th, 2018
152
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.43 KB | None | 0 0
  1. from bs4 import BeautifulSoup
  2. import requests
  3.  
  4.  
  5. class GScraper(object):
  6.     def __init__(self):
  7.         self._url = "http://g1.globo.com"
  8.         self.response = None
  9.         self.soup = None
  10.  
  11.     def get_response(self):
  12.         response = requests.get(self._url)
  13.         if response.status_code == 200:
  14.             self.response = response.text.encode('utf-8').decode('ascii', 'ignore')
  15.             self.soup = BeautifulSoup(self.response, 'lxml')
  16.             return True
  17.         else:
  18.             return False
  19.  
  20.     def news(self):
  21.         all_news2return = []
  22.         all_news = self.soup.findAll('a', {'class': 'feed-post-link'})
  23.         for news in all_news:
  24.             all_news2return.append((news.text, news['href']))
  25.         return all_news2return
  26.  
  27.     def details(self, link):
  28.         if link:
  29.             r = requests.get(link)
  30.             if r.status_code == 200:
  31.                 response = r.text.encode('utf-8').decode('ascii', 'ignore')
  32.                 soup = BeautifulSoup(response, 'lxml')
  33.                 title = soup.find('h1', {'class': 'content-head__title'})
  34.                 paragraphs = soup.findAll('p', {'class': 'content-text__container'})
  35.                 print title.text
  36.                 for p in paragraphs:
  37.                     if p.text:
  38.                         print p.text
  39.         else:
  40.             return False
  41.  
  42.  
  43. if __name__ == "__main__":
  44.     g1_bot = GScraper()
  45.     g1_bot.get_response()
  46.     links = g1_bot.news()
  47.     last_link = links[0][1]
  48.     g1_bot.details(last_link)
  49. else:
  50.     print("I don't 'll run")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement