Advertisement
renix1

gscrapper to answers

Sep 17th, 2018
199
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.53 KB | None | 0 0
  1. # coding:utf-8
  2. from bs4 import BeautifulSoup
  3. import requests
  4. import re
  5. import sys
  6. import unidecode
  7.  
  8.  
  9. class GoogleScraper(object):
  10.     def __init__(self, question):
  11.         self._url = "https://www.google.com/search?q="
  12.         self._headers = {'User-agent': 'Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36 OPR/55.0.2994.61'}
  13.         self.question = u"%s" % question.decode('utf-8', 'ignore')
  14.  
  15.     def get_answers(self, site=None):
  16.         sites = {1: 'https://brainly.com.br', 2: 'https://galeranerd.me'}
  17.         site = sites.get(site)
  18.         term2search = "site%3A{} {}".format(site, self.question).replace(' ', '+')
  19.         req = requests.get("{google}{search_term}".format(google=self._url, search_term=term2search),
  20.                             headers=self._headers)
  21.         if req.status_code == 200:
  22.             response = req.content.decode('utf-8').encode('ascii', 'ignore')
  23.             soup = BeautifulSoup(response, 'lxml')
  24.             results = soup.findAll('h3', {'class': 'r'})
  25.             for result in results:
  26.                 title = result.a.text
  27.                 link = result.a['href']
  28.                 if re.search(site.replace('/', '\\/').replace('.', '\\.'), link):
  29.                     if 'brainly' in link:
  30.                         link += "#answers"
  31.                     print('{}\n\t{}\n\n'.format(title, link))
  32.             else:
  33.                 try:
  34.                     re.search(site.replace('/', '\\/').replace('.', '\\.'), link)
  35.                 except Exception as e:
  36.                     print("{}".format(e))
  37.         else:
  38.             return False
  39.  
  40. if __name__ == '__main__':
  41.     google = GoogleScraper(raw_input("Qual a pergunta? "))
  42.     google.get_answers(int(raw_input("[1] - Brainly\n[2] - Galera Nerd\nEscolha: ")))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement