Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # coding:utf-8
- from bs4 import BeautifulSoup
- import requests
- import re
- import sys
- import unidecode
class GoogleScraper(object):
    """Search Google for a question, optionally restricted to one known site.

    Each matching result is printed to stdout as its title followed by its
    link on an indented line.
    """

    # Menu key -> base URL of the sites a search can be restricted to.
    _SITES = {1: 'https://brainly.com.br', 2: 'https://galeranerd.me'}

    # Seconds to wait for Google before giving up instead of hanging forever.
    _TIMEOUT = 10

    def __init__(self, question):
        """Store the question to search for.

        Args:
            question: The search text. A byte string is decoded as UTF-8
                (undecodable bytes are dropped); text is used as is.
        """
        # The query itself is sent through ``params`` so that requests
        # URL-encodes it, hence no trailing "?q=" here.
        self._url = "https://www.google.com/search"
        self._headers = {'User-agent': 'Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36 OPR/55.0.2994.61'}
        # Only byte strings need decoding; calling .decode on text fails on
        # Python 3 and on non-ASCII unicode under Python 2.
        if isinstance(question, bytes):
            question = question.decode('utf-8', 'ignore')
        self.question = u"%s" % question

    def _build_query(self, site_url):
        """Return the raw (not yet URL-encoded) Google query text."""
        if site_url is None:
            return self.question
        # A unicode template keeps accented questions from raising
        # UnicodeEncodeError on Python 2.
        return u"site:{} {}".format(site_url, self.question)

    @staticmethod
    def _format_result(title, link, site_url):
        """Return the printable entry for one result, or None to skip it.

        A result is skipped when a site restriction is active and the link
        does not contain that site's URL.
        """
        if site_url is not None and site_url not in link:
            return None
        if 'brainly' in link:
            # Appends the "#answers" fragment to Brainly links.
            link += "#answers"
        return u'{}\n\t{}\n\n'.format(title, link)

    def get_answers(self, site=None):
        """Run the search and print every matching result.

        Args:
            site: Key of the site to restrict the search to (1 = Brainly,
                2 = Galera Nerd), or None to search without a restriction.

        Returns:
            False when Google answers with a non-200 status; None otherwise.

        Raises:
            ValueError: If ``site`` is neither None nor a known key.
        """
        if site is None:
            site_url = None
        elif site in self._SITES:
            site_url = self._SITES[site]
        else:
            raise ValueError("unknown site {!r}; expected one of {}".format(
                site, sorted(self._SITES)))

        req = requests.get(self._url,
                           params={'q': self._build_query(site_url)},
                           headers=self._headers,
                           timeout=self._TIMEOUT)
        if req.status_code != 200:
            return False

        # Non-ASCII characters are dropped on purpose so the output can be
        # printed on any console; 'ignore' on the decode side keeps a badly
        # encoded page from aborting the whole run.
        response = req.content.decode('utf-8', 'ignore').encode('ascii', 'ignore')
        soup = BeautifulSoup(response, 'lxml')
        # NOTE(review): the 'h3.r' selector depends on Google's result markup
        # and may need updating if no results are ever found.
        for result in soup.find_all('h3', {'class': 'r'}):
            anchor = result.a
            if anchor is None or not anchor.get('href'):
                # Heading without a usable link: nothing to report.
                continue
            entry = self._format_result(anchor.text, anchor['href'], site_url)
            if entry is not None:
                print(entry)
# Script entry point: ask for a question and a site, then print the results.
if __name__ == '__main__':
    # raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    google = GoogleScraper(read_line("Qual a pergunta? "))
    choice = read_line("[1] - Brainly\n[2] - Galera Nerd\nEscolha: ")
    # Reject anything that is not one of the two menu entries up front,
    # instead of crashing with a traceback on int() or inside get_answers.
    try:
        site_key = int(choice)
    except ValueError:
        site_key = None
    if site_key not in (1, 2):
        sys.exit("Escolha 1 ou 2.")
    google.get_answers(site_key)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement