Advertisement
nicuf

Search Google by keywords

Feb 12th, 2023 (edited)
1,042
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.43 KB | None | 0 0
  1.  
  2. import requests
  3. import urllib.parse
  4. import pandas as pd
  5. from requests_html import HTML
  6. from requests_html import HTMLSession
  7. import pprint
  8.  
  9. def get_source(url):
  10.     """Return the source code for the provided URL.
  11.  
  12.    Args:
  13.        url (string): URL of the page to scrape.
  14.  
  15.    Returns:
  16.        response (object): HTTP response object from requests_html.
  17.    """
  18.  
  19.     try:
  20.         session = HTMLSession()
  21.         response = session.get(url)
  22.         return response
  23.  
  24.     except requests.exceptions.RequestException as e:
  25.         print(e)
  26.  
  27. def get_results(query):
  28.  
  29.     query = urllib.parse.quote_plus(query)
  30.     response = get_source("https://www.google.com/search?q=" + query)
  31.  
  32.     return response
  33.  
  34. def parse_results(response):
  35.  
  36.     css_identifier_result = ".tF2Cxc"
  37.     css_identifier_title = "h3"
  38.     css_identifier_link = ".yuRUbf a"
  39.     css_identifier_text = ".VwiC3b"
  40.  
  41.     results = response.html.find(css_identifier_result)
  42.  
  43.     output = []
  44.  
  45.     for result in results:
  46.  
  47.         item = {
  48.             #'title': result.find(css_identifier_title, first=True).text,
  49.             #'link': result.find(css_identifier_link, first=True).attrs['href'],
  50.             # Textul fara data articolului primele 15 caractere
  51.             'text': result.find(css_identifier_text, first=True).text
  52.         }
  53.  
  54.         output.append(item)
  55.  
  56.     return output
  57.  
  58. def google_search(query):
  59.     # steluta e pusa automat
  60.     cautare = '"' + query +' *"'
  61.     print("cautarea solicitata: ", cautare)
  62.     response = get_results(cautare)
  63.     results = parse_results(response)
  64.     ## Ce nu functioneaza inca
  65.     for item in results:
  66.         text = proceseaza_text(item['text'])
  67.         #print(query, "---->", text)
  68.         pozitie_text = text.find(query)
  69.         if pozitie_text > 0:
  70.             item['text'] = text[pozitie_text:]
  71.     return results
  72.  
  73. def proceseaza_text(text):
  74.     # transformam totul in litere mici
  75.     text = text.lower()
  76.     dictionar = {
  77.         "ă": "a",
  78.         "â": "a",
  79.         "î": "i",
  80.         "ș": "s",
  81.         "ş": "s",
  82.         "ț": "t",
  83.         "ţ": "t",
  84.         ",": "",
  85.         "-": "",
  86.         ".": ""
  87.     }
  88.     for key, value in dictionar.items():
  89.         text = text.replace(key, value)
  90.     return text
  91.  
  92. for cautare in ["Napoleon s-a nascut", "cutremurele din Turcia şi Siria", "cat costa un televizor Philips", "de ce are nevoie un nou nascut"]:
  93.     pprint.pprint(google_search(cautare))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement