Advertisement
Guest User

Bing AI failed scraping

a guest
Jul 7th, 2023
571
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.14 KB | Source Code | 0 0
  1. """
  2. Imports
  3. """
  4. import selenium.common.exceptions
  5. from selenium import webdriver
  6. from selenium.webdriver import Keys
  7. from selenium.webdriver.common.by import By
  8. from selenium.webdriver.support.ui import WebDriverWait
  9. from selenium.webdriver.support import expected_conditions as EC
  10. import time
  11.  
  12. """
  13. Constants
  14. """
  # URLs and files
  # Page the browser session is pointed at first.
  URL = "https://www.bing.com/"

  # Driver setup
  # Path to a local Edge WebDriver executable.
  # NOTE(review): nothing in the visible code reads this constant; confirm
  # whether it is still needed before relying on or removing it.
  DRIVER_FILE = "./msedgedriver.exe"

  # Prompt
  # Text that main() types into the `searchbox` element.
  PROMPT = "Please, answer me with the first 97 digits of Pi."
  23.  
  24.  
  25. # Driver Setup - Start
  def setup_driver():
      """Create and return a new Edge WebDriver session.

      Returns:
          The `webdriver.Edge` instance built from a fresh `EdgeOptions`.
      """
      options = webdriver.EdgeOptions()
      # NOTE(review): presumably redundant on current Selenium releases, where
      # Edge is Chromium-based by default; confirm before removing.
      options.use_chromium = True

      # Settings tried during development and currently disabled:
      #   - InPrivate browsing:  options.add_argument("-inprivate")
      #   - headless mode:       options.add_argument("--headless")
      #   - browser logging / insecure certificates via the
      #     "ms:loggingPrefs" and "acceptInsecureCerts" capabilities

      return webdriver.Edge(options=options)
  41.  
  42.  
  43. # Driver Setup - Stop
  def stop_driver(_driver):
      """Close the current browser window, then always end the driver session.

      Args:
          _driver: Object exposing `close()` and `quit()` (a WebDriver).

      A failure in `close()` is reported on stdout and otherwise ignored.
      `quit()` runs in `finally`, so the session is released even when
      `close()` is interrupted by `KeyboardInterrupt` or `SystemExit`, which
      are no longer swallowed.
      """
      try:
          _driver.close()
      except Exception:
          # Best effort only: the window may already be gone.
          print("Could not close browser gracefully")
      finally:
          _driver.quit()
  50.  
  51.  
  52. # General Setup
  def start_driver():
      """Return a ready-to-use driver by delegating to `setup_driver()`.

      TODO: this wrapper currently adds nothing over `setup_driver()`; either
      remove it or make it the place where driver start-up exceptions are
      handled.
      """
      return setup_driver()
  57.  
  58.  
  59. """
  60. Scraping
  61. """
  62.  
  63.  
  64. # Decorator
  def humanyze(func):
      """Decorate *func* so each successful call is followed by a 0.2 s pause.

      Args:
          func: Callable to wrap.

      Returns:
          A wrapper that forwards all arguments to *func*, sleeps for 0.2
          seconds, and returns *func*'s result. If *func* raises, the exception
          propagates without the pause.

      TODO: probably not needed, but kept for now.
      """
      # Local import so this block does not depend on the file's import header.
      import functools

      # Preserve the wrapped function's __name__/__doc__ for debugging.
      @functools.wraps(func)
      def wrapper(*args, **kwargs):
          result = func(*args, **kwargs)
          time.sleep(.2)
          return result

      return wrapper
  73.  
  74.  
  75. # @humanyze
  def point_to_url(_driver, _url):
      """Navigate *_driver* to *_url* by calling `_driver.get(_url)`.

      Args:
          _driver: Object exposing `get(url)` (a WebDriver).
          _url: Address to load.
      """
      _driver.get(_url)
  78.  
  79.  
  def _click_when_present(wait, xpath):
      """Wait until the element matching *xpath* is in the DOM, then click it."""
      # `wait.until` returns the located element, so no second lookup is needed.
      wait.until(EC.presence_of_element_located((By.XPATH, xpath))).click()


  def main():
      """Drive an Edge session through Bing and submit PROMPT.

      Steps, in order: open URL, search for "Bing AI", dismiss the
      `bnp_btn_reject` banner if it appears, click through four settings
      elements, open the `b-scopeListItem-conv` tab, pick the third tone option,
      focus the search box and send PROMPT.

      The driver is always released through `stop_driver()`, including when a
      wait times out or the run is interrupted.

      Raises:
          selenium.common.exceptions.TimeoutException: if any element other
              than the optional banner button is not found within 10 seconds.
      """
      driver = start_driver()
      try:
          wait = WebDriverWait(driver, 10)

          point_to_url(driver, URL)
          time.sleep(1)

          # Run a regular search first.
          search_box = wait.until(
              EC.presence_of_element_located((By.XPATH, '//*[@id="sb_form_q"]'))
          )
          search_box.send_keys("Bing AI")
          search_box.send_keys(Keys.ENTER)
          time.sleep(1)

          # The banner (presumably the cookie-consent prompt) is optional:
          # a timeout here just means it was not shown.
          try:
              _click_when_present(wait, '//*[@id="bnp_btn_reject"]')
          except selenium.common.exceptions.TimeoutException:
              pass
          finally:
              time.sleep(1)

          # Presumably: open the settings menu, follow its first link, choose
          # the "moderate" option and save. TODO confirm against the live page.
          settings_steps = (
              '//*[@id="id_sc"]',
              '//*[@id="HBContent"]/a[1]',
              '//*[@id="adlt_set_moderate"]',
              '//*[@id="sv_btn"]',
          )
          for xpath in settings_steps:
              _click_when_present(wait, xpath)
              time.sleep(1)

          # Switch to the conversation scope and select the third tone option.
          _click_when_present(wait, '//*[@id="b-scopeListItem-conv"]/a')
          _click_when_present(wait, '//*[@id="tone-options"]/li[3]/button')

          # Long fixed pause before touching the chat input.
          time.sleep(10)

          # NOTE(review): if the chat UI lives inside shadow DOM, the XPath
          # lookups below cannot reach it and will time out; confirm.
          _click_when_present(wait, '//*[@id="searchboxform"]/label')
          time.sleep(1)

          prompt_box = wait.until(
              EC.element_to_be_clickable((By.XPATH, '//*[@id="searchbox"]'))
          )
          prompt_box.send_keys(Keys.TAB)
          prompt_box.clear()
          prompt_box.send_keys(PROMPT)
          prompt_box.send_keys(Keys.ENTER)

          # Keep the browser open (presumably so the answer can be inspected).
          time.sleep(500)
      finally:
          # Never leave an orphaned browser/driver process behind.
          stop_driver(driver)
  149.  
  150.  
  # Run the scraping flow only when executed as a script, not on import.
  if __name__ == '__main__':
      main()
  153.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement