### Takes a URL and returns a BeautifulSoup object (or None/errorMsg if there is an error) ###
### [ For when BeautifulSoup(requests.get(url).content) is not enough ] ######################
## full version at https://pastebin.com/kEC9gPC8
## requests-based version/s at https://pastebin.com/rBTr06vy and https://pastebin.com/5ibz2F6p
## [if you want a quick tutorial on selenium, see https://www.scrapingbee.com/blog/selenium-python/]

#### REQUIRED: download chromedriver.exe from https://chromedriver.chromium.org/downloads ####
#### [AND copy chromedriver.exe to the same folder as this py file] ####

import time

from bs4 import BeautifulSoup
from selenium import webdriver


def linkToSoup_selenium(lUrl, tmout=None, fparser='html.parser', isv=True, returnErr=False):
    """Load *lUrl* in Chrome via selenium and return the page as a BeautifulSoup object.

    Args:
        lUrl: URL passed to ``driver.get``.
        tmout: Seconds to sleep after ``driver.get`` returns and before the
            page source is read. Ignored unless it is an int or a float.
        fparser: Parser name handed to ``BeautifulSoup``.
        isv: When true, print the error message if anything fails.
            Set ``isv=False`` to suppress the message.
        returnErr: When true, return the error message string on failure
            instead of ``None``.

    Returns:
        The parsed ``BeautifulSoup`` object on success; on failure,
        ``str(error)`` if *returnErr* is true, otherwise ``None``.
    """
    driver = None
    try:
        # I copy chromedriver.exe to the same folder as this py file
        driver = webdriver.Chrome()
        driver.maximize_window()
        driver.get(lUrl)

        # bool is a subclass of int; exclude it so tmout=True does not sleep.
        if isinstance(tmout, (int, float)) and not isinstance(tmout, bool):
            time.sleep(tmout)

        return BeautifulSoup(driver.page_source, fparser)
    except Exception as e:
        if isv:
            print(str(e))  ## set isv=False to suppress error message ##
        return str(e) if returnErr else None
    finally:
        # Always shut the browser down, including when loading or parsing
        # failed; quit() ends the whole session rather than only closing the
        # current window, so no browser/driver process is left behind.
        if driver is not None:
            try:
                driver.quit()
            except Exception as quit_err:
                # A cleanup failure must not replace the result computed above.
                if isv:
                    print(str(quit_err))