Not a member of Pastebin yet?
                        Sign Up,
                        it unlocks many cool features!                    
                - ### Takes a URL and returns a BeautifulSoup object (or None/errorMsg if there is an error) ###
 - ### [ For when BeautifulSoup(requests.get(url).content) is not enough ] ######################
 - ## full version at https://pastebin.com/kEC9gPC8
 - ## requests-based version/s at https://pastebin.com/rBTr06vy and https://pastebin.com/5ibz2F6p
 - ## [if you want a quick tutorial on selenium, see https://www.scrapingbee.com/blog/selenium-python/]
 - #### REQUIRED: download chromedriver.exe from https://chromedriver.chromium.org/downloads ####
 - #### [AND copy chromedriver.exe to the same folder as this py file] ####
 - import time
 - from bs4 import BeautifulSoup
 - from selenium import webdriver
 - def linkToSoup_selenium(lUrl, tmout=None, fparser='html.parser', isv=True, returnErr=False):
 - try:
 - # I copy chromedriver.exe to the same folder as this py file
 - driver = webdriver.Chrome()
 - driver.maximize_window()
 - driver.get(lUrl)
 - if type(tmout) in [int, float]: time.sleep(tmout)
 - lSoup = BeautifulSoup(driver.page_source, fparser)
 - driver.close()
 - del driver # (just in case)
 - return lSoup
 - except Exception as e:
 - if isv: print(str(e)) ## set isv=False to suppress error message ##
 - return str(e) if returnErr else None
 
Advertisement
 
                    Add Comment                
                
                        Please, Sign In to add comment