Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- ## for https://stackoverflow.com/q/74946967/6146136
- ## to scrape JavaScript-generated output from https://gmail.inputekno.com/
- ############################## sample usage: ##############################
- # scrapeGmailDotTrix('usr') # returns
- # ['usr@gmail.com', 'u.sr@gmail.com', 'us.r@gmail.com', 'u.s.r@gmail.com']
- ###########################################################################
- ## YOU MUST DOWNLOAD CHROMEDRIVER.EXE FOR THIS ##
- ## https://chromedriver.chromium.org/downloads ##
- import sys ## [ just for printing line number in case of error ]
- from selenium import webdriver
- from selenium.webdriver.common.by import By
- from selenium.webdriver.support.ui import WebDriverWait
- from selenium.webdriver.support import expected_conditions as EC
def scrapeGmailDotTrix(username: str, wait_start=10, wait_op=200):
    """Scrape the JavaScript-generated output of https://gmail.inputekno.com/.

    Opens the site in a Chrome session, types *username* into the input box,
    waits for the generated output to appear and returns it line by line.

    Args:
        username: Text typed into the site's username field (passed through
            ``str()`` first).
        wait_start: Seconds to wait for each page element (input, counter,
            output textarea) to become visible. Anything that is not a
            positive int falls back to 10.
        wait_op: Seconds to wait for the generated output. Anything that is
            not a positive int falls back to 200.

    Returns:
        The lines of the output textarea, each stripped of surrounding
        whitespace. If a wait times out, the failure is printed and whatever
        the textarea holds is still returned. On any other error the error is
        printed and an empty list is returned.
    """
    ## validating input arguments ##
    # Both checks must wrap the whole condition in not(...): otherwise a
    # non-int value reaches the `> 0` comparison (TypeError) and a negative
    # int is never replaced by the default.
    if not (isinstance(wait_start, int) and wait_start > 0):
        wait_start = 10
    if not (isinstance(wait_op, int) and wait_op > 0):
        wait_op = 200

    ## [ XPaths cheatsheet at https://devhints.io/xpath ] ##
    x_inp = '//input[@id="username"][@type="text"]'  # input
    x_ctr = '//label[@id="counter"][@for="emails"]'  # counter
    x_ota = '//textarea[@id="emails"]'  # output textarea

    driver = None  # set before the try so `finally` can tell if it was opened
    try:
        # No path given: relies on the chromedriver executable being
        # discoverable (e.g. in the same folder as this file).
        driver = webdriver.Chrome()

        ## [just to shorten lines] ##
        isVisible = EC.visibility_of_element_located
        valContains = EC.text_to_be_present_in_element_value
        # Separate names so the numeric timeouts are not shadowed.
        start_waiter = WebDriverWait(driver, wait_start)
        output_waiter = WebDriverWait(driver, wait_op)
        findEl = driver.find_element

        ## go to site ##
        driver.get('https://gmail.inputekno.com/')

        ## wait to load input, output textarea and counter ##
        for x in (x_inp, x_ctr, x_ota):
            try:
                start_waiter.until(isVisible((By.XPATH, x)))
            except Exception as e1:
                # Best effort: report and carry on; a truly missing element
                # surfaces below when it is actually used.
                print(f'Failed to load "{x}" - {type(e1)} Message: "{e1}"')

        ## enter username ##
        findEl(By.XPATH, x_inp).send_keys(str(username))

        ## wait for the fully dotted form (e.g. "u.s.r" for "usr") ##
        ## to show up in the output textarea's value               ##
        try:
            lastEm = '.'.join(str(username))
            output_waiter.until(valContains((By.XPATH, x_ota), lastEm))
        except Exception as e1:
            print(f'Failed to load output - {type(e1)} Message: "{e1}"')

        ## get output value, split lines and trim whitespace ##
        genEm = findEl(By.XPATH, x_ota).get_attribute('value')
        return [em.strip() for em in genEm.splitlines()]
    except Exception as e:
        ## in case of error: report it with the failing line number ##
        et, em, tb = sys.exc_info()
        if tb is not None:
            errMsg = f'{et} on line {tb.tb_lineno} - Message: "{em}"'
        else:
            errMsg = f'{type(e)} on UNKNOWN_LINE - Message: "{str(e)}"'
        print(f'Error generating emails for "{username}":', errMsg)
        return []  ## return empty list after printing error ##
    finally:
        # Always close the browser, including on the error path, so no
        # Chrome/chromedriver process is left running.
        if driver is not None:
            try:
                driver.quit()
            except Exception as e3:
                print(f'Failed to close browser - {type(e3)} Message: "{e3}"')
Advertisement
Add Comment
Please, Sign In to add comment