Try95th

scrapeGmailDotTrix for so_q_74946967

Dec 29th, 2022 (edited)
171
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.35 KB | None | 0 0
  1. ## for https://stackoverflow.com/q/74946967/6146136
  2. ## to scrape JavaScript-generated output from https://gmail.inputekno.com/
  3. ############################## sample usage: ##############################
  4. # scrapeGmailDotTrix('usr') # returns a list of generated email strings ([] on error)
  5. ###########################################################################
  6.  
  7. ## YOU MUST DOWNLOAD CHROMEDRIVER.EXE FOR THIS ##
  8. ## https://chromedriver.chromium.org/downloads ##
  9.  
  10. import sys ## [ just for printing line number in case of error ]
  11. from selenium import webdriver
  12. from selenium.webdriver.common.by import By
  13. from selenium.webdriver.support.ui import WebDriverWait
  14. from selenium.webdriver.support import expected_conditions as EC
  15.  
  16. def scrapeGmailDotTrix(username: str, wait_start=10, wait_op=200):
  17.     ## validating input arguments ##
  18.     if not (isinstance(wait_start, int) and wait_start > 0): wait_start = 10
  19.     if not (isinstance(wait_op, int)) and wait_op > 0: wait_op = 200
  20.  
  21.     ## [ XPaths cheatsheet at https://devhints.io/xpath ] ##
  22.     x_inp = '//input[@id="username"][@type="text"]'  # input
  23.     x_ctr = '//label[@id="counter"][@for="emails"]'  # counter
  24.     x_ota = '//textarea[@id="emails"]'  # output textarea
  25.  
  26.     try:
  27.         driver = webdriver.Chrome()  # no need for path because
  28.         # I copied chromedriver.exe to the same folder as this py file
  29.  
  30.         ## [just to shorten lines] ##
  31.         isVisible = EC.visibility_of_element_located
  32.         valContains = EC.text_to_be_present_in_element_value
  33.         wait_start = WebDriverWait(driver, wait_start)
  34.         wait_op = WebDriverWait(driver, wait_op)
  35.         findEl = driver.find_element
  36.  
  37.         ## go to site ##
  38.         driver.get('https://gmail.inputekno.com/')
  39.  
  40.         ## wait to load input, output textarea and counter ##
  41.         for x in [x_inp, x_ctr, x_ota]:
  42.             try:
  43.                 wait_start.until(isVisible((By.XPATH, x)))
  44.             except Exception as e1:
  45.                 print(f'Failed to load "{x}" - {type(e1)} Message: "{e1}"')
  46.  
  47.         ## enter username ##
  48.         findEl(By.XPATH, x_inp).send_keys(str(username))
  49.  
  50.         ## wait to load {[email protected]} in output ##
  51.         try:
  52.             lastEm = '.'.join(str(username))
  53.             wait_op.until(valContains((By.XPATH, x_ota), lastEm))
  54.         except Exception as e1:
  55.             print(f'Failed to load output - {type(e1)} Message: "{e1}"')
  56.  
  57.         ## get output value ##
  58.         genEm = findEl(By.XPATH, x_ota).get_attribute('value')
  59.        
  60.         driver.quit() # close browser
  61.         del driver  # just in case
  62.        
  63.         ######## split lines and trim whitespace ########
  64.         return [em.strip() for em in genEm.splitlines()]
  65.         ############ list of emails returned ############
  66.         #################################################
  67.     except Exception as e:
  68.         ## in case of error ##
  69.         errMsg = f'{type(e)} on UNKNOWN_LINE - Message: "{str(e)}"'
  70.         try:
  71.             et, em, tb = sys.exc_info() ## get line number ##
  72.             errMsg = f'{et} on line {tb.tb_lineno} - Message: "{em}"'
  73.         except Exception as e2:
  74.             print(f'failed to get errorline  -', str(e2))
  75.         print(f'Error generating emails for "{username}":', errMsg)
  76.         return [] ## return empty list after printing error ##
  77.  
  78.  
Advertisement
Add Comment
Please, Sign In to add comment