Advertisement
Guest User

Untitled

a guest
Sep 19th, 2019
268
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.37 KB | None | 0 0
  1. #!/usr/bin/env python
  2. # coding: utf-8
  3.  
  4. from bs4 import BeautifulSoup
  5. from selenium import webdriver
  6. import time
  7. import urllib.request
  8.  
  9. def scrape_captcha(idx):
  10.  
  11.     try:
  12.         # Load the test website and navigate to the ReCAPTCHA iframe
  13.         driver = webdriver.Firefox()
  14.         driver.get('https://patrickhlauke.github.io/recaptcha/')
  15.         driver.find_element_by_tag_name('iframe').click()
  16.         time.sleep(2)
  17.         driver.switch_to_frame(driver.find_elements_by_tag_name('iframe')[1])
  18.         soup = BeautifulSoup(driver.page_source, 'html.parser')
  19.         divs = soup.find_all("div")
  20.         driver.close()
  21.         driver.quit()
  22.  
  23.         # Pull the instructions from the "Golden div"
  24.         golden_div = divs[0].find_all('div', {'class':'rc-imageselect-desc-no-canonical'})
  25.         instruction = golden_div[0].text
  26.         instruction_subsection = golden_div[0].span.text
  27.         instruction = instruction[:(-1 * len(instruction_subsection))]
  28.  
  29.         # Extract all images involved with the challenge and save file
  30.         images = []
  31.         image_div = soup.find_all('div', {'class':'rc-image-tile-wrapper'})
  32.        
  33.         for div in image_div:
  34.            
  35.             if div.img.get("src") not in images:
  36.                
  37.                 images.append(div.img.get("src"))
  38.                
  39.         for image in images:
  40.            
  41.             urllib.request.urlretrieve(image, "Data/img/captcha" + str(idx) + ".png")
  42.  
  43.         return True, instruction
  44.  
  45.     except Exception as e:
  46.  
  47.         print(e)
  48.         return False, e
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement