# Untitled

#!/usr/bin/env python
# coding: utf-8

import os
import time
import urllib.request

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By

def _fetch_challenge_html():
    """Open the demo page in Firefox and return the challenge frame's HTML."""
    driver = webdriver.Firefox()
    try:
        driver.get('https://patrickhlauke.github.io/recaptcha/')
        # Clicking the first iframe (presumably the reCAPTCHA checkbox widget)
        # is what makes the image challenge appear.
        driver.find_element(By.TAG_NAME, 'iframe').click()
        # Fixed pause so the second iframe has time to load.
        time.sleep(2)
        driver.switch_to.frame(driver.find_elements(By.TAG_NAME, 'iframe')[1])
        return driver.page_source
    finally:
        # Always shut the browser down, even if navigation failed part-way;
        # quit() closes every window and ends the WebDriver session.
        driver.quit()


def _extract_instruction(soup):
    """Return the challenge instruction text without its trailing <span> text."""
    first_div = soup.find("div")
    if first_div is None:
        raise ValueError("no <div> elements found in the challenge frame")

    golden_div = first_div.find(
        'div', {'class': 'rc-imageselect-desc-no-canonical'}
    )
    if golden_div is None:
        raise ValueError("challenge instruction element not found")

    instruction = golden_div.text
    span = golden_div.span
    subsection = span.text if span is not None else ""
    # Guard the empty case: slicing with [:-0] would wipe the whole string.
    if subsection:
        instruction = instruction[:-len(subsection)]
    return instruction


def _extract_image_urls(soup):
    """Return the distinct tile image URLs in first-seen order."""
    tiles = soup.find_all('div', {'class': 'rc-image-tile-wrapper'})
    sources = (tile.img.get("src") for tile in tiles if tile.img is not None)
    # dict.fromkeys de-duplicates while preserving insertion order.
    return list(dict.fromkeys(src for src in sources if src))


def scrape_captcha(idx):
    """Scrape one ReCAPTCHA image challenge and save its image(s) to disk.

    Loads the demo page in Firefox, reads the challenge frame, extracts the
    instruction text and downloads the challenge image(s) into ``Data/img``.
    The first image is saved as ``captcha<idx>.png``; any further distinct
    images are saved as ``captcha<idx>_<n>.png`` so they do not overwrite it.

    Args:
        idx: Value used (via ``str``) to build the output file name.

    Returns:
        ``(True, instruction)`` on success, or ``(False, exception)`` if any
        step failed. Errors are printed and returned, never raised.
    """
    try:
        soup = BeautifulSoup(_fetch_challenge_html(), 'html.parser')
        instruction = _extract_instruction(soup)
        image_urls = _extract_image_urls(soup)

        # urlretrieve does not create missing directories.
        os.makedirs("Data/img", exist_ok=True)

        for position, url in enumerate(image_urls):
            suffix = "" if position == 0 else "_" + str(position)
            urllib.request.urlretrieve(
                url, "Data/img/captcha" + str(idx) + suffix + ".png"
            )

        return True, instruction

    except Exception as e:

        print(e)
        return False, e