Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from selenium import webdriver
- from selenium.webdriver.common.keys import Keys
- from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
- from selenium.webdriver.common.by import By
- from selenium.webdriver.support.ui import WebDriverWait
- from selenium.webdriver.support import expected_conditions as EC
- import time, traceback
- import random
- import os
- import re
- import urllib
# Script configuration: the course dashboard to open and the credentials typed
# into the login form's "username" / "password" fields.
# Each value can be overridden through an environment variable so that real
# credentials do not have to be written into the source; the literals are the
# original placeholder defaults and are used when the variable is unset.
COURSE_URL = os.environ.get(
    "ACG_COURSE_URL", "https://learn.acloud.guru/course/aws-lambda/dashboard"
)
USERNAME = os.environ.get("ACG_USERNAME", "linkedin_user")
PASSWORD = os.environ.get("ACG_PASSWORD", "linkedin_pass")
# Build the Chrome session used by the rest of the script.
options = webdriver.ChromeOptions()
# Optional switches, disabled by default; uncomment to enable.
#options.add_argument('user-agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:63.0) Gecko/20100101 Firefox/63.0"')
#options.add_argument('--proxy-server=%s' % "http://127.0.0.1:24000")
#options.add_argument("headless")

# Turn off image loading (2 is Chrome's "block" content setting) under both
# the default and the managed preference keys.
chrome_prefs = {
    "profile.default_content_settings": {"images": 2},
    "profile.managed_default_content_settings": {"images": 2},
}
# Use the public setter instead of mutating the `experimental_options`
# accessor's dict in place.
options.add_experimental_option("prefs", chrome_prefs)

# `options=` replaces the deprecated `chrome_options=` keyword.
# A chromedriver path can be supplied if it is not on PATH:
#   executable_path=r'/usr/bin/chromedriver'
driver = webdriver.Chrome(options=options)
# Log in: open the course page, wait for the Auth0 login widget, choose the
# fourth provider button inside it, then submit the credentials.
# Explicit waits replace the original fixed sleeps so a slow page load no
# longer causes a missing-element failure and a fast one is not held up.
driver.get(COURSE_URL)
login_wait = WebDriverWait(driver, 60)

provider_button = login_wait.until(
    EC.element_to_be_clickable((
        By.XPATH,
        '//*[@id="Auth0Container"]/div/div/form/div/div/div[3]/span/div/div/div/div/div/div/div/div/div[2]/div/button[4]',
    ))
)
provider_button.click()

username_field = login_wait.until(
    EC.presence_of_element_located((By.ID, "username"))
)
username_field.send_keys(USERNAME)

password_field = driver.find_element(By.ID, "password")
password_field.send_keys(PASSWORD)
password_field.send_keys(Keys.ENTER)  # ENTER submits the form

# There is no known post-login element to wait on, so the original fixed
# pauses around the reload of the course page are kept.
time.sleep(5)
driver.get(COURSE_URL)
time.sleep(5)
# XPath matching one clickable entry per course item on the dashboard.
ITEM_XPATH = "//*[starts-with(@class, 'CourseComponent__InnerContainer')]"
# JavaScript returning the `src` of the first <video> element on the page.
VIDEO_SRC_SCRIPT = "return document.getElementsByTagName('video')[0].src;"
# How many times one item is attempted before the loop moves on to the next.
MAX_ITEM_ATTEMPTS = 3


def _download_with_retries(url, filename, attempts=10):
    """Download *url* to *filename*, trying up to *attempts* times.

    The data is written to ``filename + ".part"`` and renamed only after a
    complete transfer, so an interrupted download never leaves a file that the
    ``os.path.exists`` skip check would later mistake for a finished video.

    Returns True on success, False when every attempt failed.
    """
    partial = filename + ".part"
    for _ in range(attempts):
        try:
            urllib.urlretrieve(url, partial)
        except Exception:
            # Best-effort retry on any failure; drop whatever was written so
            # the next attempt starts clean.
            if os.path.exists(partial):
                os.remove(partial)
            continue
        os.rename(partial, filename)
        return True
    return False


# Walk every course item in order: open it, wait for its <video> element,
# and save the video source to data/NNN-title.mp4 (skipping existing files).
try:
    total = len(driver.find_elements(By.XPATH, ITEM_XPATH))
    if not os.path.isdir("data"):
        os.mkdir("data")

    index = 0
    failures = 0  # consecutive failed attempts on the current item
    # `index < total` also visits the last item, which the original
    # `index >= total - 1` check skipped.
    while index < total:
        time.sleep(10)
        try:
            # Re-query on every pass: the page is reloaded after each item,
            # so previously found elements are no longer usable.
            elems = driver.find_elements(By.XPATH, ITEM_XPATH)
            title = elems[index].text.split("\n")[0].encode("utf-8")
            print("Index {} - {}".format(index + 1, title))
            elems[index].click()
            time.sleep(10)

            # Items for which no <video> appears within 60s are skipped.
            try:
                WebDriverWait(driver, 60).until(
                    EC.presence_of_element_located((By.TAG_NAME, "video"))
                )
                time.sleep(5)
                has_video = True
            except Exception:
                has_video = False

            # "/" would be read as a directory separator in the file name.
            title = title.replace("/", "-")
            filename = "data/{:03d}-{}.mp4".format(index + 1, title)
            if has_video and not os.path.exists(filename):
                url = driver.execute_script(VIDEO_SRC_SCRIPT)
                print("Item {}/{}".format(index + 1, total))
                print(url)
                print("=" * 20)
                if not url or not _download_with_retries(url, filename):
                    print("Download failed at index : " + str(index))
            index += 1
            failures = 0
        except Exception:
            print("Error at index : " + str(index))
            # Retry the same item a bounded number of times, then move on,
            # so one permanently broken item cannot stall the loop forever.
            failures += 1
            if failures >= MAX_ITEM_ATTEMPTS:
                print("Giving up at index : " + str(index))
                index += 1
                failures = 0
        # Return to the dashboard so the item list is available again.
        driver.get(COURSE_URL)
        time.sleep(5)
finally:
    # Always close the browser and its chromedriver process.
    driver.quit()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement