Advertisement
Guest User

Untitled

a guest
Feb 19th, 2019
140
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.21 KB | None | 0 0
  1. from selenium import  webdriver
  2. from selenium.webdriver.common.keys import Keys
  3. from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
  4. from selenium.webdriver.common.by import By
  5. from selenium.webdriver.support.ui import WebDriverWait
  6. from selenium.webdriver.support import expected_conditions as EC
  7. import time, traceback
  8. import random
  9. import os
  10. import re
  11. import urllib
  12.  
  # Course video downloader: configuration, helpers and script entry point.
# Drives Chrome through Selenium (Selenium 3 style API, as used by this file),
# logs in, then walks the lessons on the course dashboard and saves each
# lesson's <video> source into OUTPUT_DIR.
COURSE_URL = "https://learn.acloud.guru/course/aws-lambda/dashboard"

# Credentials default to the original placeholders; they can be overridden
# through the environment so real secrets need not be written into the script.
USERNAME = os.environ.get("COURSE_USERNAME", "linkedin_user")
PASSWORD = os.environ.get("COURSE_PASSWORD", "linkedin_pass")

OUTPUT_DIR = "data"
LESSON_XPATH = "//*[starts-with(@class, 'CourseComponent__InnerContainer')]"
# Fourth button inside the Auth0 login widget.
# NOTE(review): presumably the LinkedIn social-login button, judging by the
# credential placeholders -- confirm against the live page.
LOGIN_BUTTON_XPATH = '//*[@id="Auth0Container"]/div/div/form/div/div/div[3]/span/div/div/div/div/div/div/div/div/div[2]/div/button[4]'

DOWNLOAD_ATTEMPTS = 10    # tries per video before giving up
MAX_LESSON_FAILURES = 3   # consecutive errors on one lesson before skipping it


def to_native_str(text):
    """Return *text* as the interpreter's native ``str`` type.

    On Python 2 a ``unicode`` value is encoded to UTF-8 bytes (what the
    original script did); on Python 3 the text is returned unchanged.
    """
    if str is bytes and not isinstance(text, str):
        return text.encode("utf-8")
    return text


def lesson_title(element_text):
    """Return the first line of a lesson element's text as a native str."""
    return to_native_str(element_text.split("\n")[0])


def video_filename(index, title):
    """Build the output path for the lesson at zero-based *index*.

    Slashes in *title* are replaced with dashes so the title cannot introduce
    extra path components.
    """
    return "{}/{:03d}-{}.mp4".format(OUTPUT_DIR, index + 1, title.replace("/", "-"))


def download_with_retries(url, filename, attempts=DOWNLOAD_ATTEMPTS, retrieve=None):
    """Download *url* to *filename*, trying up to *attempts* times.

    The data is written to ``filename + ".part"`` and renamed only after a
    successful transfer, so an interrupted download is never mistaken for a
    finished file on a later run.

    *retrieve* is a ``urlretrieve``-compatible callable ``(url, path)``; it
    defaults to the standard library implementation.

    Returns True on success, False when every attempt failed.
    """
    if retrieve is None:
        try:  # Python 3
            from urllib.request import urlretrieve as retrieve
        except ImportError:  # Python 2
            from urllib import urlretrieve as retrieve

    partial = filename + ".part"
    for attempt in range(1, attempts + 1):
        try:
            retrieve(url, partial)
        except Exception as exc:
            # Best-effort by design, but report the failure instead of hiding it.
            print("Download attempt {}/{} failed: {}".format(attempt, attempts, exc))
            continue
        os.rename(partial, filename)
        return True

    if os.path.exists(partial):
        os.remove(partial)
    return False


def ensure_output_dir():
    """Create OUTPUT_DIR if it does not exist yet."""
    if not os.path.isdir(OUTPUT_DIR):
        os.makedirs(OUTPUT_DIR)


def build_driver():
    """Create the Chrome driver with image loading disabled."""
    options = webdriver.ChromeOptions()
    # Optional switches kept from the original script; uncomment as needed.
    #options.add_argument('user-agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:63.0) Gecko/20100101 Firefox/63.0"')
    #options.add_argument('--proxy-server=%s' % "http://127.0.0.1:24000")
    #options.add_argument("headless")
    # Content setting 2 is Chrome's "block" value, so images are not loaded.
    options.experimental_options["prefs"] = {
        "profile.default_content_settings": {"images": 2},
        "profile.managed_default_content_settings": {"images": 2},
    }
    return webdriver.Chrome(chrome_options=options)  #, executable_path=r'/usr/bin/chromedriver')


def log_in(driver):
    """Open the course page, sign in and return to the course dashboard."""
    driver.get(COURSE_URL)
    time.sleep(20)  # fixed wait for the login widget to render

    driver.find_element_by_xpath(LOGIN_BUTTON_XPATH).click()
    time.sleep(5)

    driver.find_element_by_id("username").send_keys(USERNAME)
    password_field = driver.find_element_by_id("password")
    password_field.send_keys(PASSWORD)
    password_field.send_keys(Keys.ENTER)
    time.sleep(5)

    driver.get(COURSE_URL)
    time.sleep(5)


def find_lessons(driver):
    """Return the lesson elements currently shown on the dashboard."""
    return driver.find_elements_by_xpath(LESSON_XPATH)


def wait_for_video(driver, timeout=60):
    """Return True once a <video> element is present, False otherwise."""
    try:
        WebDriverWait(driver, timeout).until(
            EC.presence_of_element_located((By.TAG_NAME, "video"))
        )
    except Exception:
        # Any failure here (typically a timeout) means "no video to fetch".
        return False
    time.sleep(5)
    return True


def process_lesson(driver, index, total):
    """Download the video of the lesson at zero-based *index*, if it has one.

    Lessons whose output file already exists are skipped before clicking, so
    re-runs do not spend time waiting on pages that are already downloaded.
    Errors from Selenium propagate to the caller.
    """
    # Elements go stale after every navigation, so look them up again.
    lesson = find_lessons(driver)[index]
    title = lesson_title(lesson.text)
    print("Index {} - {}".format(index + 1, title))

    filename = video_filename(index, title)
    if os.path.exists(filename):
        return

    lesson.click()
    time.sleep(10)
    if not wait_for_video(driver):
        return

    url = driver.execute_script("return document.getElementsByTagName('video')[0].src;")
    print("Item {}/{}".format(index + 1, total))
    print(url)
    print("=" * 20)

    if not url:
        print("No video source found for index {}".format(index + 1))
        return
    if not download_with_retries(url, filename):
        print("Giving up on {} after {} attempts".format(filename, DOWNLOAD_ATTEMPTS))


def main():
    """Log in and download every lesson video listed on the dashboard."""
    driver = build_driver()
    try:
        log_in(driver)
        ensure_output_dir()

        total = len(find_lessons(driver))
        index = 0
        failures = 0
        # `index < total` visits every lesson, including the last one.
        while index < total:
            time.sleep(10)
            try:
                process_lesson(driver, index, total)
            except Exception:
                failures += 1
                print("Error at index : " + str(index))
                traceback.print_exc()
                # Retry the same lesson a few times, then move on instead of
                # looping forever on a lesson that keeps failing.
                if failures >= MAX_LESSON_FAILURES:
                    index += 1
                    failures = 0
            else:
                index += 1
                failures = 0

            # Go back to the dashboard for the next lesson (or the retry).
            driver.get(COURSE_URL)
            time.sleep(5)
    finally:
        # Always shut the browser down, even after an unexpected error.
        driver.quit()


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement