Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import os
import shlex
import shutil
import string
import subprocess
import threading
from subprocess import check_output
from threading import Thread

import requests
from bs4 import BeautifulSoup
# Module-level state shared between main() and getVideoURL().

# Base address of the video section; main() rebinds this to the table-of-
# contents page of the course it is processing.
url = 'https://learning.oreilly.com/videos/'
# Site root; main() reassigns it to the same value.
domain = 'https://learning.oreilly.com/'
# Directory the download entries point into; main() rebinds it per course
# and getVideoURL() reads it when building an entry.
output_folder = "./"
# Number of getVideoURL() calls made so far (incremented there).
index = 0
# aria2c input-file entries produced by getVideoURL().
lista = []
# Guards the shared call counter: main() runs getVideoURL() on many threads.
_index_lock = threading.Lock()


def getVideoURL(cmd, module_name, lesson_name, video_name):
    """Run ``cmd`` to resolve a video URL and queue an aria2c entry for it.

    The command's standard output is taken as the direct media URL.  On
    success one input-file entry (URL line followed by indented ``dir=`` and
    ``out=`` option lines) is appended to the module-level ``lista``.

    Args:
        cmd: Command to execute.  A string is run through the shell (as
            before); a list of arguments is executed directly.
        module_name: Module directory name, relative to ``output_folder``.
        lesson_name: Lesson directory name inside the module directory.
        video_name: File name for the video, without the ``.mp4`` extension.

    Returns:
        None.  When the command fails or prints nothing, the problem is
        reported on stdout and no entry is queued.
    """
    global index

    with _index_lock:
        print(index)
        index += 1

    try:
        output = check_output(cmd, shell=isinstance(cmd, str))
    except (subprocess.CalledProcessError, OSError) as exc:
        # Previously this path fell through to an unbound `output` and the
        # thread died with a NameError.
        print("Could not resolve URL for {}: {}".format(video_name, exc))
        return

    # check_output() returns bytes on Python 3; formatting them directly
    # would write "b'...'" into the entry instead of the URL.
    if isinstance(output, bytes):
        output = output.decode('utf-8', 'replace')
    resolved = output.strip()
    if not resolved:
        print("No URL returned for {}".format(video_name))
        return

    directory = output_folder + '/' + module_name + '/' + lesson_name + '/'
    textout = "{}\n dir={} \n out={}\n".format(
        resolved, directory, video_name + '.mp4')
    # The old insert(index, ...) always landed past the end of the list
    # (index is bumped before any insertion), so it was an append already.
    lista.append(textout)
def main(video_name, username=None, password=None, download_count=5):
    """Collect the video links of one course and download them with aria2c.

    Fetches the course page, walks its ``li.toc-level-1`` entries, recreates
    the output directory tree, resolves every lesson video's URL through
    ``youtube-dl -g`` (one ``getVideoURL`` thread per video), writes the
    resulting entries to ``myOutFile.txt`` and hands that file to ``aria2c``.
    The raw page links are also written to ``links.txt``.

    Args:
        video_name: Course path below ``/videos/``, e.g.
            ``"react-the/9781789132229"``.  The part before the first ``/``
            names the output directory.
        username: Account name passed to youtube-dl.  Defaults to the
            ``OREILLY_USERNAME`` environment variable.
        password: Account password passed to youtube-dl.  Defaults to the
            ``OREILLY_PASSWORD`` environment variable.
        download_count: Value for aria2c's ``-j`` option.

    Raises:
        ValueError: If no credentials were given or found in the environment.

    Note:
        The existing output directory for the course is deleted first.
    """
    global url, domain, output_folder

    # Credentials must never live in the source file.
    username = username or os.environ.get("OREILLY_USERNAME")
    password = password or os.environ.get("OREILLY_PASSWORD")
    if not username or not password:
        raise ValueError(
            "credentials required: pass username/password or set "
            "OREILLY_USERNAME and OREILLY_PASSWORD")

    url = 'https://learning.oreilly.com/videos/' + video_name
    domain = 'https://learning.oreilly.com/'
    output_folder = "./" + video_name.split("/")[0]

    req = requests.get(url)
    soup = BeautifulSoup(req.text, 'html.parser')
    lessons = soup.find_all('li', class_='toc-level-1')
    print("Total Chapters : {}".format(len(lessons)))

    shutil.rmtree(output_folder, ignore_errors=True)
    os.makedirs(output_folder)
    module_name = 'Module 0'

    valid_chars = set("-_.() " + string.ascii_letters + string.digits)
    workers = []
    # `lista` is shared across calls; only entries added from here on belong
    # to this course.
    first_entry = len(lista)
    # NOTE(review): the paste lost its indentation; the counter is advanced
    # once per numbered (non-module) lesson -- confirm against the original.
    lesson_number = 1

    # "w" truncates leftovers from a previous run, replacing the old
    # remove-then-append sequence that could keep a stale links.txt.
    with open("links.txt", "w") as links_file:
        for lesson in lessons:
            if lesson.a is None:
                continue
            # Names stay text throughout; encoding them to bytes breaks the
            # string operations below on Python 3.
            lesson_name = ''.join(
                c for c in lesson.a.text if c in valid_chars)
            videos = lesson.ol.find_all('a') if lesson.ol is not None else []

            if lesson_name.startswith('Module') and 'Summary' not in lesson_name:
                # A module heading: becomes the parent directory of the
                # lessons that follow; its own links are only listed.
                module_name = lesson_name
                os.makedirs(output_folder + '/' + module_name)
                for position, video in enumerate(videos, 1):
                    title = _video_title(position, video.text)
                    video_url = video.get('href')
                    print("ACA!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!", video_url)
                    print(lesson_name + '/' + title + '.mp4')
                continue

            lesson_name = "{}. {}".format(lesson_number, lesson_name)
            os.makedirs(output_folder + '/' + module_name + '/' + lesson_name)
            for position, video in enumerate(videos, 1):
                title = _video_title(position, video.text)
                video_url = video.get('href')
                if not video_url:
                    # Anchor without a target: nothing to download.
                    continue
                links_file.write(video_url + "\n")
                # Quote every value: the command is run through the shell.
                cmd = "youtube-dl -u {} -p {} {} -g".format(
                    shlex.quote(username),
                    shlex.quote(password),
                    shlex.quote(video_url))
                worker = Thread(
                    target=getVideoURL,
                    args=(cmd, module_name, lesson_name, title))
                worker.start()
                workers.append(worker)
                print(lesson_name + '/' + title + '.mp4')
            lesson_number += 1

    # Wait for every URL lookup; otherwise the input file below would be
    # written (and aria2c started) before any entry exists.
    for worker in workers:
        worker.join()

    with open("myOutFile.txt", "w") as aria_input:
        aria_input.writelines(lista[first_entry:])

    subprocess.call(
        ["aria2c", "-j", str(download_count), "-i", "myOutFile.txt"])


def _video_title(position, raw_title):
    """Return ``"<position> - <title>"`` with ``?``, ``:`` and ``"`` removed."""
    title = str(position) + ' - ' + raw_title
    return title.replace("?", "").replace(":", "").replace('"', "")
# Courses to download, given as "<slug>/<id>" paths below /videos/.
# (Named video_ids rather than `list` so the builtin is not shadowed.)
video_ids = [
    # "reactive-microservice-design/9781788626378",
    "react-the/9781789132229",
]
for video_id in video_ids:
    print(video_id)
    main(video_id)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement