Advertisement
Guest User

Untitled

a guest
Apr 7th, 2019
107
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.35 KB | None | 0 0
  1. from bs4 import BeautifulSoup
  2. import requests
  3. import shutil
  4. import os
  5. import string
  6. import threading
  7. from threading import Thread
  8.  
  9. from subprocess import check_output
  10. url = 'https://learning.oreilly.com/videos/'
  11. domain = 'https://learning.oreilly.com/'
  12. output_folder = "./"
  13. index =0
  14. lista = []
  15.  
  16. def getVideoURL(cmd,module_name,lesson_name,video_name):
  17. global url,domain,output_folder,index
  18. print(index)
  19. index+=1
  20. while True:
  21. try:
  22. output = check_output(cmd,shell=True)
  23. break
  24. except Exception:
  25. break
  26. textout= "{} dir={} \n out={}\n".format(output,output_folder + '/' + module_name + '/' + lesson_name + '/',video_name + '.mp4')
  27. lista.insert(index,textout)
  28.  
  29.  
  30. def main(video_name):
  31. global url,domain,output_folder
  32. # video_name = "reactive-microservice-design/9781788626378"
  33. url = 'https://learning.oreilly.com/videos/'+video_name
  34. domain = 'https://learning.oreilly.com/'
  35. output_folder = "./"+video_name.split("/")[0]
  36. username = 'juan.andres.208@gmail.com'
  37. password = 'Manuela2008'
  38. download_count = 5
  39.  
  40. req = requests.get(url)
  41. soup = BeautifulSoup(req.text, 'html.parser')
  42.  
  43. lessons = soup.find_all('li', class_='toc-level-1')
  44. print("Total Chapters : {}".format(len(lessons)))
  45.  
  46. shutil.rmtree(output_folder, ignore_errors=True)
  47. os.makedirs(output_folder)
  48. module_name = 'Module 0'
  49.  
  50. try:
  51. os.remove("myOutFile.txt")
  52. os.remove("links.txt")
  53. except Exception:
  54. pass
  55.  
  56. outF = open("myOutFile.txt", "a")
  57. outV = open("links.txt", "a")
  58. i=1
  59.  
  60. for lesson in lessons:
  61. valid_chars = "-_.() %s%s" % (string.ascii_letters, string.digits)
  62. lesson_name = ''.join(c for c in lesson.a.text if c in valid_chars)
  63. lesson_name = lesson_name.replace("?","").encode('utf8', 'replace')
  64.  
  65. if lesson_name.startswith('Module') and not 'Summary' in lesson_name:
  66. module_name = lesson_name.encode('utf8', 'replace')
  67. os.makedirs(output_folder + '/' + module_name)
  68. for index, video in enumerate(lesson.ol.find_all('a')):
  69. video_name = str(index+1) + ' - ' + video.text
  70. video_name = video_name.replace("?","").replace(":","").replace('"',"").encode('utf8', 'replace')
  71. video_url = video.get('href')
  72. print ("ACA!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!",video_url)
  73. video_out = output_folder + '/' + module_name + '/' + video_name + '.mp4'
  74. cmd = "youtube-dl -u {} -p {} {} -g".format(username, password, video_url)
  75. # output = check_output(cmd,shell=True)
  76. #print("URL is {}".format(output))
  77. # textout= "{} dir={} \n out={}\n".format(output,output_folder + '/' + module_name + '/' + lesson_name + '/',video_name + '.mp4').encode('utf8', 'ignore')
  78. # outF.write(textout)
  79. print(lesson_name + '/' + video_name + '.mp4')
  80. else:
  81. lesson_name = "{}. {}".format(i,lesson_name)
  82. os.makedirs(output_folder + '/' + module_name + '/' + lesson_name)
  83. for index, video in enumerate(lesson.ol.find_all('a')):
  84. try:
  85. video_name = str(index+1) + ' - ' + video.text
  86. video_name = video_name.replace("?","").replace(":","").replace('"',"").encode('utf8', 'replace')
  87. video_url = video.get('href')
  88. outV.write(video_url+"\n")
  89. video_out = output_folder + '/' + module_name + '/' + lesson_name + '/' + video_name + '.mp4'
  90. cmd = "youtube-dl -u {} -p {} {} -g".format(username, password, video_url)
  91. Thread(target=getVideoURL,args=(cmd,module_name,lesson_name,video_name)).start()
  92. print(lesson_name + '/' + video_name + '.mp4')
  93. # outF.write(textout)
  94. except Exception:
  95. pass
  96.  
  97. i += 1
  98.  
  99. outF.close()
  100. download_cmd = "aria2c -j {} -i myOutFile.txt".format(download_count)
  101. #check_output(download_cmd,shell=True)
  102. os.system(download_cmd)
  103. list = [
  104. #"reactive-microservice-design/9781788626378",
  105. "react-the/9781789132229"]
  106. for i in list:
  107. print(i)
  108. main(i)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement