Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from __future__ import unicode_literals
- from bs4 import BeautifulSoup
- import requests
- import os
- import youtube_dl
# Scrape the table of contents of one Safari Books Online title and print a
# ``youtube-dl`` command line for every video link found in it.

# --- Configuration ---------------------------------------------------------
url = 'https://www.safaribooksonline.com/library/view/ccna-routing-and/9780134580715/'
domain = 'https://www.safaribooksonline.com'
output_folder = 'output'
# NOTE(review): these credentials are hard-coded and are not used anywhere in
# the visible code; if a login step is added, read them from the environment
# instead of committing them to source.
username = 'username'
password = 'SuperSecretPassword'

# Directory containing this script; all output is created beneath it.
d = os.path.dirname(os.path.abspath(__file__))


def _path_component(name, fallback='undefined'):
    """Return *name* made safe for use as a single path component.

    Surrounding whitespace is stripped and path separators are replaced with
    ``_`` so a title such as "TCP/IP" cannot silently create an extra
    directory level. A missing or empty name yields *fallback*.
    """
    cleaned = (name or '').strip()
    for separator in (os.sep, os.altsep):
        if separator:
            cleaned = cleaned.replace(separator, '_')
    return cleaned or fallback


def _print_download_commands(lesson, target_dir):
    """Print one ``youtube-dl`` command per video link nested in *lesson*.

    Args:
        lesson: A parsed ``<li>`` table-of-contents entry.
        target_dir: Directory the generated ``.mp4`` paths should point into.
    """
    video_list = lesson.ol
    if video_list is None:
        # A TOC entry without a nested list has no videos to emit.
        return
    for index, video in enumerate(video_list.find_all('a')):
        href = video.get('href')
        if not href:
            # An anchor without a target cannot be downloaded; skip it but
            # keep the numbering of the remaining entries unchanged.
            continue
        video_name = str(index) + ' - ' + _path_component(video.text)
        video_url = domain + href
        video_out = os.path.join(target_dir, video_name + '.mp4')
        print("youtube-dl --output '{}' {}".format(video_out, video_url))


# --- Fetch and parse the table of contents ---------------------------------
# A timeout keeps the script from hanging forever on a stalled connection,
# and a failed request is reported as such instead of being parsed as if it
# were the table of contents.
req = requests.get(url, timeout=30)
req.raise_for_status()
soup = BeautifulSoup(req.text, 'html.parser')
lessons = soup.find_all('li', class_='toc-level-1')
print("All lessons is: ", len(lessons))
print("*"*100)

# --- Classify the source (book / video) ------------------------------------
# Looked up but not used by the visible code; the category is decided from
# the page text below.
source_category_block = soup.find('div', class_='description t-description')
page_text = soup.text
if 'Book Description' in page_text:
    source_category = "book"
elif 'Video Description' in page_text:
    source_category = "video"
else:
    source_category = "undefined"
print("Your source category is:", source_category)

source_name_block = soup.find('h1', class_='t-title')
if source_name_block:
    source_name = source_name_block.text
    print("Your source name is:", source_name)
else:
    source_name = None

# --- Build the output tree --------------------------------------------------
# ``source_name`` may be None when the title heading is missing; joining None
# into a path raises TypeError, so a placeholder directory name is used.
Path = os.path.join(d, output_folder, source_category, _path_component(source_name))
os.makedirs(Path, exist_ok=True)

# Lessons that appear before the first "Module ..." heading are filed here.
module_name = 'Module 0'
for lesson in lessons:
    if lesson.a is None:
        # Entry has no link, hence no name to file it under.
        continue
    lesson_name = lesson.a.text
    if lesson_name.startswith('Module') and 'Summary' not in lesson_name:
        # A module heading: its videos live directly in the module folder,
        # and subsequent lessons are nested beneath it.
        module_name = lesson_name
        lesson_dir = os.path.join(Path, _path_component(module_name))
    else:
        lesson_dir = os.path.join(
            Path,
            _path_component(module_name),
            _path_component(lesson_name),
        )
    os.makedirs(lesson_dir, exist_ok=True)
    _print_download_commands(lesson, lesson_dir)
def my_hook(d):
    """Progress callback that announces when a download reports completion.

    Args:
        d: Status mapping supplied by the downloader; only its ``'status'``
            entry is read here.
    """
    if d['status'] != 'finished':
        return
    print('Done downloading, now converting ...')
# Options handed to the embedded youtube-dl downloader.
# NOTE(review): 'outtmpl' has no '%(ext)s' placeholder, so files are
# presumably written without an extension - confirm this is intended.
ydl_opts = dict(
    format='bestaudio/best',
    outtmpl='%(id)s',
    noplaylist=True,
    progress_hooks=[my_hook],
)

# NOTE(review): this fetches one hard-coded YouTube URL and does not consume
# the youtube-dl commands printed earlier in the file - it looks like a
# standalone smoke test of the downloader; confirm before relying on it.
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=pwp1CH5R-w4'])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement