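"""Untitled scratch script: download the videos listed on a Safari Books Online
table-of-contents page with youtube-dl and print download statistics."""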

from __future__ import unicode_literals
from bs4 import BeautifulSoup
import requests
import os
import youtube_dl
import datetime

# url = 'https://www.safaribooksonline.com/library/view/ccna-routing-and/9780134580715/'
# url = 'https://www.safaribooksonline.com/library/view/learning-python-web/9781785280351/'
# url = 'https://www.safaribooksonline.com/library/view/web-scraping-in/200000006A0423/'
# url = 'https://www.safaribooksonline.com/library/view/python-design-patterns/9781786460677/'
# url = 'https://www.safaribooksonline.com/library/view/introduction-to-python/9781491904794/'
# url = 'https://www.safaribooksonline.com/library/view/learning-python-web/9781785280351//'
# Table-of-contents page of the resource to fetch, and the site root that the
# relative video links found on that page are appended to.
url = 'https://www.safaribooksonline.com/library/view/master-the-fundamentals/200000006A0422/'
domain = 'https://www.safaribooksonline.com'

# Name of the directory, created next to this script, that receives downloads.
output_folder = 'output'

# NOTE(review): placeholder credentials. Nothing visible in this script hands
# them to requests or youtube-dl -- confirm whether they are still needed, and
# keep real secrets out of source control.
username = 'username'
password = 'SuperSecretPassword'

# Shared bookkeeping filled in while downloading: titles of the videos whose
# download raised, and a mapping of video title -> duration text.
lst_exception = list()
dict_time = dict()

# Directory that contains this script; the output tree is rooted here.
script_path = os.path.abspath(__file__)
d = os.path.dirname(script_path)

# Start-up banner.
banner = "*" * 100
print(banner, f"Let's go hacking following resources :))) {url}", banner, sep="\n")

# Fetch the table-of-contents page.  The timeout keeps a stalled connection
# from hanging the script forever, and raise_for_status() stops on an HTTP
# error instead of scraping an error page.
req = requests.get(url, timeout=30)
req.raise_for_status()

soup = BeautifulSoup(req.text, 'html.parser')

# Every top-level table-of-contents entry.
lessons = soup.find_all('li', class_='toc-level-1')

# Looked up but not used by the visible code; kept so the name stays defined.
source_category_block = soup.find('div', class_='description t-description')

# Classify the resource by the description heading present on the page.
page_text = soup.text
if 'Book Description' in page_text:
    source_category = "book"
elif 'Video Description' in page_text:
    source_category = "video"
else:
    source_category = "undefined"

# Title of the resource; None when the page has no (non-blank) title heading.
source_name_block = soup.find('h1', class_='t-title')
source_name = source_name_block.text.strip() if source_name_block else None
if not source_name:
    source_name = None

# Directory name derived from the title.  A missing title falls back to
# 'undefined' (os.path.join would raise TypeError on None), and path
# separators are replaced so the title cannot create nested directories.
folder_name = source_name or 'undefined'
for separator in ('/', '\\'):
    folder_name = folder_name.replace(separator, '-')

# Root directory for everything downloaded from this resource.
Path = os.path.join(d, output_folder, source_category, folder_name)
os.makedirs(Path, exist_ok=True)
# Directory name used for lessons listed before the first 'Module' entry.
module_name = 'Module 0'

# Options handed to every youtube-dl run; 'outtmpl' is overwritten per video.
ydl_opts = {
}


def _download_videos(lesson_list, target_dir):
    """Download every video linked from *lesson_list* into *target_dir*.

    *lesson_list* is the lesson's <ol> element, or None when the entry has no
    nested list (in which case nothing is downloaded).  Titles of videos that
    could not be downloaded are appended to ``lst_exception``.
    """
    if lesson_list is None:
        return
    for index, video in enumerate(lesson_list.find_all('a')):
        video_name = str(index) + ' - ' + video.text
        href = video.get('href')
        if not href:
            # Without a link there is nothing to hand to youtube-dl; record
            # the entry as failed instead of crashing on ``domain + None``.
            lst_exception.append(video.text)
            print("Skipped (no link):", video_name)
            continue
        video_url = domain + href
        # The printed command is informational only: the real file name comes
        # from the 'outtmpl' template set below.
        video_out = target_dir + '/' + video_name + '.mp4'
        print("youtube-dl --output '{}' {}".format(video_out, video_url))
        ydl_opts['outtmpl'] = target_dir + "/%(title)s-%(id)s.%(ext)s"
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            print("Downloads:", video_name)
            try:
                ydl.download([video_url])
            except Exception as e:
                # Best effort: remember the failure and carry on with the
                # remaining videos.
                lst_exception.append(video.text)
                print(e, e.args)


def _record_durations(lesson_list):
    """Store ``link text -> span text`` in ``dict_time`` for each <li> item.

    Items missing either the link or the span, or whose text is empty, are
    skipped.  The span text is parsed as a duration when the statistics are
    printed.
    """
    if lesson_list is None:
        return
    for item in lesson_list.find_all('li'):
        link = item.find('a')
        duration = item.find('span')
        if link is None or duration is None:
            continue
        if link.text and duration.text:
            dict_time[link.text] = duration.text


for lesson in lessons:
    if lesson.a is None:
        # An entry without a title link cannot be named or downloaded.
        print("Skipped a table-of-contents entry without a title link")
        continue
    lesson_name = lesson.a.text
    if lesson_name.startswith('Module') and 'Summary' not in lesson_name:
        # A new module starts: its videos go directly into the module folder.
        module_name = lesson_name
        lesson_dir = Path + '/' + module_name
    else:
        # A plain lesson is nested inside the most recently seen module.
        lesson_dir = Path + '/' + module_name + '/' + lesson_name
    os.makedirs(lesson_dir, exist_ok=True)
    _download_videos(lesson.ol, lesson_dir)
    _record_durations(lesson.ol)

def _parse_duration(text):
    """Convert 'H:M:S', 'M:S' or 'S' text into a ``datetime.timedelta``.

    Raises ValueError when *text* has more than three colon-separated fields
    or a field is not an integer.
    """
    fields = [int(field) for field in text.split(':')]
    if len(fields) > 3:
        raise ValueError("too many fields in duration: {!r}".format(text))
    # Left-pad with zeros so shorter forms such as 'M:S' are accepted too.
    hours, minutes, seconds = [0] * (3 - len(fields)) + fields
    return datetime.timedelta(hours=hours, minutes=minutes, seconds=seconds)


# Titles whose download failed.  A set removes repeated titles and gives fast
# membership tests.
failed_titles = set(lst_exception)

# Count only failures that belong to the listed videos, so repeated titles or
# failures without a duration entry cannot skew the number (or make it
# negative).
available_count = len(dict_time)
downloaded_count = sum(1 for title in dict_time if title not in failed_titles)

print("*"*100)
print("The END :)))"*8)
print("*"*100)
print("Statistic:")
print('\n')
print("All lessons is: ", len(lessons))
print("Your source category is:", source_category)
print("Your source name is:", source_name)
print('\n')
print(f"Number of videos that were available for download is {available_count}.")
print(f"The amount of video that has been able to download is {downloaded_count}.")
print(f"VIDEOS JSON : {dict_time}")

if lst_exception:
    print('\n')
    print("<----------     You have the following Exception !!!      -------------->")
    print('\n')
    print(f"Not downloads {len(lst_exception)} resources: ")
    for elem in lst_exception:
        failed_duration = dict_time.get(elem)
        if failed_duration:
            print({elem: failed_duration})

# Parse every duration once.  An unreadable entry is reported and left out of
# the totals so that it cannot abort the final report.
parsed_durations = {}
for title, duration_text in dict_time.items():
    try:
        parsed_durations[title] = _parse_duration(duration_text)
    except ValueError:
        print(f"Skipped unreadable duration for {title!r}: {duration_text!r}")

# Total running time of everything listed, and of what was downloaded.
sum_available = sum(parsed_durations.values(), datetime.timedelta())
sum_downloads = sum(
    (length for title, length in parsed_durations.items()
     if title not in failed_titles),
    datetime.timedelta(),
)

# As before, dict_time ends up holding only the successfully downloaded videos.
for title in failed_titles:
    dict_time.pop(title, None)


print('\n')
print("<++++++++++     Information about time watch videos      ++++++++++++++>")
print('\n')
print(f"Summary time watch video (available) is : {str(sum_available)}")
print(f"Summary time watch video (downloads) is : {str(sum_downloads)}")


"""
def my_hook(d):
    if d['status'] == 'finished':
        print('Done downloading, now converting ...')

ydl_opts = {
    'format': 'bestaudio/best',
    'outtmpl': '%(id)s',
    'noplaylist' : True,
    'progress_hooks': [my_hook],
}

with youtube_dl.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=pwp1CH5R-w4'])

"""

"""
timeList = [ '0:00:00', '0:00:15', '9:30:56' ]
totalSecs = 0
for tm in timeList:
    timeParts = [int(s) for s in tm.split(':')]
    totalSecs += (timeParts[0] * 60 + timeParts[1]) * 60 + timeParts[2]
totalSecs, sec = divmod(totalSecs, 60)
hr, min = divmod(totalSecs, 60)
print "%d:%02d:%02d" % (hr, min, sec)


import datetime

timeList = ['0:00:00', '0:00:15', '9:30:56']
sum = datetime.timedelta()
for i in timeList:
    (h, m, s) = i.split(':')
    d = datetime.timedelta(hours=int(h), minutes=int(m), seconds=int(s))
    sum += d
print(str(sum))


timeList = [ '0:00:00', '0:00:15', '9:30:56' ]

ttt = [map(int,i.split()[-1].split(':')) for i in timeList]
seconds=reduce(lambda x,y:x+y[0]*3600+y[1]*60+y[2],ttt,0)
#seconds == 34271
This one looks horrible too:

zero_time = datetime.datetime.strptime('0:0:0', '%H:%M:%S')
ttt=[datetime.datetime.strptime(i, '%H:%M:%S')-zero_time for i in timeList]
delta=sum(ttt,zero_time)-zero_time
# delta==datetime.timedelta(0, 34271)

# str(delta)=='9:31:11' # this seems good, but
# if we have more than 1 day we get for example str(delta)=='1 day, 1:05:22'
This is also really frustrating:

sum(ttt,zero_time).strftime('%H:%M:%S')  # it is only "modulo" 24 :(
I really like one-liners, so I tried to write one in Python 3 (good result, but it looks horrible):

import functools
timeList = ['0:00:00','0:00:15','9:30:56','21:00:00'] # notice additional 21 hours!
sum_fnc=lambda ttt:(lambda a:'%02d:%02d:%02d' % (divmod(divmod(a,60)[0],60)+(divmod(a,60)[1],)))((lambda a:functools.reduce(lambda x,y:x+y[0]*3600+y[1]*60+y[2],a,0))((lambda a:[list(map(int,i.split()[-1].split(':'))) for i in a])(ttt)))
# sum_fnc(timeList) -> '30:40:11'


lines = ["0:00:00", "0:00:15", "9:30:56"]
total = 0
for line in lines:
    h, m, s = map(int, line.split(":"))
    total += 3600*h + 60*m + s
print "%02d:%02d:%02d" % (total / 3600, total / 60 % 60, total % 60)
"""