Advertisement
Guest User

Untitled

a guest
Jan 25th, 2018
154
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 8.89 KB | None | 0 0
  1. from __future__ import unicode_literals
  2. from bs4 import BeautifulSoup
  3. import requests
  4. import os
  5. import youtube_dl
  6. import datetime
  7.  
  8. # url = 'https://www.safaribooksonline.com/library/view/ccna-routing-and/9780134580715/'
  9. # url = 'https://www.safaribooksonline.com/library/view/learning-python-web/9781785280351/'
  10. # url = 'https://www.safaribooksonline.com/library/view/web-scraping-in/200000006A0423/'
  11. # url = 'https://www.safaribooksonline.com/library/view/python-design-patterns/9781786460677/'
  12. # url = 'https://www.safaribooksonline.com/library/view/introduction-to-python/9781491904794/'
  13. # url = 'https://www.safaribooksonline.com/library/view/learning-python-web/9781785280351//'
  14. url = 'https://www.safaribooksonline.com/library/view/master-the-fundamentals/200000006A0422/'
  15. domain = 'https://www.safaribooksonline.com'
  16. output_folder = 'output'
  17. username = 'username'
  18. password = 'SuperSecretPassword'
  19. print("*"*100)
  20. print(f"Let's go hacking following resources :))) {url}")
  21. print("*"*100)
  22. lst_exception = []
  23. dict_time = {}
  24.  
  25. d = os.path.dirname(os.path.abspath(__file__))
  26.  
  27. req = requests.get(url)
  28.  
  29. soup = BeautifulSoup(req.text, 'html.parser')
  30.  
  31.  
  32. lessons = soup.find_all('li', class_='toc-level-1')
  33. # print("All lessons is: ", len(lessons))
  34. # print("*"*100)
  35.  
  36. source_category_block = soup.find('div', class_='description t-description')
  37. if 'Book Description' in soup.text:
  38.     source_category = "book"
  39.     # print("Your source category is:", source_category)
  40. elif 'Video Description' in soup.text:
  41.     source_category = "video"
  42.     # print("Your source category is:", source_category)
  43. else:
  44.     source_category = "undefined"
  45.     # print("Your source category is:", source_category)
  46.  
  47. source_name_block = soup.find('h1', class_='t-title')
  48.  
  49. if source_name_block:
  50.     source_name = source_name_block.text
  51.     # print("Your source name is:", source_name)
  52. else:
  53.     source_name = None
  54.  
  55. Path = os.path.join(d, output_folder, source_category, source_name)
  56. os.makedirs(Path, exist_ok=True)
  57. module_name = 'Module 0'
  58.  
  59. ydl_opts = {
  60. }
  61.  
  62. for lesson in lessons:
  63.     lesson_name = lesson.a.text
  64.     if lesson_name.startswith('Module') and not 'Summary' in lesson_name:
  65.         module_name = lesson_name
  66.         os.makedirs(Path + '/' + module_name, exist_ok=True)
  67.         for index, video in enumerate(lesson.ol.find_all('a')):
  68.             video_name = str(index) + ' - ' + video.text
  69.             video_url = domain + video.get('href')
  70.             video_out = Path + '/' + module_name + '/' + video_name + '.mp4'
  71.             video_out_for_youtube_dl = Path + '/' + module_name
  72.             print("youtube-dl --output '{}' {}".format(video_out, video_url))
  73.             ydl_opts['outtmpl'] = video_out_for_youtube_dl + "/%(title)s-%(id)s.%(ext)s"
  74.             with youtube_dl.YoutubeDL(ydl_opts) as ydl:
  75.                 print("Downloads:", video_name)
  76.                 try:
  77.                     ydl.download([video_url])
  78.                 except Exception as e:
  79.                     lst_exception.append(video.text)
  80.                     print(e, e.args)
  81.         for time_video in lesson.ol.find_all('li'):
  82.             # print(11111111, time_video)
  83.             # print(4444444, time_video.find('a').text)
  84.             # print(5555555, time_video.find('span').text)
  85.             if time_video.find('a').text and time_video.find('span').text:
  86.                 dict_time[time_video.find('a').text] = time_video.find('span').text
  87.                 # print("aaaaaaaaaaa", dict_time)
  88.     else:
  89.         os.makedirs(Path + '/' + module_name + '/' + lesson_name, exist_ok=True)
  90.         for index, video in enumerate(lesson.ol.find_all('a')):
  91.             video_name = str(index) + ' - ' + video.text
  92.             video_url = domain + video.get('href')
  93.             video_out = Path + '/' + module_name + '/' + lesson_name + '/' + video_name + '.mp4'
  94.             video_out_for_youtube_dl = Path + '/' + module_name + '/' + lesson_name
  95.             print("youtube-dl --output '{}' {}".format(video_out, video_url))
  96.             ydl_opts['outtmpl'] = video_out_for_youtube_dl + "/%(title)s-%(id)s.%(ext)s"
  97.             with youtube_dl.YoutubeDL(ydl_opts) as ydl:
  98.                 print("Downloads:", video_name)
  99.                 try:
  100.                     ydl.download([video_url])
  101.                 except Exception as e:
  102.                     lst_exception.append(video.text)
  103.                     print(e, e.args)
  104.  
  105.         for time_video in lesson.ol.find_all('li'):
  106.             # print(222222222, time_video)
  107.             # print(6666666, time_video.find('a').text)
  108.             # print(7777777, time_video.find('span').text)
  109.             if time_video.find('a').text and time_video.find('span').text:
  110.                 dict_time[time_video.find('a').text] = time_video.find('span').text
  111.                 # print("bbbbbbbbbbb", dict_time)
  112.  
  113. print("*"*100)
  114. print("The END :)))"*8)
  115. print("*"*100)
  116. print("Statistic:")
  117. print('\n')
  118. print("All lessons is: ", len(lessons))
  119. print("Your source category is:", source_category)
  120. print("Your source name is:", source_name)
  121. print('\n')
  122. print(f"Number of videos that were available for download is {len(dict_time.keys())}.")
  123. print(f"The amount of video that has been able to download is {len(dict_time.keys()) - len(lst_exception)}.")
  124. print(f"VIDEOS JSON : {dict_time}")
  125.  
  126. if len(lst_exception) != 0:
  127.     print('\n')
  128.     print("<----------     You have the following Exception !!!      -------------->")
  129.     print('\n')
  130.     print(f"Not downloads {len(lst_exception)} resources: ")
  131.     for elem in lst_exception:
  132.         if dict_time.get(elem):
  133.             print({elem: dict_time.get(elem)})
  134.  
  135. timeList_available = dict_time.values()
  136. sum_available = datetime.timedelta()
  137. for i in timeList_available:
  138.     (h, m, s) = i.split(':')
  139.     d = datetime.timedelta(hours=int(h), minutes=int(m), seconds=int(s))
  140.     sum_available += d
  141.  
  142. # print(7865435678654567865456786543, list(dict_time))
  143. for el in lst_exception:
  144.     # print(34245324242452452, el)
  145.     if el in list(dict_time):
  146.         # print("REWHQGRGQWJGWGHDFJBFDHASHFDBBFCSCFHSHFKCABSHFBAKSFBAKFFMNAFAM")
  147.         dict_time.pop(el, None)
  148.         # print("jkjhgfd", dict_time, type(dict_time))
  149.  
  150. timeList_downloads = dict_time.values()
  151. sum_downloads = datetime.timedelta()
  152. for i in timeList_downloads:
  153.     (h, m, s) = i.split(':')
  154.     ddd = datetime.timedelta(hours=int(h), minutes=int(m), seconds=int(s))
  155.     sum_downloads += ddd
  156.  
  157.  
  158. print('\n')
  159. print("<++++++++++     Information about time watch videos      ++++++++++++++>")
  160. print('\n')
  161. print(f"Summary time watch video (available) is : {str(sum_available)}")
  162. print(f"Summary time watch video (downloads) is : {str(sum_downloads)}")
  163.  
  164.  
  165.  
  166. """
  167. def my_hook(d):
  168.    if d['status'] == 'finished':
  169.        print('Done downloading, now converting ...')
  170.  
  171. ydl_opts = {
  172.    'format': 'bestaudio/best',
  173.    'outtmpl': '%(id)s',
  174.    'noplaylist' : True,
  175.    'progress_hooks': [my_hook],
  176. }
  177.  
  178. with youtube_dl.YoutubeDL(ydl_opts) as ydl:
  179.    ydl.download(['https://www.youtube.com/watch?v=pwp1CH5R-w4'])
  180.  
  181. """
  182.  
  183. """
  184. timeList = [ '0:00:00', '0:00:15', '9:30:56' ]
  185. totalSecs = 0
  186. for tm in timeList:
  187.    timeParts = [int(s) for s in tm.split(':')]
  188.    totalSecs += (timeParts[0] * 60 + timeParts[1]) * 60 + timeParts[2]
  189. totalSecs, sec = divmod(totalSecs, 60)
  190. hr, min = divmod(totalSecs, 60)
  191. print "%d:%02d:%02d" % (hr, min, sec)
  192.  
  193.  
  194.  
  195. import datetime
  196.  
  197. timeList = ['0:00:00', '0:00:15', '9:30:56']
  198. sum = datetime.timedelta()
  199. for i in timeList:
  200.    (h, m, s) = i.split(':')
  201.    d = datetime.timedelta(hours=int(h), minutes=int(m), seconds=int(s))
  202.    sum += d
  203. print(str(sum))
  204.  
  205.  
  206.  
  207. timeList = [ '0:00:00', '0:00:15', '9:30:56' ]
  208.  
  209. ttt = [map(int,i.split()[-1].split(':')) for i in timeList]
  210. seconds=reduce(lambda x,y:x+y[0]*3600+y[1]*60+y[2],ttt,0)
  211. #seconds == 34271
  212. This one looks horrible too ->
  213.  
  214. zero_time = datetime.datetime.strptime('0:0:0', '%H:%M:%S')
  215. ttt=[datetime.datetime.strptime(i, '%H:%M:%S')-zero_time for i in timeList]
  216. delta=sum(ttt,zero_time)-zero_time
  217. # delta==datetime.timedelta(0, 34271)
  218.  
  219. # str(delta)=='9:31:11' # this seems good, but
  220. # if we have more than 1 day we get for example str(delta)=='1 day, 1:05:22'
  221. Really frustrating is also this ->
  222.  
  223. sum(ttt,zero_time).strftime('%H:%M:%S')  # it is only "modulo" 24 :(
  224. I really like to see one-liner so, I tried to make one in python3 :P (good result but horrible look)
  225.  
  226. import functools
  227. timeList = ['0:00:00','0:00:15','9:30:56','21:00:00'] # notice additional 21 hours!
  228. sum_fnc=lambda ttt:(lambda a:'%02d:%02d:%02d' % (divmod(divmod(a,60)[0],60)+(divmod(a,60)[1],)))((lambda a:functools.reduce(lambda x,y:x+y[0]*3600+y[1]*60+y[2],a,0))((lambda a:[list(map(int,i.split()[-1].split(':'))) for i in a])(ttt)))
  229. # sum_fnc(timeList) -> '30:40:11'
  230.  
  231.  
  232. lines = ["0:00:00", "0:00:15", "9:30:56"]
  233. total = 0
  234. for line in lines:
  235.    h, m, s = map(int, line.split(":"))
  236.    total += 3600*h + 60*m + s
  237. print "%02d:%02d:%02d" % (total / 3600, total / 60 % 60, total % 60)
  238. """
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement