Advertisement
dimkiriakos

youtube playlist downloader

Jul 21st, 2022
1,018
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 6.15 KB | None | 0 0
  1. """
  2. requirements
  3.    pip install pytube
  4.  
  5. usage:
  6.    save this file under any name, for example downloader.py
  7.    copy the playlist URL; it should look like this: https://www.youtube.com/playlist?list=PLZoTAELRMXVPBTrWtJkn3wWQxZkmTXGwe
  8.    then, on the command line, run:
  9.    python downloader.py <playlist URL> [destination directory]
  10.  
  11. """
  12.  
  13. import urllib.request
  14. import urllib.error
  15.  
  16. import re
  17. import sys
  18. import time
  19. import os
  20.  
  21. from pytube import YouTube
  22.  
  class progressBar:
      """Single-line console progress bar with a rate and time-left estimate.

      Attributes:
          barlength: number of cells between the brackets (excluding the
              ``>`` head).
          position: initialised to 0; not read or updated by the methods here.
          longest: length of the longest line drawn so far, used to blank
              out leftovers of earlier, longer lines.
      """

      def __init__(self, barlength=25):
          self.barlength = barlength
          self.position = 0
          self.longest = 0

      def print_progress(self, cur, total, start):
          """Redraw the bar in place on stdout.

          Args:
              cur: amount transferred so far.
              total: expected total amount.
              start: ``time.perf_counter()`` reading taken when the transfer
                  began (``time.clock()`` no longer exists since Python 3.8).
          """
          # The +1 keeps `elapsed` strictly positive so the rate never
          # divides by zero.
          elapsed = int(time.perf_counter() - start) + 1

          # Guard against total == 0 and clamp so the bar keeps its width.
          fraction = cur / total if total else 0.0
          fraction = min(max(fraction, 0.0), 1.0)
          width = int(self.barlength)
          filled = int(fraction * width)

          bar = '\r[' + '=' * filled + '>' + ' ' * (width - filled) + '] '
          bar += bytestostr(cur / elapsed) + '/s '  # Transfer rate
          if cur > 0:
              # The remaining time is unknown until something has arrived.
              bar += getHumanTime((total - cur) * (elapsed / cur)) + ' left'

          # Pad to the longest line so far so that a shorter line fully
          # overwrites the previous one.
          if len(bar) > self.longest:
              self.longest = len(bar)
          sys.stdout.write(bar.ljust(self.longest))
          sys.stdout.flush()

      def print_end(self, *args):
          """Blank out the progress line; positional arguments are ignored."""
          sys.stdout.write('\r{0}\r'.format(' ' * self.longest))
          sys.stdout.flush()
  45.  
  def getHumanTime(sec):
      """Describe a duration in seconds using its largest whole unit.

      Returns ``'<n> hour(s)'`` from 3600 seconds up, ``'<n> minute(s)'``
      from 60 seconds up, and ``'<n> second(s)'`` otherwise; ``n`` is
      truncated to an integer.
      """
      for unit_seconds, label in ((3600, 'hour(s)'), (60, 'minute(s)')):
          if sec >= unit_seconds:
              return '{0:d} {1}'.format(int(sec / unit_seconds), label)
      return '{0:d} second(s)'.format(int(sec))
  53.  
  def bytestostr(bts):
      """Format a byte count with two decimals and a binary-unit suffix.

      The value is converted with ``float`` and scaled by the largest
      power of 1024 (up to 1024**4) that it reaches; smaller values keep
      the plain ``b`` suffix.
      """
      amount = float(bts)
      for power, suffix in ((4, 'Tb'), (3, 'Gb'), (2, 'Mb'), (1, 'Kb')):
          unit = 1024 ** power
          if amount >= unit:
              return '%.2f%s' % (amount / unit, suffix)
      return '%.2fb' % amount
  71.  
  def getPageHtml(url):
      """Fetch ``url`` and return the response body as text.

      The body is decoded with the charset declared in the response
      headers, falling back to UTF-8; undecodable bytes are replaced
      rather than raising.

      Args:
          url: address to open with ``urllib.request.urlopen``.

      Returns:
          The decoded body as ``str``.

      Raises:
          SystemExit: with status 1 after printing the reason when the
              request fails with ``urllib.error.URLError``.
      """
      try:
          # The context manager closes the connection even if read() fails.
          with urllib.request.urlopen(url) as response:
              raw = response.read()
              charset = response.headers.get_content_charset() or 'utf-8'
      except urllib.error.URLError as e:
          print(e.reason)
          sys.exit(1)

      # Decode instead of str(bytes): str() would return the "b'...'" repr
      # with every non-ASCII byte and backslash escaped.
      try:
          return raw.decode(charset, errors='replace')
      except LookupError:
          # The server declared a charset Python does not know.
          return raw.decode('utf-8', errors='replace')
  79.  
  def getPlaylistUrlID(url):
      """Extract the value of the ``list=`` parameter from ``url``.

      The id is the text after the first ``list=`` up to the next ``&`` or
      ``#``, so parameters placed before it (for example ``v=`` in a watch
      URL) are not mistaken for the playlist id.

      Args:
          url: playlist or watch URL.

      Returns:
          The playlist id as ``str``.

      Raises:
          SystemExit: with status 1 after printing a message when ``url``
              contains no ``list=``.
      """
      _, marker, tail = url.partition('list=')
      if not marker:
          print(url, "is not a youtube playlist.")
          sys.exit(1)
      # Stop at the next query parameter or at a fragment.
      return tail.split('&', 1)[0].split('#', 1)[0]
  91.  
  def getFinalVideoUrl(vid_urls):
      """Turn raw ``watch?v=...`` fragments into absolute video URLs.

      Each fragment is cut at its first ampersand, written either as a
      literal ``&`` or as the JSON escape backslash-u0026, and prefixed
      with the YouTube host. Fragments that reduce to the same URL are
      returned once, in first-seen order.

      Args:
          vid_urls: iterable of fragments such as ``watch?v=ID&list=PL``.

      Returns:
          A list of absolute URLs without duplicates.
      """
      separators = ('&', '\\u0026')
      final_urls = []
      seen = set()
      for vid_url in vid_urls:
          # Cut at whichever separator form occurs first, if any.
          hits = [pos for pos in map(vid_url.find, separators) if pos != -1]
          url_amp = min(hits) if hits else len(vid_url)
          full_url = 'http://www.youtube.com/' + vid_url[:url_amp]
          if full_url not in seen:
              seen.add(full_url)
              final_urls.append(full_url)
      return final_urls
  100.  
  def getPlaylistVideoUrls(page_content, url):
      """Collect the video URLs that ``page_content`` lists for a playlist.

      Searches the page text for ``watch?v=...list=<playlist id>``
      fragments, converts them with ``getFinalVideoUrl``, prints the count
      and each URL, and returns them.

      Args:
          page_content: text of the playlist page.
          url: playlist URL; its id is obtained via ``getPlaylistUrlID``.

      Returns:
          The list produced by ``getFinalVideoUrl``.

      Raises:
          SystemExit: with status 1 after printing a message when no
              fragment matches.
      """
      playlist_id = getPlaylistUrlID(url)

      # The id comes from user input; escape it so it is matched literally.
      vid_url_pat = re.compile(r'watch\?v=\S+?list=' + re.escape(playlist_id))
      # dict.fromkeys drops repeats while keeping the order of appearance
      # on the page; a set would shuffle the playlist.
      vid_url_matches = list(dict.fromkeys(vid_url_pat.findall(page_content)))

      if not vid_url_matches:
          print('No videos found.')
          sys.exit(1)

      final_vid_urls = getFinalVideoUrl(vid_url_matches)
      print("Found",len(final_vid_urls),"videos in playlist.")
      printUrls(final_vid_urls)
      return final_vid_urls
  115.  
  # Function added to get audio files along with the video files from the playlist
  def download_Video_Audio(path, vid_url, file_no):
      """Download one video into ``path`` and derive an mp3 from it.

      The downloaded file is renamed to ``<file_no>.mp4`` inside ``path``;
      ``ffmpeg`` then writes ``<file_no>.wav`` and ``lame`` encodes that to
      ``<file_no>.mp3``, after which the intermediate wav is removed.
      Handled failures are reported on stdout and end the function early.

      Args:
          path: directory that receives the video and audio files.
          vid_url: video URL handed to ``pytube.YouTube``.
          file_no: value used (via ``str``) as base name of the output files.
      """
      import subprocess  # local import keeps this block self-contained

      try:
          yt = YouTube(vid_url)
      except Exception as e:
          print("Error:", str(e), "- Skipping Video with url '"+vid_url+"'.")
          return

      try:
          video = yt.streams.filter(progressive=True, file_extension="mp4").first()
      except Exception:
          # NOTE(review): `yt.filter` looks like the pre-`streams` pytube
          # API; kept as a fallback for old installs - confirm it is
          # still needed.
          try:
              video = sorted(yt.filter("mp4"),
                             key=lambda stream: int(stream.resolution[:-1]),
                             reverse=True)[0]
          except Exception as e:
              print("Error:", str(e), "- Skipping Video with url '"+vid_url+"'.")
              return
      if video is None:
          # An empty stream query yields no stream rather than raising.
          print("Error: no mp4 stream found - Skipping Video with url '"+vid_url+"'.")
          return

      print("downloading", yt.title+" Video and Audio...")
      downloaded = None
      try:
          downloaded = video.download(path)
          print("successfully downloaded", yt.title, "!")
      except OSError:
          print(yt.title, "already exists in this directory! Skipping video...")

      # Prefer the path reported by the download; otherwise fall back to
      # the title-based name inside `path`.
      source = downloaded or os.path.join(path, yt.title + '.mp4')
      base = os.path.join(path, str(file_no))
      mp4_file, wav_file, mp3_file = base + '.mp4', base + '.wav', base + '.mp3'

      try:
          os.rename(source, mp4_file)
      except OSError:
          print(yt.title, "There is some problem with the file names...")
          return

      try:
          # Argument lists avoid shell quoting problems; check=True turns a
          # failed conversion into an exception instead of silence.
          subprocess.run(['ffmpeg', '-i', mp4_file, wav_file], check=True)
          subprocess.run(['lame', wav_file, mp3_file], check=True)
          print("sucessfully converted",yt.title, "into audio!")
      except (OSError, subprocess.CalledProcessError) as e:
          print(yt.title, "could not be converted into audio:", e)
      finally:
          # The wav is only an intermediate; remove it whether or not the
          # mp3 encoding succeeded.
          if os.path.exists(wav_file):
              os.remove(wav_file)
  148.  
  149.  
  def printUrls(vid_urls):
      """Print every entry of ``vid_urls`` on its own line, pausing briefly after each."""
      pause_seconds = 0.04
      remaining = iter(vid_urls)
      while True:
          try:
              link = next(remaining)
          except StopIteration:
              break
          print(link)
          time.sleep(pause_seconds)
  154.        
  if __name__ == '__main__':
      # Script entry point: argv[1] is the playlist URL, the optional
      # argv[2] is the destination directory (defaults to the cwd).
      if len(sys.argv) not in (2, 3):
          print('USAGE: python ytPlaylistDL.py playlistURL OR python ytPlaylistDL.py playlistURL destPath')
          sys.exit(1)

      url = sys.argv[1]
      directory = sys.argv[2] if len(sys.argv) == 3 else os.getcwd()

      # Make the directory if the one specified doesn't exist.
      try:
          os.makedirs(directory, exist_ok=True)
      except OSError as e:
          # OSError has no `.reason`; `strerror` holds the system message
          # and may be None, in which case the exception itself is printed.
          print(e.strerror or e)
          sys.exit(1)

      if not url.startswith("http"):
          url = 'https://' + url

      playlist_page_content = getPageHtml(url)
      vid_urls_in_playlist = getPlaylistVideoUrls(playlist_page_content, url)

      # Download each video and its audio; the index becomes the file name.
      for i, vid_url in enumerate(vid_urls_in_playlist):
          download_Video_Audio(directory, vid_url, i)
          time.sleep(1)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement