faubiguy

Youtube Download Script 2.3

Oct 6th, 2015
223
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.94 KB | None | 0 0
  1. #!/usr/bin/python3
  2. import os, re, base64, time, sys, argparse, itertools
  3. import urllib.request as request
  4. from mutagen.oggvorbis import OggVorbis, OggVorbisHeaderError
  5. from mutagen.flac import Picture
  6. import youtube_dl
  7.  
  8. parser = argparse.ArgumentParser(description='Downloads songs from youtube videos and tags them based on description and metadata')
  9. parser.add_argument('videos', metavar='video', nargs='*', help='A video to download and tag. Use youtube id or url')
  10. parser.add_argument('-s', '--stdin', action='store_true', help='Read videos from stdin in addition to arguments')
  11. parser.add_argument('-d', '--directory', default='/home/zenith/Music/Library/ytdl/others', help='Directory to download songs to. Defaults to /home/zenith/Music/Library/ytdl/others')
  12. parser.add_argument('-n', '--no-tag', action='store_true', help='Don\'t tag, just download')
  13. parser.add_argument('-o', '--allow-overwrite', action='store_true', help='Allow downloaded file to overwrite existing file')
  14. args = parser.parse_args()
  15.  
  16. if not (args.videos or args.stdin):
  17.     print('poker')
  18.     print(args.videos)
  19.     print(args.stdin)
  20.     parser.error('Either the --stdin option or at least one video must be given.')
  21.  
  22. kvPattern = re.compile(r'\b((?:\w+\s+)*\w+)\s*(?::|:)\s*([^\s].*)$', re.UNICODE)
  23. kvPattern2 = re.compile(r'\b\[((?:\w+\s+)*\w+)\]\s*(\w+(?:\s+\w+)*)$', re.UNICODE)
  24. titlePattern = re.compile(r'^([\s\w]*\w)\s*\-\s*(\w[\s\w]*)$', re.UNICODE)
  25. tagdict = {
  26.     'bpm'        : 'BPM',
  27.     'artist'     : 'ARTIST',
  28.     'song'       : 'TITLE',
  29.     'title'      : 'TITLE',
  30.     'track'      : 'TITLE',
  31.     'genre'      : 'GENRE',
  32.     'language'   : 'LANGUAGE',
  33.     'album'      : 'ALBUM',
  34.     'disc'       : 'ALBUM',
  35.     'website'    : 'WEBSITE',
  36.     'site'       : 'WEBSITE',
  37.     'from'       : 'ORIGINALALBUM',
  38.     'original'   : 'ORIGINAL',
  39.     'source'     : 'ORIGINAL',
  40.     'key'        : 'KEY',
  41.     'arrangement': 'ARRANGER',
  42.     'arranger'   : 'ARRANGER',
  43.     'arrange'    : 'ARRANGER',
  44.     'lyric'      : 'LYRICIST',
  45.     'lyrics'     : 'LYRICIST',
  46.     'lyricist'   : 'LYRICIST',
  47. }
  48.  
  49. def tag(vorbis, title, description, channel, video_id):
  50.     lines = description.split('\n')
  51.     tags = {}
  52.     for tag, value in (match.group(1,2) for match in (re.search(kvPattern, line) or re.search(kvPattern2, line) for line in lines) if match):
  53.         tag = tag.lower()
  54.         if tag in tagdict:
  55.             tags[tagdict[tag]] = value
  56.         if tag == 'circle' or tag == 'group':
  57.             if 'artist' not in tags:
  58.                 tags['ARTIST'] = value
  59.             continue
  60.         if tag == 'singer' or tag == 'vocals' or tag == 'vocalist' or tag == 'vocal':
  61.             tags['VOCALIST'] = value
  62.             tags['VOCAL'] = 'Yes'
  63.     #print(title)
  64.     titleMatch = re.match(titlePattern, title) or re.match(kvPattern2, title)
  65.     if titleMatch:
  66.         if 'ARTIST' not in tags:
  67.             tags['ARTIST'] = titleMatch.group(1).strip()
  68.         if 'TITLE' not in tags:
  69.             tags['TITLE'] = titleMatch.group(2).strip()
  70.     else:
  71.         if 'TITLE' not in tags:
  72.             tags['TITLE'] = title
  73.         if 'ARTIST' not in tags:
  74.             tags['ARTIST'] = channel
  75.     if 'vocal' in title.lower():
  76.         tags['VOCAL'] = 'Yes'
  77.     tags['UPLOADER'] = channel
  78.     tags['YOUTUBE_ID'] = video_id
  79.     lyrics=re.search(r'lyrics:(.*?)(?:\n\n|$)', description, re.UNICODE + re.IGNORECASE)
  80.     if lyrics:
  81.         tags['LYRICS'] = lyrics.group(1)
  82.         tags['VOCAL'] = 'Yes'
  83.     picture = Picture()
  84.     picture.type = 3
  85.     picture.mime = 'image/jpeg'
  86.     picture.width = 480
  87.     picture.height = 360
  88.     with request.urlopen('http://img.youtube.com/vi/{0}/0.jpg'.format(video_id)) as thumbhttp:
  89.         picture.data = thumbhttp.read()
  90.     tags['METADATA_BLOCK_PICTURE'] = base64.b64encode(picture.write()).decode('ascii')
  91.     vorbis.clear()
  92.     for tag in vorbis:
  93.         del vorbis[tag]
  94.     for tag, value in tags.items():
  95.         vorbis[tag] = value
  96.     vorbis.pprint()
  97.     vorbis.save()
  98.        
  99. class Tagger(youtube_dl.postprocessor.common.PostProcessor):
  100.    
  101.     def run(self, information):
  102.         filename = information['filepath']
  103.         self._downloader.to_screen('[tagger] Tagging {0}'.format(filename))
  104.         try:
  105.             vorbis = OggVorbis(filename)
  106.         except OggVorbisHeaderError:
  107.             self._downloader.to_screen('[tagger] Unable to read. Renaming and Skipping.')
  108.             os.rename(filename, '.'+filename)
  109.             information['filepath'] = '.'+filename
  110.         else:
  111.             description = information['description']
  112.             uploader = information['uploader']
  113.             title = information['title']
  114.             video_id = information['id']
  115.             tag(vorbis, title, description, uploader, video_id)
  116.             self._downloader.to_screen('[tagger] Successfully tagged')
  117.         return [], information
  118.    
  119. class CollisionError(Exception):
  120.     pass
  121.        
  122. class AntiOverwrite(youtube_dl.postprocessor.common.PostProcessor):
  123.    
  124.     def run(self, information):
  125.         oggfilename = re.sub(r'\..*$', '.ogg', information['filepath'])
  126.         if os.path.exists(oggfilename):
  127.             self._downloader.to_screen('[collision_detector] File exists, skipping: {0}'.format(oggfilename))
  128.             os.remove(information['filepath'])
  129.             raise CollisionError()
  130.         return [], information
  131.    
  132. outtmpl = '%(uploader)s - %(title)s [%(id)s].%(ext)s'
  133.  
  134. if args.directory:
  135.     if not os.path.isdir(args.directory):
  136.         parser.error('Not a directory: {0}'.format(args.directory))
  137.     outtmpl = os.path.join(args.directory, outtmpl)
  138.  
  139. youtube_dl_options = {
  140.     'format': 'bestaudio/best',
  141.     'outtmpl': outtmpl,
  142.     'writedescription': True,
  143.     'logtostderr': True,
  144.     'nooverwrites': not args.allow_overwrite
  145. }
  146.  
  147. videos = args.videos
  148.  
  149. if args.stdin:
  150.     videos = itertools.chain(videos, (video.strip() for video in sys.stdin))
  151.  
  152. with youtube_dl.YoutubeDL(youtube_dl_options) as downloader:
  153.     if not args.allow_overwrite:
  154.         downloader.add_post_processor(AntiOverwrite())
  155.     downloader.add_post_processor(youtube_dl.postprocessor.ffmpeg.FFmpegExtractAudioPP(downloader, preferredcodec='vorbis', preferredquality='0'))
  156.     if not args.no_tag:
  157.         downloader.add_post_processor(Tagger())
  158.     for video in videos:
  159.         try:
  160.             downloader.download([video])
  161.         except CollisionError:
  162.             pass
  163.         except Exception as ex:
  164.             print('Error occured downloading video {0}: {1}'.format(video, ex), file=sys.stderr)
  165.         print('Finished video: {0}'.format(video))
Add Comment
Please, Sign In to add comment