Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python3
- import os, re, base64, time, sys, argparse, itertools
- import urllib.request as request
- from mutagen.oggvorbis import OggVorbis, OggVorbisHeaderError
- from mutagen.flac import Picture
- import youtube_dl
# Command-line interface. Videos can be named as positional arguments, piped
# in on stdin (--stdin), or both.
parser = argparse.ArgumentParser(
    description='Downloads songs from youtube videos and tags them based on description and metadata')
parser.add_argument('videos', metavar='video', nargs='*',
                    help='A video to download and tag. Use youtube id or url')
parser.add_argument('-s', '--stdin', action='store_true',
                    help='Read videos from stdin in addition to arguments')
# NOTE(review): the default is a user-specific absolute path; it is kept for
# backward compatibility. %(default)s keeps the help text in sync with it.
parser.add_argument('-d', '--directory', default='/home/zenith/Music/Library/ytdl/others',
                    help='Directory to download songs to. Defaults to %(default)s')
parser.add_argument('-n', '--no-tag', action='store_true',
                    help='Don\'t tag, just download')
parser.add_argument('-o', '--allow-overwrite', action='store_true',
                    help='Allow downloaded file to overwrite existing file')
args = parser.parse_args()

# With neither positional videos nor --stdin there is nothing to do.
# parser.error() prints the usage message and terminates the program.
if not (args.videos or args.stdin):
    parser.error('Either the --stdin option or at least one video must be given.')
# Regular expressions used to pull tag data out of descriptions and titles.

# "Key: value" lines. Group 1 is the key (one or more whitespace-separated
# words), group 2 is the value (from the first non-space character to the end).
# NOTE(review): the original alternation listed ':' twice; one alternative may
# have been a full-width colon lost in transcription -- confirm with the author.
kvPattern = re.compile(r'\b((?:\w+\s+)*\w+)\s*:\s*([^\s].*)$', re.UNICODE)

# "[Key] value" lines or titles. Group 1 is the bracketed key, group 2 the words
# that follow. There is deliberately no leading \b: a word boundary in front of
# '[' only exists when a word character directly precedes the bracket, which
# made the pattern unmatchable at the start of a string.
kvPattern2 = re.compile(r'\[((?:\w+\s+)*\w+)\]\s*(\w+(?:\s+\w+)*)$', re.UNICODE)

# "Artist - Title" video titles. Group 1 is the text before the dash, group 2
# the text after it; only word characters and whitespace are allowed in each.
titlePattern = re.compile(r'^([\s\w]*\w)\s*\-\s*(\w[\s\w]*)$', re.UNICODE)
# Maps a lowercase key found in a video description to the tag name it is
# stored under. The table is written per target tag, since several spellings
# feed the same tag; it is flattened into an alias -> tag dictionary.
tagdict = {
    alias: field
    for field, aliases in (
        ('BPM', ('bpm',)),
        ('ARTIST', ('artist',)),
        ('TITLE', ('song', 'title', 'track')),
        ('GENRE', ('genre',)),
        ('LANGUAGE', ('language',)),
        ('ALBUM', ('album', 'disc')),
        ('WEBSITE', ('website', 'site')),
        ('ORIGINALALBUM', ('from',)),
        ('ORIGINAL', ('original', 'source')),
        ('KEY', ('key',)),
        ('ARRANGER', ('arrangement', 'arranger', 'arrange')),
        ('LYRICIST', ('lyric', 'lyrics', 'lyricist')),
    )
    for alias in aliases
}
def _collect_tags(title, description, channel, video_id):
    """Derive the tag dictionary from the video's text metadata (no I/O)."""
    tags = {}

    # Pass 1: "Key: value" / "[Key] value" lines in the description.
    for line in description.split('\n'):
        match = re.search(kvPattern, line) or re.search(kvPattern2, line)
        if not match:
            continue
        key, value = match.group(1, 2)
        key = key.lower()
        if key in tagdict:
            tags[tagdict[key]] = value
        elif key in ('circle', 'group'):
            # A circle/group is only a fallback artist; never override an
            # explicit one. Keys in `tags` are upper-case, so test 'ARTIST'.
            if 'ARTIST' not in tags:
                tags['ARTIST'] = value
        elif key in ('singer', 'vocals', 'vocalist', 'vocal'):
            tags['VOCALIST'] = value
            tags['VOCAL'] = 'Yes'

    # Pass 2: fill in whatever the description did not provide from the title,
    # falling back to the raw title and the channel name.
    titleMatch = re.match(titlePattern, title) or re.match(kvPattern2, title)
    if titleMatch:
        tags.setdefault('ARTIST', titleMatch.group(1).strip())
        tags.setdefault('TITLE', titleMatch.group(2).strip())
    else:
        tags.setdefault('TITLE', title)
        tags.setdefault('ARTIST', channel)

    if 'vocal' in title.lower():
        tags['VOCAL'] = 'Yes'
    tags['UPLOADER'] = channel
    tags['YOUTUBE_ID'] = video_id

    # A lyrics section runs from "lyrics:" to the next blank line or the end of
    # the description. DOTALL is required so the capture can span several lines.
    lyrics = re.search(r'lyrics:(.*?)(?:\n\n|$)', description,
                       re.UNICODE | re.IGNORECASE | re.DOTALL)
    if lyrics:
        tags['LYRICS'] = lyrics.group(1).strip()
        tags['VOCAL'] = 'Yes'
    return tags


def _cover_art_block(video_id):
    """Download the video thumbnail and return it as a base64 picture block."""
    picture = Picture()
    picture.type = 3  # picture type 3 is "front cover" in the FLAC picture block
    picture.mime = 'image/jpeg'
    # NOTE(review): dimensions are hard-coded, not read from the image -- confirm
    # they match what this thumbnail URL actually serves.
    picture.width = 480
    picture.height = 360
    with request.urlopen('http://img.youtube.com/vi/{0}/0.jpg'.format(video_id)) as thumbhttp:
        picture.data = thumbhttp.read()
    return base64.b64encode(picture.write()).decode('ascii')


def tag(vorbis, title, description, channel, video_id):
    """Replace all tags on `vorbis` with ones derived from the video metadata.

    Args:
        vorbis: the opened audio file object; it is cleared, assigned to by
            key, and saved.
        title: the video title; matched against titlePattern, then kvPattern2.
        description: the video description; scanned line by line for
            key/value pairs and for a "lyrics:" section.
        channel: uploader name; stored as UPLOADER and used as the fallback
            artist.
        video_id: the video id; stored as YOUTUBE_ID and used to fetch the
            thumbnail.

    Existing tags on the file are discarded. A thumbnail that cannot be
    downloaded is reported on stderr and skipped, so the text tags are still
    written.
    """
    tags = _collect_tags(title, description, channel, video_id)

    try:
        tags['METADATA_BLOCK_PICTURE'] = _cover_art_block(video_id)
    except OSError as err:
        # urllib's URLError/HTTPError derive from OSError. Cover art is
        # optional, so do not lose every other tag over it.
        print('Could not fetch thumbnail for {0}: {1}'.format(video_id, err), file=sys.stderr)

    vorbis.clear()
    for name, value in tags.items():
        vorbis[name] = value
    vorbis.save()
class Tagger(youtube_dl.postprocessor.common.PostProcessor):
    """Post-processor that tags the audio file from the video's metadata."""

    def run(self, information):
        """Tag the file at information['filepath'].

        If the file cannot be opened as Ogg Vorbis it is renamed to a
        dot-prefixed name in the same directory and information['filepath'] is
        updated to the new location; otherwise tag() is called with the
        title, description, uploader and id from `information`.

        Returns:
            ([], information): an empty list plus the (possibly updated)
            information dict. Presumably the list names files the downloader
            may delete -- confirm against the PostProcessor contract.
        """
        filename = information['filepath']
        self._downloader.to_screen('[tagger] Tagging {0}'.format(filename))
        try:
            vorbis = OggVorbis(filename)
        except OggVorbisHeaderError:
            self._downloader.to_screen('[tagger] Unable to read. Renaming and Skipping.')
            # The dot must go in front of the file name, not the whole path:
            # '.' + '/dir/x.ogg' would yield './dir/x.ogg'.
            directory, basename = os.path.split(filename)
            hidden = os.path.join(directory, '.' + basename)
            os.rename(filename, hidden)
            information['filepath'] = hidden
        else:
            tag(
                vorbis,
                information['title'],
                information['description'],
                information['uploader'],
                information['id'],
            )
            self._downloader.to_screen('[tagger] Successfully tagged')
        return [], information
class CollisionError(Exception):
    """Signals that the .ogg file a download would produce already exists."""
class AntiOverwrite(youtube_dl.postprocessor.common.PostProcessor):
    """Post-processor that refuses to replace an existing .ogg file."""

    def run(self, information):
        """Abort processing when the .ogg counterpart of the download exists.

        The target name is the downloaded path with its final extension
        replaced by '.ogg'. If a different file already exists under that
        name, the fresh download is removed and CollisionError is raised.

        Returns:
            ([], information) when there is no collision.

        Raises:
            CollisionError: the target .ogg file already exists.
        """
        downloaded = information['filepath']
        # splitext only strips the last extension, so dots in the title or in
        # a directory name do not truncate the path.
        oggfilename = os.path.splitext(downloaded)[0] + '.ogg'
        # When the download itself is already the .ogg path, the file found on
        # disk is the download, not a collision -- leave it alone.
        if oggfilename != downloaded and os.path.exists(oggfilename):
            self._downloader.to_screen('[collision_detector] File exists, skipping: {0}'.format(oggfilename))
            os.remove(downloaded)
            raise CollisionError()
        return [], information
# Output file name template; it is placed inside the target directory when one
# is given. A target that is not an existing directory is a usage error.
outtmpl = '%(uploader)s - %(title)s [%(id)s].%(ext)s'
target_directory = args.directory
if target_directory:
    if os.path.isdir(target_directory):
        outtmpl = os.path.join(target_directory, outtmpl)
    else:
        parser.error('Not a directory: {0}'.format(target_directory))

# Options handed to the YoutubeDL constructor.
youtube_dl_options = dict(
    format='bestaudio/best',
    outtmpl=outtmpl,
    writedescription=True,
    logtostderr=True,
    nooverwrites=not args.allow_overwrite,
)
# Videos to process: the positional arguments, optionally followed by one video
# per line from stdin. Blank stdin lines are dropped so they are not handed to
# the downloader as empty video ids.
videos = args.videos
if args.stdin:
    stdin_videos = (line.strip() for line in sys.stdin)
    videos = itertools.chain(videos, (video for video in stdin_videos if video))

with youtube_dl.YoutubeDL(youtube_dl_options) as downloader:
    # Post-processors are registered in this order: collision check, audio
    # extraction to Vorbis, then tagging.
    if not args.allow_overwrite:
        downloader.add_post_processor(AntiOverwrite())
    downloader.add_post_processor(
        youtube_dl.postprocessor.ffmpeg.FFmpegExtractAudioPP(
            downloader, preferredcodec='vorbis', preferredquality='0'))
    if not args.no_tag:
        downloader.add_post_processor(Tagger())

    # Each video is downloaded on its own so one failure does not stop the rest.
    for video in videos:
        try:
            downloader.download([video])
        except CollisionError:
            # Already reported by AntiOverwrite; move on to the next video.
            pass
        except Exception as ex:
            print('Error occured downloading video {0}: {1}'.format(video, ex), file=sys.stderr)
        print('Finished video: {0}'.format(video))
Add Comment
Please, Sign In to add comment