YouTube Download Script 2.3

#!/usr/bin/python3
import os, re, base64, time, sys, argparse, itertools
import urllib.request as request
from mutagen.oggvorbis import OggVorbis, OggVorbisHeaderError
from mutagen.flac import Picture
import youtube_dl

# Command-line interface: any number of positional videos plus flags that
# control where files go, whether they get tagged, and whether an existing
# file may be replaced.
parser = argparse.ArgumentParser(description='Downloads songs from youtube videos and tags them based on description and metadata')
parser.add_argument('videos', metavar='video', nargs='*', help='A video to download and tag. Use youtube id or url')
parser.add_argument('-s', '--stdin', action='store_true', help='Read videos from stdin in addition to arguments')
parser.add_argument('-d', '--directory', default='/home/zenith/Music/Library/ytdl/others', help='Directory to download songs to. Defaults to /home/zenith/Music/Library/ytdl/others')
parser.add_argument('-n', '--no-tag', action='store_true', help='Don\'t tag, just download')
parser.add_argument('-o', '--allow-overwrite', action='store_true', help='Allow downloaded file to overwrite existing file')
args = parser.parse_args()

# nargs='*' happily accepts zero videos, so the "nothing to do" case has to be
# rejected by hand. parser.error() prints the usage message and exits.
if not (args.videos or args.stdin):
    parser.error('Either the --stdin option or at least one video must be given.')

# "Key: value" at the end of a line, e.g. "Artist: Foo Bar". Both the ASCII
# colon and the full-width colon are accepted as the separator.
kvPattern = re.compile(r'\b((?:\w+\s+)*\w+)\s*(?::|：)\s*([^\s].*)$', re.UNICODE)
# "[Key] value", e.g. "[Circle] Foo Bar". There is deliberately no leading \b:
# a word boundary before '[' only exists when a word character precedes it, so
# with \b the pattern could never match a bracket at the start of a line or be
# used with re.match() on a title.
kvPattern2 = re.compile(r'\[((?:\w+\s+)*\w+)\]\s*(\w+(?:\s+\w+)*)$', re.UNICODE)
# "Artist - Title" made only of word characters and whitespace around the dash.
titlePattern = re.compile(r'^([\s\w]*\w)\s*\-\s*(\w[\s\w]*)$', re.UNICODE)
# Maps a lower-cased key found in a description to the tag field it is stored
# under. Several spellings intentionally collapse onto the same field.
tagdict = {
    'bpm'        : 'BPM',
    'artist'     : 'ARTIST',
    'song'       : 'TITLE',
    'title'      : 'TITLE',
    'track'      : 'TITLE',
    'genre'      : 'GENRE',
    'language'   : 'LANGUAGE',
    'album'      : 'ALBUM',
    'disc'       : 'ALBUM',
    'website'    : 'WEBSITE',
    'site'       : 'WEBSITE',
    'from'       : 'ORIGINALALBUM',
    'original'   : 'ORIGINAL',
    'source'     : 'ORIGINAL',
    'key'        : 'KEY',
    'arrangement': 'ARRANGER',
    'arranger'   : 'ARRANGER',
    'arrange'    : 'ARRANGER',
    'lyric'      : 'LYRICIST',
    'lyrics'     : 'LYRICIST',
    'lyricist'   : 'LYRICIST',
}

def _collect_tags(title, description, channel, video_id):
    """Build the tag fields for one video from its textual metadata.

    Scans *description* line by line for "Key: value" / "[Key] value" pairs,
    then fills ARTIST and TITLE from *title* (falling back to *channel*) when
    the description did not provide them.

    Returns:
        dict mapping upper-case field names to string values.
    """
    tags = {}
    for line in description.split('\n'):
        match = re.search(kvPattern, line) or re.search(kvPattern2, line)
        if not match:
            continue
        key, value = match.group(1, 2)
        key = key.lower()
        if key in tagdict:
            tags[tagdict[key]] = value
        if key in ('circle', 'group'):
            # A circle/group only stands in for ARTIST while no artist has
            # been recorded (a later explicit artist line still replaces it).
            # Field names in `tags` are upper-case, so the lookup must be too.
            if 'ARTIST' not in tags:
                tags['ARTIST'] = value
        elif key in ('singer', 'vocals', 'vocalist', 'vocal'):
            tags['VOCALIST'] = value
            tags['VOCAL'] = 'Yes'

    title_match = re.match(titlePattern, title) or re.match(kvPattern2, title)
    if title_match:
        # The title itself has an "<artist> <separator> <title>" shape.
        tags.setdefault('ARTIST', title_match.group(1).strip())
        tags.setdefault('TITLE', title_match.group(2).strip())
    else:
        tags.setdefault('TITLE', title)
        tags.setdefault('ARTIST', channel)

    if 'vocal' in title.lower():
        tags['VOCAL'] = 'Yes'
    tags['UPLOADER'] = channel
    tags['YOUTUBE_ID'] = video_id

    # NOTE(review): without re.DOTALL the capture cannot cross a newline, so
    # this only fires for a single "lyrics: ..." line that is followed by a
    # blank line or ends the description. Confirm whether multi-line lyric
    # blocks were meant to be captured.
    lyrics = re.search(r'lyrics:(.*?)(?:\n\n|$)', description, re.UNICODE | re.IGNORECASE)
    if lyrics:
        tags['LYRICS'] = lyrics.group(1)
        tags['VOCAL'] = 'Yes'
    return tags


def _cover_picture_block(video_id):
    """Download the video's thumbnail and return it as a base64 picture block."""
    picture = Picture()
    picture.type = 3  # 3 = front cover in the ID3 APIC picture-type numbering
    picture.mime = 'image/jpeg'
    # Dimensions are hard-coded rather than read from the image; presumably
    # the size served for 0.jpg -- verify if thumbnails ever look wrong.
    picture.width = 480
    picture.height = 360
    with request.urlopen('http://img.youtube.com/vi/{0}/0.jpg'.format(video_id)) as thumbhttp:
        picture.data = thumbhttp.read()
    return base64.b64encode(picture.write()).decode('ascii')


def tag(vorbis, title, description, channel, video_id):
    """Replace every tag on *vorbis* with ones derived from the video.

    Args:
        vorbis: opened mutagen OggVorbis file; its tags are cleared,
            rewritten and saved in place.
        title: video title, also parsed for an artist/title split.
        description: video description, scanned for key/value lines.
        channel: uploader name; stored as UPLOADER and used as the ARTIST
            of last resort.
        video_id: YouTube id; stored as YOUTUBE_ID and used to locate the
            thumbnail.

    Raises:
        Whatever ``request.urlopen`` raises when the thumbnail cannot be
        fetched. That happens before the file is modified, so the existing
        tags are left untouched in that case.
    """
    tags = _collect_tags(title, description, channel, video_id)
    tags['METADATA_BLOCK_PICTURE'] = _cover_picture_block(video_id)
    # clear() already removes every existing key, so no extra deletion pass
    # is needed before writing the new set.
    vorbis.clear()
    for key, value in tags.items():
        vorbis[key] = value
    vorbis.save()

class Tagger(youtube_dl.postprocessor.common.PostProcessor):
    """Post-processor that writes tags onto the downloaded Ogg Vorbis file."""

    def run(self, information):
        """Tag the file named by ``information['filepath']``.

        A file mutagen cannot parse as Ogg Vorbis is not tagged; it is renamed
        to a dot-prefixed name in the same directory and
        ``information['filepath']`` is updated to the new location.

        Returns:
            ``([], information)`` -- an empty list plus the (possibly
            updated) info dict.
        """
        filename = information['filepath']
        self._downloader.to_screen('[tagger] Tagging {0}'.format(filename))
        try:
            vorbis = OggVorbis(filename)
        except OggVorbisHeaderError:
            self._downloader.to_screen('[tagger] Unable to read. Renaming and Skipping.')
            # Prefix only the file name with a dot. Prefixing the whole path
            # ('.' + '/dir/x.ogg') would point at './dir/x.ogg' instead of
            # hiding the file next to where it was downloaded.
            directory, basename = os.path.split(filename)
            hidden = os.path.join(directory, '.' + basename)
            os.rename(filename, hidden)
            information['filepath'] = hidden
        else:
            tag(
                vorbis,
                information['title'],
                information['description'],
                information['uploader'],
                information['id'],
            )
            self._downloader.to_screen('[tagger] Successfully tagged')
        return [], information

class CollisionError(Exception):
    """Signals that the .ogg file a download would produce already exists."""

class AntiOverwrite(youtube_dl.postprocessor.common.PostProcessor):
    """Post-processor that aborts a video whose .ogg output already exists."""

    def run(self, information):
        """Check whether converting the download would clobber an .ogg file.

        Returns:
            ``([], information)`` when there is no collision.

        Raises:
            CollisionError: the .ogg counterpart of the download already
                exists; the freshly downloaded file is removed first.
        """
        downloaded = information['filepath']
        # Swap only the final extension. Cutting at the first dot in the path
        # would mangle names such as "/music/v2.0/Artist feat. X [id].webm".
        oggfilename = os.path.splitext(downloaded)[0] + '.ogg'
        # If the download already *is* the .ogg file, its own existence is not
        # a collision, and deleting it would throw away the only copy.
        if oggfilename != downloaded and os.path.exists(oggfilename):
            self._downloader.to_screen('[collision_detector] File exists, skipping: {0}'.format(oggfilename))
            os.remove(downloaded)
            raise CollisionError()
        return [], information

# File name template handed to youtube-dl through the 'outtmpl' option.
outtmpl = '%(uploader)s - %(title)s [%(id)s].%(ext)s'

# Anchor the template in the target directory, refusing to continue when the
# given path is not an existing directory (parser.error() exits the script).
if args.directory:
    if os.path.isdir(args.directory):
        outtmpl = os.path.join(args.directory, outtmpl)
    else:
        parser.error('Not a directory: {0}'.format(args.directory))

# Options passed to youtube_dl.YoutubeDL; overwriting is disabled unless
# --allow-overwrite was given.
youtube_dl_options = dict(
    format='bestaudio/best',
    outtmpl=outtmpl,
    writedescription=True,
    logtostderr=True,
    nooverwrites=not args.allow_overwrite,
)

videos = args.videos

if args.stdin:
    # One video per stdin line, consumed lazily after the command-line ones.
    # Blank lines (for example a trailing newline) are dropped rather than
    # being handed to youtube-dl as an empty url.
    videos = itertools.chain(videos, filter(None, (video.strip() for video in sys.stdin)))

with youtube_dl.YoutubeDL(youtube_dl_options) as downloader:
    # The order of registration matters: the collision check has to look for
    # the .ogg before ffmpeg produces it, and the tagger needs the converted
    # .ogg file to exist.
    if not args.allow_overwrite:
        downloader.add_post_processor(AntiOverwrite())
    downloader.add_post_processor(youtube_dl.postprocessor.ffmpeg.FFmpegExtractAudioPP(downloader, preferredcodec='vorbis', preferredquality='0'))
    if not args.no_tag:
        downloader.add_post_processor(Tagger())
    # Each video is handled on its own so one failure does not stop the rest.
    for video in videos:
        try:
            downloader.download([video])
        except CollisionError:
            # Already reported by AntiOverwrite; just move on to the next one.
            pass
        except Exception as ex:
            print('Error occured downloading video {0}: {1}'.format(video, ex), file=sys.stderr)
        print('Finished video: {0}'.format(video))