# YouTube Download Script 2.1

#!/usr/bin/python3
import os, glob, re, base64, time, sys
import urllib.request as request
from mutagen.oggvorbis import OggVorbis, OggVorbisHeaderError
#from mutagen.flac import Picture
import youtube_dl

#urlPattern = re.compile(r'http://[\w\-\.]+?\.?([\w\-]+\.[\w\-]+)/?[^\s]*', re.UNICODE)
# Matches a "key: value" line; accepts the ASCII colon and the fullwidth
# colon. Group 1 is the key (one or more words), group 2 the value.
kvPattern = re.compile(r'\b((?:\w+\s+)*\w+)\s*(?::|：)\s*([^\s].*)$', re.UNICODE)
# Matches a "[key] value" line. Group 1 is the bracketed key, group 2 the
# value. The pattern deliberately has no leading \b: "\b\[" requires a word
# character directly before "[", so it could never match a bracket at the
# start of a line (or at the start of a title with re.match).
kvPattern2 = re.compile(r'\[((?:\w+\s+)*\w+)\]\s*(\w+(?:\s+\w+)*)$', re.UNICODE)
# Matches a whole "left - right" title made only of word characters and
# whitespace on each side of the dash.
titlePattern = re.compile(r'^([\s\w]*\w)\s*\-\s*(\w[\s\w]*)$', re.UNICODE)
#idPattern = re.compile(r'[\w_\-]{11}', re.UNICODE)
# Maps a lower-cased description key to the Vorbis comment field it fills.
tagdict = {
    'bpm': 'BPM',
    'artist': 'ARTIST',
    'song': 'TITLE',
    'title': 'TITLE',
    'track': 'TITLE',
    'genre': 'GENRE',
    'language': 'LANGUAGE',
    'album': 'ALBUM',
    'disc': 'ALBUM',
    'website': 'WEBSITE',
    'site': 'WEBSITE',
    'from': 'ORIGINALALBUM',
    'original': 'ORIGINAL',
    'source': 'ORIGINAL',
    'key': 'KEY',
    'arrangement': 'ARRANGER',
    'arranger': 'ARRANGER',
    'arrange': 'ARRANGER',
    'lyric': 'LYRICIST',
    'lyrics': 'LYRICIST',
    'lyricist': 'LYRICIST',
}
#TODO website, vocals, vocalist, circle!=artist, lyrics, arranger, original, key, lyrics=re.match(/lyrics:(.*?)(?:\s\s|$)/, description), video id, bracketed labels

def _description_tags(description):
    """Collect Vorbis fields from "key: value" / "[key] value" description lines."""
    tags = {}
    for line in description.split('\n'):
        match = kvPattern.search(line) or kvPattern2.search(line)
        if not match:
            continue
        key, value = match.group(1, 2)
        key = key.lower()
        if key in tagdict:
            tags[tagdict[key]] = value
        if key in ('circle', 'group'):
            # A circle/group only stands in for the artist when no artist
            # has been recorded yet (tag keys are stored upper-case).
            if 'ARTIST' not in tags:
                tags['ARTIST'] = value
        elif key in ('singer', 'vocals', 'vocalist', 'vocal'):
            tags['VOCALIST'] = value
            tags['VOCAL'] = 'Yes'
    return tags


def _thumbnail_picture(video_id):
    """Download the video's thumbnail and wrap it in a front-cover Picture block."""
    # Imported here because the module does not import Picture at top level.
    from mutagen.flac import Picture

    picture = Picture()
    picture.type = 3  # picture type 3 is "front cover"
    picture.mime = 'image/jpeg'
    picture.width = 480
    picture.height = 360
    with request.urlopen('http://img.youtube.com/vi/{0}/0.jpg'.format(video_id)) as thumbhttp:
        picture.data = thumbhttp.read()
    return picture


def tag(vorbis, title, description, channel, video_id):
    """Replace the comments of an Ogg Vorbis file with tags derived from video metadata.

    Args:
        vorbis: mutagen ``OggVorbis`` object; its tags are cleared, rewritten
            and saved.
        title: video title, used to guess ARTIST/TITLE when the description
            does not supply them.
        description: video description, scanned line by line for tag lines.
        channel: uploader name; stored as UPLOADER and used as the ARTIST
            fallback.
        video_id: video id; stored as YOUTUBE_ID and used to fetch the
            thumbnail.

    Raises:
        Whatever ``urllib.request.urlopen`` or ``vorbis.save()`` raise; the
        thumbnail download is not optional.
    """
    tags = _description_tags(description)

    # Fall back to the title ("left - right" or "[left] right") and then to
    # the raw title / channel for whatever the description did not provide.
    title_match = titlePattern.match(title) or kvPattern2.match(title)
    if title_match:
        artist_guess = title_match.group(1).strip()
        title_guess = title_match.group(2).strip()
    else:
        artist_guess, title_guess = channel, title
    tags.setdefault('ARTIST', artist_guess)
    tags.setdefault('TITLE', title_guess)

    if 'vocal' in title.lower():
        tags['VOCAL'] = 'Yes'
    tags['UPLOADER'] = channel
    tags['YOUTUBE_ID'] = video_id

    # NOTE(review): without re.DOTALL the capture cannot span lines, so this
    # only picks up single-line text after "lyrics:" -- confirm whether
    # multi-line lyrics were intended.
    lyrics = re.search(r'lyrics:(.*?)(?:\n\n|$)', description, re.UNICODE | re.IGNORECASE)
    if lyrics:
        tags['LYRICS'] = lyrics.group(1)
        tags['VOCAL'] = 'Yes'

    # The cover is stored as a base64-encoded picture block in a comment field.
    picture = _thumbnail_picture(video_id)
    tags['METADATA_BLOCK_PICTURE'] = base64.b64encode(picture.write()).decode('ascii')

    # Start from a clean slate so stale comments never survive; nothing needs
    # deleting afterwards because only the collected tags are written back.
    vorbis.clear()
    for key, value in tags.items():
        vorbis[key] = value
    vorbis.save()

#oldcwd = os.getcwd()
#os.chdir('/home/zenith/Music/Library/ytdl')
#with open('dirnum', 'r') as dirnum_file:
    #dirnum = int(dirnum_file.read())
#dirname = 'download_' + str(dirnum)
#if not os.path.exists(dirname):
    #os.mkdir(dirname)
#os.chdir(dirname)

class Tagger(youtube_dl.postprocessor.common.PostProcessor):
    """Post-processor that tags the downloaded Ogg Vorbis file via ``tag``."""

    def run(self, information):
        """Tag the file at ``information['filepath']``.

        If the file cannot be read as Ogg Vorbis it is renamed to a
        dot-prefixed name in the same directory and ``filepath`` is updated
        to match; otherwise it is tagged from the title, description,
        uploader and id fields.

        Returns:
            ``([], information)`` -- an empty list (no files to delete) and
            the possibly updated info dict.
        """
        filename = information['filepath']
        print('Tagging {0}'.format(filename), file=sys.stderr)
        try:
            vorbis = OggVorbis(filename)
        except OggVorbisHeaderError:
            print('Unable to read. Renaming and Skipping.', file=sys.stderr)
            # Prefix only the file name, not the whole path, so files that
            # live in a sub-directory stay where they are.
            directory, basename = os.path.split(filename)
            hidden = os.path.join(directory, '.' + basename)
            os.rename(filename, hidden)
            information['filepath'] = hidden
        else:
            # description/uploader may be absent or None; tag() needs strings.
            description = information.get('description') or ''
            uploader = information.get('uploader') or ''
            title = information['title']
            video_id = information['id']
            tag(vorbis, title, description, uploader, video_id)
            print('Successfully tagged', file=sys.stderr)
        return [], information


# Post-processor entry: extract the audio track with FFmpeg as Vorbis.
_extract_audio_spec = {
    'key': 'FFmpegExtractAudio',
    'preferredcodec': 'vorbis',
    'preferredquality': '0',
    'nopostoverwrites': False,
}

# Options handed to youtube_dl.YoutubeDL below.
youtube_dl_options = {
    'format': 'bestaudio/best',
    'outtmpl': '%(uploader)s - %(title)s [%(id)s].%(ext)s',
    'postprocessors': [_extract_audio_spec],
    'writedescription': True,
    'logtostderr': True,
}

# Read one video URL/id per line from stdin and echo every line to stdout.
# Entries that were attempted are echoed with a leading '#'; blank lines and
# lines already starting with ';' or '#' are echoed unchanged, so the output
# can be fed back in as the next run's input.
_in_progress = None  # entry whose download is currently running, if any
try:
    with youtube_dl.YoutubeDL(youtube_dl_options) as downloader:
        downloader.add_post_processor(Tagger())
        for video in sys.stdin:
            video = video.strip()
            # `video and ...` guards against blank lines, which have no
            # first character to inspect.
            if video and not video.startswith((';', '#')):
                _in_progress = video
                try:
                    downloader.download([video])
                except Exception:
                    print('Error occured downloading video: {0}'.format(video), file=sys.stderr)
                _in_progress = None
                # NOTE(review): entries whose download failed are marked with
                # '#' as well -- confirm that this is intended.
                video = '#' + video
            print(video)
except BaseException as error:
    # On any abort (including Ctrl-C) say why, then pass the interrupted
    # entry and the rest of the input through unmarked so nothing is lost.
    print('Aborted: {0!r}'.format(error), file=sys.stderr)
    if _in_progress is not None:
        print(_in_progress)
    sys.stdout.write(sys.stdin.read())

# Disabled: the code that reads `dirnum` and changes into the download
# directory is commented out, so running this would fail on the undefined
# `dirnum` after already writing a file in the parent directory.
#os.chdir('..')
#with open('dirnum', 'w') as dirnum_file:
    #dirnum_file.write(str(dirnum+1))

#os.chdir(oldcwd)