# Audacity label tool

#! C:\Python\Python37\python.exe
#! coding: utf-8
#! python3
# Black Cat 2019
# Licence cc-by-nc-sa
# Adds a timestamp in the format HH_MM_SS_ derived from the precise Audacity timestamps
# Also checks for typos and abbreviations
# And does a freaking mess of other things

import difflib

# Version number of the tool; printed in the start-up banner.
VERSION = 0.9

def timestampToTime( timestamp ) :
    """Break a duration given in seconds into clock components.

    Returns the tuple (hours, minutes, seconds, millis, fractional). The first
    four items are integers obtained by truncation with int(); `fractional`
    is what remains of `timestamp` once its integer part is removed.
    """
    # Split the integer part from the sub-second part first
    wholePart = int( timestamp )
    fractional = timestamp - wholePart

    hours = int( timestamp/3600 )
    minutes = int( (timestamp - (3600*hours)) / 60 )
    seconds = int( timestamp - (60*minutes) - (3600*hours) )
    millis = int( fractional * 1000 )

    return (hours, minutes, seconds, millis, fractional)


def timeToTimestamp( hours=0, minutes=0, seconds=0, millis=0 ) :
    """Combine clock components into a single duration in seconds.

    `millis` is divided by 1000, so the result is a float.
    """
    fromHours = 3600*hours
    fromMinutes = 60*minutes
    fromMillis = millis/1000
    return fromHours + fromMinutes + seconds + fromMillis


def likeness( a="", b="" ) :
    """Return the difflib similarity ratio of `a` and `b`.

    The value is a float between 0.0 (nothing in common) and 1.0 (identical).
    """
    matcher = difflib.SequenceMatcher( None, a, b )
    return matcher.ratio()

# Deliberately small number speller. For anything serious, please use a real
# library like "inflect".
def num2str( num=0 ) :
    """Spell out an integer in English words.

    The number is processed in groups of three digits. Each non-empty group is
    written as "<x> hundred and <y>" followed by its scale word, and groups are
    joined with ", " (e.g. 1234 -> "one thousand, two hundred and thirty four").
    Negative numbers are prefixed with "minus ".

    num : integer to spell out.

    Returns the spelled-out string. If abs(num) is greater than 9999999999, an
    error is reported through error() and str(num) is returned instead.
    """
    if abs( num ) > 9999999999 :
        error( "num2str : (positive or negative) Number to big sorry my coder was lazy" )
        return str(num)

    if num == 0 :
        return "zero"

    # Power Of Ten = 0 (also covers the irregular 10..19)
    units = ["", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen", "sixteen", "seventeen", "eighteen", "nineteen"]
    # Power Of Ten = 1
    decades = ["", "ten", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"]
    # Power Of Ten [0, 3, 6, 9]
    thousands = ["", "thousand", "million", "billion"]

    groups = []
    n = abs(num)
    t = 0
    while n > 0 :
        # Peel off the three lowest digits (integer arithmetic only)
        n, hun = divmod( n, 1000 )

        # An all-zero group contributes no words (e.g. the "000" of 1000)
        if hun != 0 :
            c, rest = divmod( hun, 100 )
            d, u = divmod( rest, 10 )

            parts = []
            if c > 0 :
                parts.append( units[c] + " hundred" )

            if rest > 0 :
                if d < 2 :
                    parts.append( units[rest] )
                elif u > 0 :
                    parts.append( decades[d] + " " + units[u] )
                else :
                    parts.append( decades[d] )

            # "and" only appears when both a hundreds part and a remainder exist
            strHun = " and ".join( parts )

            if t != 0 :
                strHun = strHun + " " + thousands[t]

            # Groups are found lowest first, but must be read highest first
            groups.insert( 0, strHun )

        t += 1

    string = ", ".join( groups )

    if num < 0 :
        string = "minus " + string

    return string

#==============================================================================
# Eye candy
#==============================================================================
def info( text="?" ) :
    """Print an informational message; shown only when ERROR_LEVEL is above 2."""
    if ERROR_LEVEL <= 2 :
        return
    message = "   info   : " + text
    print( message )

def good( text="?" ) :
    """Print a "good news" message; shown only when ERROR_LEVEL is above 1."""
    if ERROR_LEVEL <= 1 :
        return
    message = "   good   : " + text
    print( message )

def warning( text="?" ) :
    """Print a warning message; shown only when ERROR_LEVEL is above 0."""
    if ERROR_LEVEL <= 0 :
        return
    message = "[warning] : " + text
    print( message )

def error( text="?" ) :
    """Print an error message. It is printed unconditionally."""
    message = "[ ERROR ] : " + text
    print( message )

def draw_spacer(char="=") :
    """Print a separator made of `char` repeated 79 times."""
    separator = char * 79
    print( separator )


#==============================================================================
# Parameters
# Path of the raw label file to read.
INPUT = "labelsRaw_Test.txt"
#INPUT = r"D:\PPPV\Cut\MLP\0102\0102Raw.txt"

# Should the program save the result to OUTPUT, or only print the kept lines
# while checking for goofs?
SAVE_FILE = True
OUTPUT = "labels.txt"

# Should the program automatically replace typos (near matches of a known
# name, mood or noise level)?
REPLACE_TYPO = True

# Should the program automatically replace leftover abbreviations (names,
# moods and noise levels)?
REPLACE_LEFTOVER = True

# Should the program automatically add the missing ending punctuation?
REPLACE_ENDING_PUNCTUATION = True

# Should the program automatically expand abbreviations found in transcripts?
REPLACE_ABBREVIATIONS = True

# Verbosity level deciding which messages are displayed.
# 0 : errors only
# 1 : errors and warnings
# 2 : errors, warnings and good
# 3 : errors, warnings, good and infos
ERROR_LEVEL = 3


# Shall not modify after this line
#=============================================================================
# Start-up banner : echoes the active configuration before any work is done.
draw_spacer()
print( "Welcome to label checker V "+str(VERSION) )
print( "Let me bitch about your typo and other mistakes..." )
print()
print( "input\t: " + INPUT )
print( "output\t: " + OUTPUT )

# Indexed with int(flag) : False -> "Nope", True -> "Eyup"
bools = ["Nope", "Eyup"]
print( "Replace typos    \t: " + bools[int(REPLACE_TYPO)] )
print( "Replace leftovers\t: " + bools[int(REPLACE_LEFTOVER)] )
print( "Replace punctuation\t: " + bools[int(REPLACE_ENDING_PUNCTUATION)] )
print( "Replace abbreviations\t: " + bools[int(REPLACE_ABBREVIATIONS)] )

# Only true when all four auto-replace switches are enabled
if (int(REPLACE_TYPO) + int(REPLACE_LEFTOVER) + int(REPLACE_ENDING_PUNCTUATION) + int(REPLACE_ABBREVIATIONS) ) > 3 :
    print( "(You shouldn't put that much trust in me...)" )

# One label per verbosity level. Level 0 only lets error() through, hence
# "error only" for the first entry.
whine = ["error only", "warning and error", "boringly verbose", "worst than your ex"]

# The message helpers accept any integer level, so clamp it to the labels we
# have instead of crashing (or picking a wrong label for negative values).
whineLevel = max( 0, min( ERROR_LEVEL, len(whine) - 1 ) )
print( "Whining level\t: " + whine[whineLevel] )
draw_spacer()
print()

# Lookup tables used by the label checks.
#
# "names", "moods" and "noise" map a canonical value (the dictionary key, which
# is what ends up in the output label) to the list of lower-case abbreviations
# that are replaced by it. The keys are also the reference words for the typo
# (likeness) checks.
#
# "transcript" is laid out differently : it maps the expanded word to the single
# abbreviation string it replaces inside a transcript.
#
# The tables are walked in insertion order, so the order of the entries decides
# which replacement is applied last when several entries match.
#
# NOTE(review): a few keys look misspelled (e.g. "Diamond Tiarra", "Cheerlee",
# "Grany Smith"). They are written as-is to the output labels, so confirm with
# the label consumers before changing them.
abbreviations = {
    # Character names
    "names" : {
        "Twilight":["twilight sparkle", "ts", "twi"],
        "Pinkie":["pinkie pie", "pp"],
        "Applejack":["aj", "apl"],
        "Rainbow":["rainbow dash", "rd"],
        "Rarity":["ra", "rar"],
        "Fluttershy":["fs", "flu"],
        "Spike":["sp", "spi"],
        "Celestia":["princess celestia", "ce", "cel", "celly"],
        "Luna":["princess luna", "lu"],
        "Apple Bloom":["ab"],
        "Sweetie Belle":["sb"],
        "Scootaloo":["sc", "chicken"],
        "Diamond Tiarra":["dt"],
        "Silver Spoon":["ss"],
        "Cheerlee":["ch"],
        "Iron Will":["i"],
        "Cadence":["princess cadence", "cad"],
        "Shining Armor":["sa"],
        "Chrysalis":["queen chrysalis", "cr", "bug butt"],
        "Nightmare Moon":["nm"],
        "Grany Smith":["gs"],
        "Mayor Mare":["mm"],
        "Babs Seed":["bs"],
        "Spitfire":["sf"],
        "Lightning Dust":["ld"]
    },
    # Moods ; a label may carry several of them, separated by whitespace
    "moods" : {
        "Neutral":["n"],
        "Happy":["h"],
        "Amused":["am"],
        "Sad":["s"],
        "Annoyed":["a"],
        "Angry":["ag"],
        "Disgust":["d"],
        "Sarcastic":["sa"],
        "Smug":["sm"],
        "Fear":["f"],
        "Anxious":["ax"],
        "Confused":["c"],
        "Surprised":["su"],
        "Tired":["t"],
        "Whispering":["w"],
        "Shouting":["sh"],
        "Whining":["wh"]
    },
    # Noise levels ; an empty noise field is also accepted by the checks
    "noise" : {
        "Noisy":["q", "n"],
        "Very Noisy":["qq", "vn"]
    },
    # Transcript words : expanded form -> abbreviation
    "transcript" : {
        "mister" : "mr",
        "miss" : "ms"
    }
}


characters = []

# Reads the labels file
#
# labelData receives every line of INPUT (line endings included). The file is
# opened through a "with" block so it is closed whatever happens, and so that
# nothing tries to close a file that could not be opened in the first place.
labelData = None
try :
    with open( INPUT, "r" ) as file :
        labelData = file.readlines()

except FileNotFoundError :
    error( "File " + INPUT + " can't be found." )

except IOError as e :
    error( "IO error {0} when trying to access the file : {1}".format( e.errno, e.strerror ) )

if labelData is None :
    # Nothing could be read : stop here rather than going on and overwriting
    # the output file with an empty result.
    input("Press enter to quit.")
    raise SystemExit( 1 )


# Gather informations from label file

episode = {}
lines = []

#-----------------------------------------------------------------------------
# Work on each label
#
# Every input line is expected to be "start<TAB>end<TAB>label", where label is
# either "character_moods_noise_transcript" or the same four fields preceded
# by an already present "HH_MM_SS_" timestamp. Each field is checked (and
# fixed when the matching REPLACE_* switch is on). Lines that pass are
# appended to `lines` with a timestamp rebuilt from `start`; the others are
# reported and dropped.

for index, line in enumerate( labelData ) :
    # Human readable (1-based) line number used in every message
    lineNo = str( index + 1 )

    # By default, the line shall be defective to be absolutely sure. But I'm lazy
    rottenLine = False

    if line.rstrip() == "" :
        warning( "Empty line " + lineNo )
        rottenLine = True

    elif line.count("\t") != 2 :
        error( "Malformed line " + lineNo + " : Check tabulations on label" )
        rottenLine = True

    else :
        start, end, label = line.split( "\t" )
        label = label.rstrip()

        # A timestamp that is not a number must drop the line, not kill the
        # whole run.
        try :
            length = float(end) - float(start)
            hour, minute, second, milli, trash = timestampToTime( float(start) )
        except (ValueError, OverflowError) :
            error( "Malformed line " + lineNo + " : Check timestamps" )
            rottenLine = True

        if not rottenLine :
            entry = "{hour:0>2}{minute:0>2}{second:0>2}".format( hour=hour, minute=minute, second=second)

            #-----------------------------------------------------------------
            # Checks the label content
            underscoreNumber = label.count("_")

            if underscoreNumber == 6 :
                info( "Line " + lineNo + " : Timestamp already here" )
                # The old timestamp is thrown away : a fresh one is written
                oldHour, oldMinute, oldSecond, character, moods, noise, transcript = label.split( "_" )

            elif underscoreNumber == 3 :
                character, moods, noise, transcript = label.split( "_" )

            else :
                error( "Malformed line " + lineNo + " : Check underscores" )
                rottenLine = True

        if not rottenLine :
            #-------------------------------------------------------------
            # Character checks
            if character != "" :
                # Typo checks
                for char in abbreviations["names"] :
                    if character != char :
                        if likeness( character, char ) > 0.85 :
                            if REPLACE_TYPO :
                                info( "Line " + lineNo + " name : " + character + " replaced by " + char )
                                character = char
                            else :
                                warning( "Line " + lineNo + " name : " + character + " used. Do you mean " + char + "?" )

                # Abbreviations checks
                for char in abbreviations["names"] :
                    if character.lower() in abbreviations["names"][char] :
                        if REPLACE_LEFTOVER :
                            info( "Line " + lineNo + " name : " + character + " replaced by " + char )
                            character = char
                        else :
                            warning( "Line " + lineNo + " name : " + character + " used. Do you mean " + char + "?" )
            else :
                error( "Line " + lineNo + " : No character" )
                rottenLine = True

            #-------------------------------------------------------------
            # Mood checks
            if moods != "" :
                moodList = moods.split()

                # For each mood in the label
                for pos in range( len(moodList) ) :

                    # Typo checks
                    for mood in abbreviations["moods"] :
                        if moodList[pos] != mood :
                            if likeness( moodList[pos], mood ) > 0.85 :
                                if REPLACE_TYPO :
                                    info( "Line " + lineNo + " mood : " + moodList[pos] + " replaced by " + mood )
                                    moodList[pos] = mood
                                else :
                                    warning( "Line " + lineNo + " mood : " + moodList[pos] + " used. Do you mean " + mood + "?" )

                    # Abbreviations checks
                    # Checks against each known mood leftover abbreviation
                    for mood in abbreviations["moods"] :
                        if moodList[pos].lower() in abbreviations["moods"][mood] :
                            if REPLACE_LEFTOVER :
                                info( "Line " + lineNo + " mood : " + moodList[pos] + " replaced by " + mood )
                                moodList[pos] = mood
                            else :
                                warning( "Line " + lineNo + " mood : " + moodList[pos] + " used. Do you mean " + mood + "?" )

                    # Checks if the mood is known
                    moodList[pos] = moodList[pos].capitalize()
                    if moodList[pos] not in abbreviations["moods"] :
                        warning( "Line "+lineNo+" mood : '" + moodList[pos] + "' unknown. Can't check it" )

                # Construct back the string
                moods = " ".join( moodList )

            else :
                error( "Line " + lineNo + " : No mood" )
                rottenLine = True

            #-------------------------------------------------------------
            # Noise checks
            if noise != "" :
                for n in abbreviations["noise"] :

                    # Abbreviation check (case-insensitive, like the name and
                    # mood abbreviation checks)
                    if noise.lower() in abbreviations["noise"][n] :
                        if REPLACE_LEFTOVER :
                            info( "Line " + lineNo + " noise : " + noise + " replaced by " + n )
                            noise = n
                        else :
                            warning( "Line " + lineNo + " noise : " + noise + " used. Do you mean " + n + "?" )

                    # Typo check
                    if noise != n :
                        if likeness( noise.lower(), n.lower() ) > 0.85 :
                            if REPLACE_TYPO :
                                info( "Line " + lineNo + " noise : " + noise + " replaced by " + n )
                                noise = n
                            else :
                                warning( "Line " + lineNo + " noise : " + noise + " used. Do you mean " + n + "?" )


                if noise.lower() not in ["", "noisy", "very noisy"] :
                    error( "Line " + lineNo + " noise is " + noise + " ; shall be nothing, 'noisy' or 'very noisy'" )
                    rottenLine = True

            #-------------------------------------------------------------
            # Transcript checks
            if transcript != "" :

                # Check for ending punctuation
                if transcript[-1] not in ",.!?" :
                    # Guess the punctuation from the moods : at most one "!"
                    # and one "?", in the order the moods bring them
                    ending = ""
                    for mood in moods.split() :
                        if mood in ["Happy", "Angry", "Fear", "Anxious", "Surprised", "Shouting"] :
                            if ending.count("!") == 0 :
                                ending += "!"
                        elif mood in ["Annoyed", "Sarcastic", "Confused"] :
                            if ending.count("?") == 0 :
                                ending += "?"

                    if ending == "" :
                        ending = "."

                    if REPLACE_ENDING_PUNCTUATION :
                        info( "Line " + lineNo + " trans : No punctuation. Guestimate based on mood : " + ending + " (please, do check, I'm not good with emotions...)" )
                        transcript += ending
                    else :
                        warning( "Line " + lineNo + " trans : No punctuation. (Guestimate based on mood : "+ending+" )" )

                # Check for numbers and abbreviations
                words = transcript.split( " " )
                trans = ""
                for word in words :

                    # Numbers
                    # isdecimal() and not isdigit() : the latter also accepts
                    # characters such as superscript digits that int() rejects
                    if word.isdecimal() :
                        if REPLACE_ABBREVIATIONS :
                            number = num2str( int(word) )
                            info( "Line "+lineNo+" : "+ word+" expanded to " + number )
                            word = number

                    # Abbreviations : only whole words are matched
                    for abb in abbreviations["transcript"] :
                        if abbreviations["transcript"][abb] == word :
                            if REPLACE_ABBREVIATIONS :
                                info( "Line "+lineNo+" : " + word+" expanded to " + abb)
                                word = abb
                            else :
                                warning( "Line " + lineNo + " trans : " + word + " used. Do you mean " + abb + "?" )

                    # Rebuild the transcript
                    if trans != "" :
                        trans += " "
                    trans += word

                transcript = trans

            else :
                error( "Line " + lineNo + " : No transcript" )
                rottenLine = True

    #-------------------------------------------------------------------------
    # If no errors, keep the line
    if not rottenLine :
        lines.append( f"{start}\t{end}\t{hour:0>2}_{minute:0>2}_{second:0>2}_{character}_{moods}_{noise}_{transcript}" )
    else :
        info( "Line " + lineNo + " dropped." )


# Save to file if asked
#
# With SAVE_FILE on, every kept line is written to OUTPUT, one per line.
# Otherwise the kept lines are only printed. The "with" block closes the file
# whatever happens and never touches a file that could not be opened.
if SAVE_FILE :
    try :
        with open( OUTPUT, "w" ) as file :
            for line in lines :
                file.write( line + "\n" )
        # Reported once the file is closed, so the data really is on its way
        info( "File saved" )

    except IOError as e :
        error( "IO error {0} when trying to access the file : {1}".format( e.errno, e.strerror ) )
else :
    for line in lines :
        print( line )
    info( "Nothing saved to the disk." )

# Keeps the console window open until the user has read the messages
input("Press enter to quit.")