# Dinjdinj transliterator

import re, datetime, os
from datetime import datetime, timedelta

# Date stamp used in the output file name; the clock is shifted back four
# hours before it is formatted.
date = (datetime.now() - timedelta(hours=4)).strftime("%Y-%m-%d")

# Dropbox root for each known machine, keyed by the value of the HOME
# environment variable. A missing or unknown HOME raises KeyError.
_DROPBOX_BY_HOME = {
    r'/home/hellerick': r'/home/hellerick/Dropbox',
    r'C:\Users\OTK': r'D:\Dropbox',
}
dropboxfolder = _DROPBOX_BY_HOME[os.environ['HOME']]

# Folder holding the Python scripts and the romanization dictionaries.
pyscriptfolder = os.path.join(dropboxfolder, 'Programming', 'Python')

# Document to be transliterated.
inputfilename = os.path.join(
    dropboxfolder,
    'KSV-shared',
    'Lib',
    'Jefremov, Ivan Anatoljevich',
    'Tumannostj Andromedy',
    'EFRITUMA.htm',
)

# Output path: same as the input, with ".lat.<date>" inserted before the
# file extension.
_input_root, _input_ext = os.path.splitext(inputfilename)
outputfilename = _input_root + '.lat.' + date + _input_ext

# Switches read by the functions below.
hyphenate = True
simplify = True

# Cyrillic word -> Latin word; filled in by makemaindict().
maindict = {}

if simplify:
    _romanization_dir = os.path.join(pyscriptfolder, 'RussianRomanization')

    # One entry per line; empty lines are discarded.
    with open(os.path.join(_romanization_dir, 'KeepDuplicationDictionary.txt'), mode='rt', encoding='utf-8') as f:
        keepdupldict = [line for line in f.read().split('\n') if line != '']

    # Tab-separated lines: first field is the key, second field the value.
    # Lines without a tab are ignored; fields after the second are dropped.
    with open(os.path.join(_romanization_dir, 'SpecialCasesDictionary.txt'), mode='rt', encoding='utf-8') as f:
        _spec_rows = [line.split('\t') for line in f.read().split('\n')]
    specdict = {row[0]: row[1] for row in _spec_rows if len(row) > 1}

def checkspecialspelling(w):
    """Apply the special-case spellings from the global ``specdict`` to *w*.

    Each key of ``specdict`` is used as a regular expression in which ``*``
    stands for "any run of characters".  When the key matches at the start
    of *w*, every occurrence of the key's literal part (the key with the
    ``*`` removed) is replaced by the corresponding dictionary value.  The
    entries are applied one after another, so a later entry sees the result
    of the earlier ones.

    Returns the rewritten word (or *w* unchanged when nothing matches).
    """
    for entry, respelling in specdict.items():
        # str.replace avoids the invalid '\*' escape the non-raw pattern
        # string used to contain.
        if re.match(entry.replace('*', '.*'), w):
            w = re.sub(entry.replace('*', ''), respelling, w)
    return w

def keepduplication(w):
    """Run the entries of the global ``keepdupldict`` over the word *w*.

    Each entry is a pattern in which ``*`` stands for "any run of
    characters".  When an entry matches at the start of *w*, the entry's
    literal part (the entry without its wildcards) is substituted in *w* by
    a copy of itself whose doubled consonants have been rewritten.

    NOTE(review): the rewrite replaces a doubled consonant by the same two
    letters, so as written the substitution leaves *w* unchanged.  A
    separator between the two letters may have been lost -- confirm.

    Returns the resulting word.
    """
    for entry in keepdupldict:
        pattern = re.sub(r'\*', '.*', entry)
        if re.match(pattern, w):
            # Raw string: the former non-raw '\.\*' was an invalid escape.
            literal = re.sub(r'\.\*', '', pattern)
            rewritten = re.sub(r'([бвгджзклмнпрстфхцчшщ])\1', r'\1\1', literal)
            w = re.sub(literal, rewritten, w)
    return w

# Ordered (pattern, replacement) rewrite rules.  They run strictly in this
# order, each one on the output of the previous one, before the final
# letter-for-letter table is applied.  Several replacements deliberately mix
# Latin and Cyrillic letters: the Cyrillic ones are converted by the table.
_TRANSLIT_RULES = (
    (r'и([ау])', r'ï\1'),
    (r'цï([ау])', r'цi\1'),
    (r'ля', r'lа'),
    (r'лю', r'lу'),
    (r'л([еиï])', r'l\1'),
    (r'ль([еёио])', r'lй\1'),
    (r'ль', r'l'),
    (r'щ', r'сч'),
    (r'([б-джзкмнп-тф-ш])ю', r'\1иу'),
    (r'([б-джзкмнп-тф-ш])я', r'\1иа'),
    (r'ю', r'йу'),
    (r'я', r'йа'),
    (r'ъ', r'й'),
    (r'[йь]й', r'й'),
    (r'ЛL', r'LL'),
    (r'Лl', r'Ll'),
    (r'лl', r'll'),
    (r'([жчш])ь\b', r'\1'),
)

# Position-by-position letter table: the n-th Cyrillic letter becomes the
# n-th Latin letter.
_TRANSLIT_CYRILLIC = 'абцчдеёэфгхийьклмнопрсштувызж'
_TRANSLIT_LATIN = 'abcčdeëèfghijjkłmnoprsštuvyzž'


def translit(w):
    """Return the Latin transliteration of the lower-case Cyrillic text *w*.

    The context-dependent rewrite rules are applied first, in order; the
    remaining Cyrillic letters are then replaced one for one.  Characters
    covered by neither step are passed through unchanged.
    """
    for pattern, replacement in _TRANSLIT_RULES:
        w = re.sub(pattern, replacement, w)
    letter_table = {
        ord(cyrillic): latin
        for cyrillic, latin in zip(_TRANSLIT_CYRILLIC, _TRANSLIT_LATIN)
    }
    return w.translate(letter_table)

def hyphen(w):
    """Insert soft hyphens (U+00AD) at the permitted break points of *w*.

    *w* is transliterated (lower-case Latin) text, possibly several words.
    The function first puts a ``_`` marker between every two adjacent
    letters, then removes the markers at the places where a break is not
    allowed, and finally turns the surviving markers into soft hyphens.
    ``<<`` and ``>>`` are temporary word-start / word-end markers.

    When the global ``hyphenate`` flag is false no break points are
    computed and any soft hyphen already present in *w* is removed.

    NOTE(review): the previous code deleted the ``_`` markers at the end
    (replacement string was empty), so no break point ever reached the
    output, and the non-hyphenating branch substituted an empty pattern by
    an empty string.  Both look like a literal soft hyphen that was lost
    from the source; U+00AD is restored here on that assumption.

    Returns the processed text.
    """
    soft_hyphen = '\u00ad'
    if not hyphenate:
        return w.replace(soft_hyphen, '')

    vow='[aeëèiïouy]'
    con='[bcčdfghjklłmnprsštvzž]'
    # Character class of all letters: the vowel class and the consonant
    # class fused into one bracket expression (lower case only).
    let=vow[:-1]+con[1:]

    # Mark the first and the last letter of every word.
    w = re.sub(r'\b('+let+')', r'<<\1', w)
    w = re.sub(r'('+let+r')\b', r'\1>>', w)
    # Put a marker between every pair of adjacent letters.  Matches do not
    # overlap, so a second pass fills the gaps left by the first one.
    w = re.sub('('+let+')('+let+')', r'\1_\2', w)
    w = re.sub('('+let+')('+let+')', r'\1_\2', w)
    # Handling of '-' already present in the text: no marker in front of a
    # single letter followed by '-'; '-' + letter + marker collapses to the
    # letter; every remaining '-' becomes a marker.
    w = re.sub('_('+let+')-', r'\1-', w)
    w = re.sub('-('+let+')_', r'\1', w)
    w = re.sub('-', r'_', w)
    # Never break between a consonant and the vowel that follows it.
    w = re.sub('('+con+')_('+vow+')', r'\1\2', w)
    # Never split off the first or the last letter of a word.
    w = re.sub('(<<'+let+')_', r'\1', w)
    w = re.sub('_('+let+'>>)', r'\1', w)
    # Never break after a word-initial or before a word-final consonant run
    # (each rule is applied twice).
    w = re.sub('(<<'+con+'+)_', r'\1', w)
    w = re.sub('(<<'+con+'+)_', r'\1', w)
    w = re.sub('_('+con+'+>>)', r'\1', w)
    w = re.sub('_('+con+'+>>)', r'\1', w)
    # Keep "ia" / "iu" together.
    w = re.sub('i_([au])', r'i\1', w)
    # Keep "sč" together, except after a word-initial "ra", where the break
    # is moved between "s" and "č".
    w = re.sub('s_č', r'sč', w)
    w = re.sub('<<ra_sč', r'<<ras_č', w)
    # Keep "st" together.
    w = re.sub('s_t', r'st', w)
    # No break between a vowel and a following "j".
    w = re.sub('('+vow+')_j', r'\1j', w)
    # Consonant + "j" + consonant: drop the break in front of the "j".
    w = re.sub('('+con+')_(j_'+con+')', r'\1\2', w)
    # Doubled consonant: drop the break before it and the break after it,
    # keeping only the one between the two letters.
    w = re.sub('_('+con+r')(_\1)', r'\1\2', w)
    w = re.sub('('+con+r')(_\1)_', r'\1\2', w)
    # Drop the break in front of the listed consonant clusters.
    w = re.sub('_(l_[cnš]|ł_[čkž]|m_[čp]|n_t_r|r_[mnt]|s_t_s|t_s)', r'\1', w)
    # Drop the break in front of consonant + "j".
    w = re.sub('_('+con+'j)', r'\1', w)
    # Word-initial three-letter prefixes: remove the break between their
    # second and third letter (the '<<' marker is dropped along the way).
    for p in ('nad', 'nis', 'niz', 'pod', 'ras', 'raz', 'ros', 'roz', 'vos', 'voz'):
        w = re.sub('<<(['+p[0]+p[0].upper()+']['+p[1]+p[1].upper()+'])_(['+p[2]+p[2].upper()+'])', r'\1\2', w)
    # Remove the word markers and emit the surviving break points.
    w = re.sub(r'(<<|>>)',r'', w)
    w = w.replace('_', soft_hyphen)
    return w

def transword(w):
    """Return the Latin form of the Cyrillic word *w*, preserving its case.

    The word is looked up in lower case in the global ``maindict`` (a
    missing word raises ``KeyError``).  If the second letter of *w* is upper
    case the result is returned fully upper-cased; otherwise, if the first
    letter is upper case, the result is capitalized; otherwise it is
    returned as stored.
    """
    # The second letter decides "all caps"; the first letter is only
    # consulted when that test fails.
    shouted = len(w) > 1 and w[1].isupper()
    titled = not shouted and w[0].isupper()

    latin = maindict[w.lower()]

    if shouted:
        return latin.upper()
    if titled:
        return latin.capitalize()
    return latin


def transtext(t):
    """Transliterate every Cyrillic word of the plain-text fragment *t*.

    The fragment is cut into alternating non-Cyrillic and Cyrillic runs;
    the Cyrillic runs are passed through transword(), everything else is
    kept as it is.  Returns the reassembled string.
    """
    if t == '':
        return ''
    # Splitting on a capturing group puts the Cyrillic runs at the odd
    # positions of the resulting list.
    pieces = re.split(r"([а-яёА-ЯЁ]+)", t)
    pieces[1::2] = [transword(word) for word in pieces[1::2]]
    return ''.join(pieces)

def _wildcard_rule(entry, replacement):
    """Translate a dictionary entry into a (regex, replacement template) pair.

    A ``*`` in *entry* matches any run of non-space characters.  Text matched
    by a leading and/or trailing ``*`` is carried over around *replacement*.
    """
    # str.replace is used on purpose: re.sub() with a replacement template
    # containing "\S" raises "bad escape" on Python 3.7 and later.
    pattern = r'\b' + entry.replace('*', r'(\S*)') + r'\b'
    leading = entry.startswith('*')
    trailing = entry.endswith('*')
    if leading and trailing:
        replacement = r'\1' + replacement + r'\2'
    elif leading:
        replacement = r'\1' + replacement
    elif trailing:
        replacement = replacement + r'\1'
    return pattern, replacement


def makemaindict(c):
    """Build the global word dictionary for the text *c*.

    Collects every distinct lower-case Cyrillic word of *c* into the global
    ``cyrlist`` (sorted), transliterates the whole list in one go and stores
    the results in the global ``latlist``; ``maindict`` maps each Cyrillic
    word to the Latin word at the same position.

    When the global ``simplify`` flag is set, the special-case respellings
    of ``specdict`` are applied first and doubled consonants are reduced to
    a single letter, except inside the literal part of a matching
    ``keepdupldict`` entry.

    NOTE(review): the pairing relies on every word still being exactly one
    whitespace-free token after all substitutions; a replacement that
    removes a word or introduces a space would shift the mapping.
    """
    global maindict, cyrlist, latlist
    cyrlist = sorted(set(re.sub(r'[^а-яё]+', r' ', c.lower()).split()))
    t = ' '.join(cyrlist)
    if simplify:
        # Temporary separator placed between two consonants that must stay
        # doubled, so that the collapsing step below cannot see them as a
        # pair.  It is removed again right after that step.
        keep_mark = '\x00'

        for entry, respelling in specdict.items():
            if not entry:
                continue
            pattern, template = _wildcard_rule(entry, respelling)
            t = re.sub(pattern, template, t)

        for entry in keepdupldict:
            if not entry:
                continue
            # Previously the doubled letters were replaced by themselves,
            # which protected nothing: listed words lost their doubled
            # consonants like every other word.
            guarded = re.sub(
                r'([бвгджзклмнпрстфхцчшщ])\1',
                lambda pair: pair.group(1) + keep_mark + pair.group(1),
                entry.replace('*', ''),
            )
            pattern, template = _wildcard_rule(entry, guarded)
            t = re.sub(pattern, template, t)

        # Collapse every remaining doubled consonant, then lift the guard.
        t = re.sub(r'([бвгдзйклмнпрстфхцчшщ])\1', r'\1', t)
        t = t.replace(keep_mark, '')

    t = translit(t)
    if hyphenate:
        t = hyphen(t)
    latlist = t.split()
    maindict = dict(zip(cyrlist, latlist))

def transcode(c):
    """Transliterate the Cyrillic text of the markup string *c*.

    The word dictionary is rebuilt from *c* first.  The string is then cut
    at anything of the form ``<...>``; those tag pieces are copied verbatim
    and every piece between them is passed through transtext().  Returns
    the reassembled string.
    """
    makemaindict(c)
    if c == '':
        return ''
    # With a capturing group the tags land at the odd positions, the text
    # between them at the even ones.
    chunks = re.split(r"(<[^>]+>)", c)
    chunks[0::2] = [transtext(chunk) for chunk in chunks[0::2]]
    return ''.join(chunks)

def convertfile():
    """Transliterate ``inputfilename`` and write the result to ``outputfilename``.

    Both files are handled as UTF-8 text.  The length of the text is
    reported on standard output after reading and after writing.
    """
    with open(inputfilename, mode='rt', encoding='utf-8') as source:
        text = source.read()
    print ('Input text length', len(text), 'characters.')

    text = transcode(text)

    with open(outputfilename, mode='wt', encoding='utf-8') as target:
        target.write(text)
    print ('Output text length', len(text), 'characters.')

def p(t):
    """Print the transliteration of *t* (shortcut for interactive use)."""
    converted = transcode(t)
    print (converted)

# Run the conversion as soon as this file is executed (or imported).
convertfile()