Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Pitch accent tables.  Each one lines up index-for-index with pitchRaw,
# so the accent for raw code pitchRaw[i] is found at position i.
pitchAbove = ["̄", "̈", ""]
pitchBelow = ["̠", "̤", ""]
pitchFont = ["▓", "░", "▒"]  # ["ᄀ","ᄂ","ᄋ"]
# Apparently, hangul doesn't work with the bird
pitchRaw = list("+-0")

# Contour accent tables.  Each one lines up index-for-index with contourRaw.
contour = ["", "́", "̀", "̆", "̂", "̃"]
contourFont = ["▓", "┘", "┐", "┌", "└", "┤"]  # ["ᅡ","ᅢ","ᅣ","ᅤ","ᅥ","ᅦ"]
# Apparently, hangul doesn't work with the bird
contourRaw = list("123456")

# For a digit encoder: digitsRaw[i] is displayed as digitsFont[i].
digitsFont = "⓪①②③④⑤⑥⑦⑧⑨⑩⑪⑫⑬⑭⑮"
digitsRaw = "0123456789abcdef"

# Yet another hackjob: IPAInput[i] is transcribed as IPAOutput[i].
IPAInput = [
    'i', 'e', 'u', 'o', 'p', 't', 's', 'j', 'y',
    '0', '+', '-', '1', '2', '3', '4', '5', '6', 'k', "X",
]
IPAOutput = [
    'i', 'æ', 'u', 'ɒ', 'p', 't', 's', 'ʒ', 'j',
    "", '́', '̀', "", '̌', '̂', '᷉', '᷈', '̬', 'k', "*",
]

# Upper-case letters that count as vowels; X added to mark placeholders.
vowels = set("IEOUX")
# Only if digits is enabled: # introduces a hex number, $ a decimal one.
controlCodes = "#$"
# Converts a number to the proper form. All lowercase!
def numberEncoder(number, base10=False):
    """Render a string of lower-case hex digits with the digitsFont glyphs.

    Args:
        number: Text made of the characters in ``digitsRaw``.  Characters
            outside that set are left untouched.
        base10: When true, ``number`` is first parsed as a decimal integer
            and rewritten in hexadecimal before the glyphs are substituted.

    Returns:
        The text with every ``digitsRaw`` character replaced by the glyph at
        the same position in ``digitsFont``.

    Raises:
        ValueError: If ``base10`` is true and ``number`` is not an integer.
    """
    hex_text = format(int(number), "x") if base10 else number
    glyph_for = str.maketrans(digitsRaw, digitsFont)
    return hex_text.translate(glyph_for)
# Takes a line of text with vowels of the form "V(+|-|0)(1|2|3|4|5|6)" and
# applies appropriate accents.
# If digits is None, then assume use with font.
def encoder(raw, below=False, font=False, digits=None):
    """Apply pitch and contour accents to the vowels of a raw-notation line.

    A vowel (any character whose upper-case form is in ``vowels``) may be
    followed by an optional pitch code from ``pitchRaw`` and an optional
    contour digit 1-6.  Those codes are replaced by the matching entries of
    the selected accent tables; every other character is copied through.

    When ``digits`` is enabled, ``#`` introduces a hexadecimal number and
    ``$`` a decimal one; the digits that follow are rendered through
    ``numberEncoder``.  A control code with no digit after it is emitted
    unchanged.

    Args:
        raw: The text in raw notation.
        below: Use ``pitchBelow`` instead of ``pitchAbove``.  Ignored when
            ``font`` is set.
        font: Use the ``pitchFont``/``contourFont`` glyph tables.
        digits: Enable the number control codes.  ``None`` means "same as
            ``font``".

    Returns:
        The encoded string.
    """
    if digits is None:
        digits = font
    if font:
        pitch = pitchFont
        cont = contourFont
    else:
        cont = contour
        pitch = pitchBelow if below else pitchAbove

    ret = ""
    vowel = ""   # vowel being assembled, with any accents gathered so far
    number = ""  # control code, then the digits collected after it
    # State machine (was previously vowel state, but numbers use it too):
    #    0  outside any vowel or number
    #    1  vowel stored, a pitch code may follow
    #    2  pitch handled, a contour digit may follow
    #    3  vowel complete, flush it on this character
    #    4  inside a hex number        5  inside a decimal number
    #   -4  "#" seen, no digit yet    -5  "$" seen, no digit yet
    # Other negative values mean "just entered on this character"; abs() at
    # the bottom of the loop activates them for the next character.
    state = 0
    for char in raw:
        # First, number checks.  isdecimal() is used rather than isdigit()
        # because isdigit() also accepts characters such as superscript
        # digits, which int() cannot convert.
        if state == -5:  # Are we starting a decimal number?
            if char.isdecimal():
                number = char  # overwrite the symbol
            else:  # not a number after all: just print the symbol
                ret += number
                state = 0
        elif state == -4:  # Are we starting a hex number?
            if char.lower() in digitsRaw:
                number = char.lower()  # numberEncoder expects lower case
            else:
                ret += number
                state = 0
        elif state == 4:  # Are we continuing a hex number?
            lowered = char.lower()
            if lowered in digitsRaw:
                number += lowered
            else:
                ret += numberEncoder(number)  # output our number
                state = 0  # and treat this character normally
        elif state == 5:  # Are we continuing a decimal number?
            if char.isdecimal():
                number += char
            else:
                ret += numberEncoder(number, base10=True)
                state = 0

        # Now, onto the normal stuff.
        # NOTE: control codes are only recognised here, in state 0; one that
        # directly follows a vowel group is copied through literally.
        if state == 0 and char.upper() not in vowels:
            if digits and char in controlCodes:
                # "#" -> -4 (hex), "$" -> -5 (decimal)
                state -= controlCodes.find(char) + 4
                number = char
                continue  # bypass adding to return
            ret += char
            continue
        if state == 0:
            vowel = char
            state = -1
        if state == 1:
            if char in pitchRaw:
                vowel += pitch[pitchRaw.index(char)]
                state = -2
            elif char.isdecimal() and int(char) < 7:
                state = 2  # no pitch code: skip right to the contour
            else:
                vowel += pitch[2]  # neutral pitch; only visible with font
                state = 3  # jump right to the end
        if state == 2:
            if char.isdecimal() and int(char) < 7:
                index = int(char) - 1
                if index < 0:  # "0" is not a contour digit
                    state = 3
                else:
                    vowel += cont[index]
                    state = -3
            else:
                state = 3
        if state == 3:
            if char.upper() not in vowels:
                ret += vowel + char
                vowel = ""
                state = 0
            else:  # if the next one is a vowel, store it.
                ret += vowel
                vowel = char
                state = -1
        state = abs(state)  # get ready for next character

    if state == 1:
        # Font fix for a bare vowel at the very end: give it the neutral
        # pitch, exactly as a bare vowel in the middle of the line gets.
        vowel += pitch[2]
    if state < -3:  # line ended right after a control code
        ret += number
    if state > 3:  # line ended inside a number
        ret += numberEncoder(number, base10=(state > 4))
    ret += vowel  # clear it out if anything is leftover
    return ret
import unicodedata  # Used to make a compatibility thing with the decoder


# Normalizes the string for use with decoding
def decompose(composed):
    """Return ``composed`` in Unicode NFD (canonical decomposition) form.

    Precomposed accented letters are split into a base letter followed by
    combining marks.
    """
    normal_form = "NFD"
    return unicodedata.normalize(normal_form, composed)
# Returns raw form from cooked; very similar to the encoder.
# Verbose adds optional 0s and 1s.
# Decomp forces decomposition. Recommended.
def decoder(encoded, verbose=False, below=False, decomp=True):
    """Turn accented ("cooked") text back into raw notation.

    Each vowel (a character whose upper-case form is in ``vowels``) may be
    followed by a pitch mark and then a contour mark; the marks are replaced
    by the matching ``pitchRaw`` / ``contourRaw`` codes.  All other
    characters are copied through.

    Args:
        encoded: The accented text.
        verbose: Also write the optional codes: "0" when a contour mark
            follows a vowel with no pitch mark, and "1" when a pitch mark is
            not followed by a contour mark.
        below: Look for the ``pitchBelow`` marks instead of ``pitchAbove``.
        decomp: Run the text through ``decompose`` first.

    Returns:
        The text in raw notation.
    """
    marks = pitchBelow if below else pitchAbove
    text = decompose(encoded) if decomp else encoded
    if verbose:
        text = text + " "  # sentinel so the last vowel is fully flushed

    pieces = []
    pending = ""  # vowel plus the raw codes collected for it so far
    # phase: 0 idle, 1 expect pitch, 2 expect contour, 3 flush.  A negative
    # value was set on this character and takes effect on the next one.
    phase = 0
    for ch in text:
        is_vowel = ch.upper() in vowels
        if phase == 0:
            if not is_vowel:
                pieces.append(ch)
                continue
            pending = ch
            phase = -1
        if phase == 1:
            if ch in marks:
                pending += pitchRaw[marks.index(ch)]
                phase = -2
            elif ch in contour:
                if verbose:
                    pending += "0"
                phase = 2  # fall through and read it as the contour
            else:
                phase = 3
        if phase == 2:
            if ch in contour:
                pending += contourRaw[contour.index(ch)]
                phase = -3
            else:
                if verbose:
                    pending += "1"
                phase = 3
        if phase == 3:
            if is_vowel:
                # the next one is a vowel: emit the old one, store the new
                pieces.append(pending)
                pending = ch
                phase = -1
            else:
                pieces.append(pending + ch)
                pending = ""
                phase = 0
        phase = abs(phase)  # get ready for next character

    pieces.append(pending)  # clear it out if anything is leftover
    result = "".join(pieces)
    # In verbose mode the sentinel space is the last character; drop it.
    return result[0:-1] if verbose else result
# Very hackjobby, doesn't check if numbers are accenting vowels or not.
def rawToIPA(raw, ipaChart=IPAOutput):
    """Transcribe raw notation into IPA, one character at a time.

    The text is lower-cased, then every character listed in ``IPAInput`` is
    replaced by the entry at the same position in ``ipaChart``; any other
    character is copied through.

    Args:
        raw: Text in raw notation.
        ipaChart: Replacement table, index-parallel with ``IPAInput``.

    Returns:
        The transcribed string.
    """
    # The input is lower-cased, so the lookup keys must be too; otherwise
    # the upper-case "X" placeholder entry could never match.  setdefault
    # keeps the first position of a key, as list.index() would.
    slot_of = {}
    for slot, key in enumerate(IPAInput):
        slot_of.setdefault(key.lower(), slot)
    return "".join(
        ipaChart[slot_of[char]] if char in slot_of else char
        for char in raw.lower()
    )
# Helpful combination of encoder, decoder and rawToIPA.
def output(w, raw=None, verbose=False, below=False, decomp=True, old=False):
    """Print the cooked, IPA and raw forms of ``w`` on one line.

    Args:
        w: The word or line, in either raw or cooked notation.
        raw: True if ``w`` is raw notation, False if it is cooked.  ``None``
            picks automatically: ``w`` counts as raw when decoding leaves it
            unchanged.
        verbose: Passed to ``decoder``.
        below: Passed to ``encoder`` and ``decoder``.
        decomp: Passed to ``decoder``.
        old: Print as ``cooked | IPA | raw`` instead of ``cooked /IPA/ (raw)``.

    Returns:
        None; the result is printed.
    """
    if raw is None:
        raw = decoder(w, verbose, below, decomp) == w
    if raw:
        cooked = encoder(w, below)
        IPA = rawToIPA(w)
        if verbose:
            # Pass the flags in their proper positions, as the other decoder
            # calls do; previously `below` landed in the `verbose` slot.
            wRaw = decoder(w, verbose, below, decomp)
        else:
            wRaw = w
    else:
        cooked = w
        wRaw = decoder(w, verbose, below, decomp)
        IPA = rawToIPA(wRaw)
    if old:
        print(cooked, IPA, wRaw, sep=" | ")
    else:
        print(cooked + " /" + IPA + "/ (" + wRaw + ")")


out = output  # shortcut
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement