Advertisement
romaji

Chirp accentor

Feb 23rd, 2019
408
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.77 KB | None | 0 0
  1. pitchAbove=["̄","̈",""]
  2. pitchBelow=["̠","̤",""]
  3. pitchFont=["▓","░","▒"]#["ᄀ","ᄂ","ᄋ"]
  4. #Apparently, hangul doesn't work with the bird
  5. pitchRaw=["+","-","0"]
  6. contour=["","́","̀","̆","̂","̃"]
  7. contourFont=["▓","┘","┐","┌","└","┤"]#["ᅡ","ᅢ","ᅣ","ᅤ","ᅥ","ᅦ"]
  8. #Apparently, hangul doesn't work with the bird
  9. contourRaw=["1","2","3","4","5","6"]
  10.  
  11. #For a digit encoder.
  12. digitsFont="⓪①②③④⑤⑥⑦⑧⑨⑩⑪⑫⑬⑭⑮"
  13. digitsRaw="0123456789abcdef"
  14.  
  15. #Yet another hackjob.
  16. IPAInput=['i', 'e', 'u', 'o', 'p', 't', 's', 'j', 'y',
  17.           '0', '+', '-', '1', '2', '3', '4', '5', '6', 'k',"X"]
  18. IPAOutput=['i', 'æ', 'u', 'ɒ', 'p', 't', 's', 'ʒ', 'j',
  19.            "",  '́',  '̀',  "",  '̌',  '̂',  '᷉',  '᷈',  '̬', 'k',"*"]
  20.  
  21. vowels ={"I","E","O","U","X"} #X added to mark placeholders
  22. controlCodes="#$" #only if digits is enabled, # for hex, $ for decimal
  23.  
  24. #converts a number to the proper form. All lowercase!
  25. def numberEncoder(number,base10=False):
  26.     if base10:
  27.         number= '%x' % int(number)
  28.     for i in range(16):
  29.         number=number.replace(digitsRaw[i],digitsFont[i])
  30.     return number
  31.  
  32. #Takes a line of text with vowels of the form "V(+|-|0)(1|2|3|4|5|6)" and applies appropriate accents.
  33.              #If digits is none, then assume use with font
  34. def encoder(raw,below=False,font=False,digits=None):
  35.     if digits==None:
  36.         digits=font
  37.     if font:
  38.         pitch=pitchFont
  39.         cont=contourFont
  40.     else:
  41.         cont=contour
  42.         if below:
  43.             pitch=pitchBelow
  44.         else:
  45.             pitch=pitchAbove
  46.     ret=""
  47.     vowel=""
  48.     number=""
  49.     state=0 #was previously vowel state, but numbers use it too
  50.     for char in raw:
  51.         #first, number checks
  52.         if state==-5: #Are we starting a decimal number?
  53.             if char.isdigit(): #if so, coninue on
  54.                 number=char #overwrite the symbol
  55.             else: #If not, then just print the symbol
  56.                 ret+=number
  57.                 state=0 # start again
  58.         elif state==-4: #Are we starting a hex number?
  59.             if digitsRaw.find(char.lower()) !=-1:
  60.                 number=char.lower() #Gotta keep it lower
  61.             else:
  62.                 ret+=number
  63.                 state=0
  64.         elif state == 4: # Are we continuing a hex number?
  65.             temp = char.lower() #We use this twice
  66.             val = digitsRaw.find(temp)
  67.             if val == -1: #is it not in there?
  68.                 ret+=numberEncoder(number) # output our number
  69.                 state=0 #Return to normal excution
  70.             else:
  71.                 number+=temp
  72.         elif state == 5: #are we continuing a decimal number?
  73.             if char.isdigit():
  74.                 number+=char
  75.             else:
  76.                 ret+=numberEncoder(number,base10=True)
  77.                 state=0
  78.  
  79.         #Now, onto the normal stuff
  80.         if state==0 and char.upper() not in vowels:
  81.             if digits and char in controlCodes:
  82.                 state-=controlCodes.find(char)+4
  83.                 #Set to -5 or -4, depending on if it's hex or not
  84.                 number=char
  85.                 continue #bipass adding to return
  86.             ret+=char
  87.             continue
  88.         if state==0:
  89.             vowel=char
  90.             state=-1
  91.         if state==1:
  92.             if char in {"0","-","+"}:
  93.                 index=pitchRaw.index(char)
  94.                 vowel+=pitch[index]
  95.                 state=-2
  96.             elif char.isdigit() and int(char)<7:
  97.                 #vowel+=pitch[2] #Only helps with font
  98.                 state=2 #skip right to next entry
  99.             else:
  100.                 vowel+=pitch[2] #Only helps with font
  101.                 #vowel+=cont[0] #again, only for font
  102.                 state=3 #jump right to the end
  103.         if state==2:
  104.             if char.isdigit() and int(char)<7:
  105.                 index=int(char)-1
  106.                 if index <0:
  107.                     #vowel+=cont[0] #again, only for font
  108.                     state=3
  109.                 else:
  110.                     vowel+=cont[index]
  111.                     state=-3
  112.             else:
  113.                 #vowel+=cont[0] #again, only for font
  114.                 state=3
  115.         if state==3:
  116.             if char.upper() not in vowels:
  117.                 ret+=vowel+char
  118.                 vowel=""
  119.                 state=0
  120.             else: #if the next one is a vowel, store it.
  121.                 ret+=vowel
  122.                 vowel=char
  123.                 state=-1
  124.         state=abs(state) #get ready for next character
  125.     if state==-1:
  126.         vowel+=vowel+pitch[2] #font fix for single char
  127.     if state <-3: #purge number if last
  128.         ret+=number
  129.     if state>3: #purge numbers
  130.         ret+=numberEncoder(number,base10=(state>4))
  131.     ret+=vowel #clear it out if anything is leftover
  132. ##    if hangul: #Compose the hangul!
  133. ##        return unicodedata.normalize("NFC",ret)
  134.     return ret
  135.  
  136. import unicodedata # Used to make a compatibility thing with the decoder
  137. #Normalizes the string for use with decoding
  138. def decompose(composed):
  139.     return unicodedata.normalize("NFD",composed)
  140. #returns raw form from cooked, very similar
  141. #Verbose adds optional 0s and 1s
  142. # Decomp forces decomposition. Recommended.
  143. def decoder(encoded,verbose=False,below=False,decomp=True):
  144.     if below:
  145.         pitch=pitchBelow
  146.     else:
  147.         pitch=pitchAbove
  148.     ret=""
  149.     vowel=""
  150.     vowelState=0
  151.     if decomp:
  152.         encoded = decompose(encoded)
  153.     if verbose:
  154.         encoded+=" " #fully flush out the vowel.
  155.     for char in encoded:
  156.         if vowelState==0 and char.upper() not in vowels:
  157.             ret+=char
  158.             continue
  159.         if vowelState==0:
  160.             vowel=char
  161.             vowelState=-1
  162.         if vowelState==1:
  163.             if char in pitch:
  164.                 index=pitch.index(char)
  165.                 vowel+=pitchRaw[index]
  166.                 vowelState=-2
  167.             elif char in contour:
  168.                 if verbose:
  169.                     vowel+="0"
  170.                 vowelState=2
  171.             else:
  172.                 vowelState=3
  173.         if vowelState == 2:
  174.             if char in contour:
  175.                 index=contour.index(char)
  176.                 vowel+=contourRaw[index]
  177.                 vowelState=-3
  178.             else:
  179.                 if verbose:
  180.                     vowel+="1"
  181.                 vowelState=3
  182.         if vowelState == 3:
  183.             if char.upper() not in vowels:
  184.                 ret+=vowel+char
  185.                 vowel=""
  186.                 vowelState=0
  187.             else: #if the next one is a vowel, store it.
  188.                 ret+=vowel
  189.                 vowel=char
  190.                 vowelState=-1
  191.         vowelState=abs(vowelState) #get ready for next character
  192.         #print(vowel) #debug
  193.     ret+=vowel #clear it out if anything is leftover
  194.     if verbose:
  195.         return ret[0:-1] #get rid of added space.
  196.     return ret
  197.  
  198. #very hackjobby, doesn't check if numbers are accenting vowels or not.
  199. def rawToIPA(raw,ipaChart=IPAOutput):
  200.     raw=raw.lower()
  201.     ret=""
  202.     for char in raw:
  203.         if char in IPAInput:
  204.             index=IPAInput.index(char)
  205.             ret+=ipaChart[index]
  206.         else:
  207.             ret+=char
  208.     return ret
  209.  
  210. # Helpful combination
  211. # Added auto picker for
  212. def output(w,raw=None,verbose=False,below=False,decomp=True,old=False):
  213.     if raw == None:
  214.         temp = decoder(w,verbose,below,decomp)
  215.         raw = (temp == w)
  216.     if raw:
  217.         cooked=encoder(w,below)
  218.         IPA=rawToIPA(w)
  219.         if verbose:
  220.             wRaw=decoder(w,below)
  221.         else:
  222.             wRaw=w
  223.     else:
  224.         cooked=w
  225.         wRaw=decoder(w,verbose,below,decomp)
  226.         IPA=rawToIPA(wRaw)
  227.     if old:
  228.         print(cooked,IPA,wRaw,sep=" | ")
  229.     else:
  230.         print(cooked+" /"+IPA+"/ ("+wRaw+")")
  231.  
  232. out=output #shortcut
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement