Advertisement
Guest User

Untitled

a guest
Feb 21st, 2018
110
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  # Return 1 when `char` is a UTF-8 continuation ("extension") byte, i.e. a
  # byte of the form 10xxxxxx (octal \200 through \277 inclusive), and 0
  # otherwise.
  #
  # char: a one-byte string, typically obtained via substr(str, i, 1).
  #       The empty string (e.g. substr() past the end of the input)
  #       compares less than "\200" and therefore yields 0.
  function isUTF8ExtensionChar(char) {
      # The upper bound must be inclusive: \277 (0xBF) is itself a valid
      # continuation byte, e.g. the last byte of U+00BF or U+00FF.
      return (char >= "\200" && char <= "\277")
  }
  4.  
  # Extract a substring from a UTF-8 encoded string, counting in characters
  # rather than bytes. This is required since not all versions of awk
  # respect the encoding specified by $LANG and instead treat every byte as
  # one character. Of particular interest is the default busybox awk within
  # Alpine Linux.
  #
  # Parameters:
  #   str   - the UTF-8 encoded input string.
  #   start - 1-based index of the first character to return, following the
  #           convention of the built-in substr(). As with substr(), a
  #           start below 1 is clamped to 1 and the missing positions are
  #           deducted from len.
  #   len   - maximum number of characters to return.
  #
  # The remaining parameters (inLen, subLen, inIndex, subIndex, outLen) are
  # local variables declared the awk way; callers must not pass them.
  #
  # Returns the requested substring, or "" when start lies beyond the end
  # of the string or len is not positive.
  #
  # Malformed input: a stray continuation byte where a lead byte is
  # expected (only possible at the very start of str) is counted as the
  # start of a character, so the scan always makes progress.
  function substrUTF8(str, start, len, inLen, subLen, inIndex, subIndex, outLen) {
      # Length of input string, in whatever unit this awk's substr() uses
      inLen = length(str)

      # Mirror substr(): positions before the first character do not exist
      # but still count against the requested length.
      if (start < 1) {
          len += start - 1
          start = 1
      }

      # Current index into input string
      inIndex = 1

      # Skip the first (start - 1) characters. Every iteration consumes one
      # lead unit unconditionally and then any continuation bytes that
      # follow it, so the loop cannot stall.
      outLen = 0
      while (outLen < start - 1 && inIndex <= inLen) {
          inIndex++
          while (inIndex <= inLen && isUTF8ExtensionChar(substr(str, inIndex, 1))) {
              inIndex++
          }
          outLen++
      }

      # Index into the input string which corresponds to the first
      # character of the output string
      subIndex = inIndex

      # Number of whole characters collected for the output string
      outLen = 0

      # Advance over up to len characters to find the end of the substring
      while (outLen < len && inIndex <= inLen) {
          inIndex++
          while (inIndex <= inLen && isUTF8ExtensionChar(substr(str, inIndex, 1))) {
              inIndex++
          }
          outLen++
      }

      # Length of the slice of the input string which makes up the output
      subLen = inIndex - subIndex

      return substr(str, subIndex, subLen)
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement