daily pastebin goal
12%
SHARE
TWEET

Untitled

a guest Jan 13th, 2018 57 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  -- When the SERVER flag is set, register utf8data.lua with AddCSLuaFile
  -- (presumably so clients receive the file -- confirm against the host API),
  -- then load utf8data.lua into the current environment.
  if SERVER then AddCSLuaFile("utf8data.lua") end
  include("utf8data.lua")
  5.  
  6. -- Copyright (c) 2006-2007, Kyle Smith
  7.  
  -- Returns the number of bytes (1-4) used by the UTF-8 character that starts
  -- at byte i of s. Doubles as a validator: the sequence is checked against
  -- the byte ranges of RFC 3629 and an error is raised if it is malformed.
  --
  --   s  string to inspect
  --   i  byte index of the character's first byte (defaults to 1)
  --
  -- Raises an error when an argument has the wrong type, when i does not
  -- address a byte of s, when the sequence is cut off by the end of the
  -- string, or when any byte is outside the range allowed for its position.
  local function utf8charbytes(s, i)
      -- argument defaults
      i = i or 1

      -- argument checking
      if type(s) ~= "string" then
          error("bad argument #1 to 'utf8charbytes' (string expected, got ".. type(s).. ")")
      end
      if type(i) ~= "number" then
          error("bad argument #2 to 'utf8charbytes' (number expected, got ".. type(i).. ")")
      end

      local c = s:byte(i)

      -- s:byte returns nothing when i is outside the string; report that
      -- explicitly instead of failing on a nil comparison below
      if not c then
          error("bad argument #2 to 'utf8charbytes' (index out of range)")
      end

      -- determine bytes needed for character, based on RFC 3629
      if c <= 127 then
          -- UTF8-1 (%x00-7F): NUL is a valid one-byte character
          return 1
      elseif c >= 194 and c <= 223 then
          -- UTF8-2
          local c2 = s:byte(i + 1)

          if not c2 then
              error("UTF-8 string terminated early")
          end

          -- validate byte 2
          if c2 < 128 or c2 > 191 then
              error("Invalid UTF-8 character")
          end

          return 2
      elseif c >= 224 and c <= 239 then
          -- UTF8-3
          local c2 = s:byte(i + 1)
          local c3 = s:byte(i + 2)

          if not c2 or not c3 then
              error("UTF-8 string terminated early")
          end

          -- validate byte 2: lead bytes 224 and 237 allow a narrower range
          -- than the generic continuation range 128-191
          if c == 224 and (c2 < 160 or c2 > 191) then
              error("Invalid UTF-8 character")
          elseif c == 237 and (c2 < 128 or c2 > 159) then
              error("Invalid UTF-8 character")
          elseif c2 < 128 or c2 > 191 then
              error("Invalid UTF-8 character")
          end

          -- validate byte 3
          if c3 < 128 or c3 > 191 then
              error("Invalid UTF-8 character")
          end

          return 3
      elseif c >= 240 and c <= 244 then
          -- UTF8-4
          local c2 = s:byte(i + 1)
          local c3 = s:byte(i + 2)
          local c4 = s:byte(i + 3)

          if not c2 or not c3 or not c4 then
              error("UTF-8 string terminated early")
          end

          -- validate byte 2: lead bytes 240 and 244 allow a narrower range
          -- than the generic continuation range 128-191
          if c == 240 and (c2 < 144 or c2 > 191) then
              error("Invalid UTF-8 character")
          elseif c == 244 and (c2 < 128 or c2 > 143) then
              error("Invalid UTF-8 character")
          elseif c2 < 128 or c2 > 191 then
              error("Invalid UTF-8 character")
          end

          -- validate byte 3
          if c3 < 128 or c3 > 191 then
              error("Invalid UTF-8 character")
          end

          -- validate byte 4
          if c4 < 128 or c4 > 191 then
              error("Invalid UTF-8 character")
          end

          return 4
      else
          -- continuation bytes (128-191) and the lead values 192, 193 and
          -- 245-255 can never start a character
          error("Invalid UTF-8 character")
      end
  end
  101.  
  -- Counts the UTF-8 characters in s by hopping from one character start to
  -- the next with utf8charbytes. Raises an error if s is not a string;
  -- errors raised by utf8charbytes for malformed input propagate unchanged.
  local function utf8len(s)
      if type(s) ~= "string" then
          error("bad argument #1 to 'utf8len' (string expected, got ".. type(s).. ")")
      end

      local total = s:len()
      local cursor, count = 1, 0

      while cursor <= total do
          -- each iteration consumes exactly one character
          count = count + 1
          cursor = cursor + utf8charbytes(s, cursor)
      end

      return count
  end
  -- expose the function on the string library table
  string.utf8len = utf8len
  121.  
  -- Works like string.sub, except that i and j count UTF-8 characters instead
  -- of bytes.
  --
  --   s  source string
  --   i  index of the first character; negative values count from the end
  --   j  index of the last character (defaults to -1, the last character);
  --      negative values count from the end
  --
  -- Returns the selected substring, or "" when the range is empty or lies
  -- entirely outside the string. Raises an error when an argument has the
  -- wrong type; errors raised by utf8charbytes for malformed input propagate.
  local function utf8sub(s, i, j)
      -- argument defaults
      j = j or -1

      -- argument checking
      if type(s) ~= "string" then
          error("bad argument #1 to 'utf8sub' (string expected, got ".. type(s).. ")")
      end
      if type(i) ~= "number" then
          error("bad argument #2 to 'utf8sub' (number expected, got ".. type(i).. ")")
      end
      if type(j) ~= "number" then
          error("bad argument #3 to 'utf8sub' (number expected, got ".. type(j).. ")")
      end

      local bytes = s:len()

      -- the character count is only needed to resolve negative indices
      local startChar, endChar = i, j
      if i < 0 or j < 0 then
          local l = utf8len(s)
          if i < 0 then
              startChar = l + i + 1
          end
          if j < 0 then
              endChar = l + j + 1
          end
      end

      -- like string.sub, a start before the first character means "from 1"
      if startChar < 1 then
          startChar = 1
      end

      -- can't have start before end!
      if startChar > endChar then
          return ""
      end

      -- byte offsets to pass to string.sub; startByte begins past the end so
      -- that a start beyond the last character yields an empty result
      local startByte, endByte = bytes + 1, bytes

      local pos = 1
      local len = 0

      while pos <= bytes do
          len = len + 1

          if len == startChar then
              startByte = pos
          end

          pos = pos + utf8charbytes(s, pos)

          if len == endChar then
              -- pos now points at the next character, so step back one byte
              endByte = pos - 1
              break
          end
      end

      return s:sub(startByte, endByte)
  end
  -- expose the function on the string library table
  string.utf8sub = utf8sub
  174.  
  -- Replaces UTF-8 characters according to a mapping table.
  --
  --   s        source string
  --   mapping  table keyed by single UTF-8 characters (as strings); a
  --            character with no truthy entry is copied through unchanged
  --
  -- Returns the rebuilt string. Raises an error when an argument has the
  -- wrong type; errors raised by utf8charbytes for malformed input propagate.
  local function utf8replace(s, mapping)
      -- argument checking
      if type(s) ~= "string" then
          error("bad argument #1 to 'utf8replace' (string expected, got ".. type(s).. ")")
      end
      if type(mapping) ~= "table" then
          error("bad argument #2 to 'utf8replace' (table expected, got ".. type(mapping).. ")")
      end

      local pos = 1
      local bytes = s:len()

      -- collect the pieces and join them once at the end; appending to a
      -- string inside the loop would re-copy the result on every character
      local pieces = {}
      local count = 0

      while pos <= bytes do
          local charbytes = utf8charbytes(s, pos)
          local c = s:sub(pos, pos + charbytes - 1)

          count = count + 1
          pieces[count] = mapping[c] or c

          pos = pos + charbytes
      end

      return table.concat(pieces)
  end
  201.  
  -- Upper-cases s by running it through utf8replace with the global
  -- utf8_lc_uc table as the character mapping (presumably the lower-to-upper
  -- table supplied by utf8data.lua -- confirm).
  local function utf8upper(s)
      local mapping = utf8_lc_uc
      return utf8replace(s, mapping)
  end
  -- only exported when the mapping table is available at load time
  if utf8_lc_uc then string.utf8upper = utf8upper end
  209.  
  -- Lower-cases s by running it through utf8replace with the global
  -- utf8_uc_lc table as the character mapping (presumably the upper-to-lower
  -- table supplied by utf8data.lua -- confirm).
  local function utf8lower(s)
      local mapping = utf8_uc_lc
      return utf8replace(s, mapping)
  end
  -- only exported when the mapping table is available at load time
  if utf8_uc_lc then string.utf8lower = utf8lower end
  217.  
  -- Works like string.reverse, except that whole UTF-8 characters are
  -- reversed rather than individual bytes.
  --
  --   s  source string
  --
  -- Returns the string with its characters in reverse order. Raises an error
  -- when s is not a string or when s is not well-formed UTF-8.
  local function utf8reverse(s)
      -- argument checking
      if type(s) ~= "string" then
          error("bad argument #1 to 'utf8reverse' (string expected, got ".. type(s).. ")")
      end

      local pos = s:len()

      -- collect the characters and join them once at the end
      local pieces = {}
      local count = 0

      while pos > 0 do
          -- remember where this character ends before walking backwards
          local last = pos
          local c = s:byte(pos)

          -- step back over continuation bytes (128-191) to the lead byte
          while c >= 128 and c <= 191 do
              pos = pos - 1
              c = s:byte(pos)
              if not c then
                  -- ran off the front: the string starts with continuation bytes
                  error("Invalid UTF-8 character")
              end
          end

          local charbytes = utf8charbytes(s, pos)

          -- the character must end exactly where the backwards walk began;
          -- otherwise stray continuation bytes would be silently dropped
          if pos + charbytes - 1 ~= last then
              error("Invalid UTF-8 character")
          end

          count = count + 1
          pieces[count] = s:sub(pos, last)

          pos = pos - 1
      end

      return table.concat(pieces)
  end
  -- expose the function on the string library table
  string.utf8reverse = utf8reverse
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top