Guest User

Unit for basic UTF-8 handling in Free Pascal

a guest
Apr 7th, 2013
115
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Pascal 1.66 KB | None | 0 0
  1. // Some potentially buggy functions for UTF-8 in FreePascal
  2. // Consider this code snippet under public domain or the
  3. // WTFPL license, depending on your needs and local laws
  4. unit RBUTF8;
  5. {$MODE OBJFPC}{$H+}
  6. interface
  7.  
  8. function UTF8Next(s: string; I: Integer): Integer; inline;
  9. function UTF8Length(s: string): Integer; inline;
  10. function UTF8Copy(s: string; Start, Len: Integer): string; inline;
  11. function UTF8Index(s: string; Index: Integer): Integer; inline;
  12.  
  13. implementation
  14.  
  15. function UTF8Next(s: string; I: Integer): Integer; inline;
  16. begin
  17.   Result:=I;
  18.   if Length(s) < Result then begin end
  19.   else if (Ord(s[Result]) and $C0)=$C0 then Inc(Result, 2)
  20.   else if (Ord(s[Result]) and $E0)=$E0 then Inc(Result, 3)
  21.   else if (Ord(s[Result]) and $F0)=$F0 then Inc(Result, 4)
  22.   else if (Ord(s[Result]) and $F8)=$F8 then Inc(Result, 5)
  23.   else if (Ord(s[Result]) and $FC)=$FC then Inc(Result, 6)
  24.   else Inc(Result);
  25. end;
  26.  
  27. function UTF8Length(s: string): Integer; inline;
  28. var
  29.   I: Integer;
  30. begin
  31.   Result:=0;
  32.   I:=1;
  33.   while I <= Length(s) do begin
  34.     I:=UTF8Next(s, I);
  35.     Inc(Result);
  36.   end;
  37. end;
  38.  
  39. function UTF8Copy(s: string; Start, Len: Integer): string; inline;
  40. var
  41.   I, NI: Integer;
  42. begin
  43.   Result:='';
  44.   I:=UTF8Index(s, Start);
  45.   if (Len > 0) and (I <= Length(s)) then repeat
  46.     NI:=UTF8Next(s, I);
  47.     if I=NI then Break;
  48.     Result:=Result + Copy(s, I, NI - I);
  49.     I:=NI;
  50.     Dec(Len);
  51.   until (Len=0) or (I > Length(s));
  52. end;
  53.  
  54. function UTF8Index(s: string; Index: Integer): Integer; inline;
  55. var
  56.   I: Integer;
  57. begin
  58.   Result:=1;
  59.   I:=Index;
  60.   while I > 1 do begin
  61.     Result:=UTF8Next(s, Result);
  62.     Dec(I);
  63.   end;
  64. end;
  65.  
  66. end.
Add Comment
Please, Sign In to add comment