Pastebin launched a little side project called VERYVIRAL.com, check it out ;-) Want more features on Pastebin? Sign Up, it's FREE!
Guest

Barry Kelly

By: a guest on Nov 8th, 2009  |  syntax: Delphi  |  size: 6.00 KB  |  views: 136  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
This paste has a previous version, view the difference. Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. {$apptype console}
  2.  
  3. uses SysUtils, StrUtils, Diagnostics;
  4.  
  5. procedure Run(const AName: string; const Action: TProc);
  6. var
  7.   sw: TStopwatch;
  8. begin
  9.   sw := TStopwatch.Create;
  10.   sw.Start;
  11.   Action();
  12.   sw.Stop;
  13.  
  14.   Writeln(Format('%s: %3d ms', [AName, sw.ElapsedMilliseconds]));
  15.   Flush(Output);
  16. end;
  17.  
  18. function MakeLargeDelimString(ACount: Integer; ADelim: Char): string;
  19. begin
  20.   Result := StringOfChar('x', 50);
  21.   while ACount > 0 do
  22.   begin
  23.     Dec(ACount);
  24.     Result := Result + StringOfChar('y', 50);
  25.   end;
  26. end;
  27.  
  28. function GetTok1(const Line: string; const Delim: string; const TokenNum: Byte): string;
  29. { LK Feb 12, 2007 - This function has been optimized as best as possible }
  30. var
  31.  I, P, P2: integer;
  32. begin
  33.   P2 := Pos(Delim, Line);
  34.   if TokenNum = 1 then begin
  35.     if P2 = 0 then
  36.       Result := Line
  37.     else
  38.       Result := copy(Line, 1, P2-1);
  39.   end
  40.   else begin
  41.     P := 0; { To prevent warnings }
  42.     for I := 2 to TokenNum do begin
  43.       P := P2;
  44.       if P = 0 then break;
  45.       P2 := PosEx(Delim, Line, P+1);
  46.     end;
  47.     if P = 0 then
  48.       Result := ''
  49.     else if P2 = 0 then
  50.       Result := copy(Line, P+1, MaxInt)
  51.     else
  52.       Result := copy(Line, P+1, P2-P-1);
  53.   end;
  54. end; { GetTok }
  55.  
  56. function GetTok2(const Line: string; const Delim: Char; const TokenNum: Byte): string;
  57. { LK Feb 12, 2007 - This function has been optimized as best as possible }
  58. { LK Nov 7, 2009 - Reoptimized using PChars instead of calls to Pos and PosEx }
  59. { See; http://stackoverflow.com/questions/1694001/is-there-a-fast-gettoken-routine-for-delphi }
  60. var
  61.  I: integer;
  62.  PLine, PStart: PChar;
  63. begin
  64.   PLine := PChar(Line);
  65.   PStart := PLine;
  66.   inc(PLine);
  67.   for I := 1 to TokenNum do begin
  68.     while (PLine^ <> #0) and (PLine^ <> Delim) do
  69.       inc(PLine);
  70.     if I = TokenNum then begin
  71.       SetString(Result, PStart, PLine - PStart);
  72.       break;
  73.     end;
  74.     if PLine^ = #0 then begin
  75.       Result := '';
  76.       break;
  77.     end;
  78.     inc(PLine);
  79.     PStart := PLine;
  80.   end;
  81. end; { GetTok }
  82.  
  83. // Using indexing to get delimiter character
  84. // this assumes that Delim has length(1); probably a worthwhile special case
  85. function GetTok3(const Line: string; const Delim: string; const TokenNum: Byte): string;
  86. var
  87.  I: integer;
  88.  PLine, PStart: PChar;
  89. begin
  90.   PLine := PChar(Line);
  91.   PStart := PLine;
  92.   inc(PLine);
  93.   for I := 1 to TokenNum do begin
  94.     while (PLine^ <> #0) and (PLine^ <> Delim[1]) do
  95.       inc(PLine);
  96.     if I = TokenNum then begin
  97.       SetString(Result, PStart, PLine - PStart);
  98.       break;
  99.     end;
  100.     if PLine^ = #0 then begin
  101.       Result := '';
  102.       break;
  103.     end;
  104.     inc(PLine);
  105.     PStart := PLine;
  106.   end;
  107. end; { GetTok }
  108.  
  109. // Do all tokenization up front.
  110. function GetTok4(const Line: string; const Delim: Char): TArray<string>;
  111. var
  112.   cp, start: PChar;
  113.   count: Integer;
  114. begin
  115.   // Count sections
  116.   count := 1;
  117.   cp := PChar(Line);
  118.   start := cp;
  119.   while True do
  120.   begin
  121.     if cp^ <> #0 then
  122.     begin
  123.       if cp^ <> Delim then
  124.         Inc(cp)
  125.       else
  126.       begin
  127.         Inc(cp);
  128.         Inc(count);
  129.       end;
  130.     end
  131.     else
  132.     begin
  133.       Inc(count);
  134.       Break;
  135.     end;
  136.   end;
  137.  
  138.   SetLength(Result, count);
  139.   cp := start;
  140.   count := 0;
  141.  
  142.   while True do
  143.   begin
  144.     if cp^ <> #0 then
  145.     begin
  146.       if cp^ <> Delim then
  147.         Inc(cp)
  148.       else
  149.       begin
  150.         SetString(Result[count], start, cp - start);
  151.         Inc(cp);
  152.         Inc(count);
  153.       end;
  154.     end
  155.     else
  156.     begin
  157.       SetString(Result[count], start, cp - start);
  158.       Break;
  159.     end;
  160.   end;
  161. end;
  162.  
  163. type
  164.   TTokenizer = record
  165.   private
  166.     FSource: string;
  167.     FCurrPos: PChar;
  168.     FDelim: Char;
  169.   public
  170.     procedure Reset(const ASource: string; ADelim: Char); inline;
  171.     function GetToken(out AResult: string): Boolean; inline;
  172.   end;
  173.  
  174. procedure TTokenizer.Reset(const ASource: string; ADelim: Char);
  175. begin
  176.   FSource := ASource; // keep reference alive
  177.   FCurrPos := PChar(FSource);
  178.   FDelim := ADelim;
  179. end;
  180.  
  181. function TTokenizer.GetToken(out AResult: string): Boolean;
  182. var
  183.   cp, start: PChar;
  184.   delim: Char;
  185. begin
  186.   // copy members to locals for better optimization
  187.   cp := FCurrPos;
  188.   delim := FDelim;
  189.  
  190.   if cp^ = #0 then
  191.   begin
  192.     AResult := '';
  193.     Exit(False);
  194.   end;
  195.  
  196.   start := cp;
  197.   while (cp^ <> #0) and (cp^ <> Delim) do
  198.     Inc(cp);
  199.  
  200.   SetString(AResult, start, cp - start);
  201.   if cp^ = Delim then
  202.     Inc(cp);
  203.   FCurrPos := cp;
  204.   Result := True;
  205. end;
  206.  
  207. var
  208.   n: Integer;
  209.   src: string;
  210.   theDelim: Char;
  211.   count: Integer;
  212. begin
  213.   try
  214.     for n := 1 to 5 do
  215.     begin
  216.       theDelim := '|';
  217.       count := 3 * n;
  218.       src := MakeLargeDelimString(count, theDelim);
  219.       Writeln(Format('*** count=%d, Length(src)=%d', [count, Length(src)]));
  220.      
  221.       Run('GetTok1', procedure
  222.       var
  223.         i, j: Integer;
  224.       begin
  225.         for i := 1 to 1000000 do
  226.           for j := 1 to count + 1 do
  227.             GetTok1(src, theDelim, j);
  228.       end);
  229.      
  230.       Run('GetTok2', procedure
  231.       var
  232.         i, j: Integer;
  233.       begin
  234.         for i := 1 to 1000000 do
  235.           for j := 1 to count + 1 do
  236.             GetTok2(src, theDelim, j);
  237.       end);
  238.      
  239.       Run('GetTok3', procedure
  240.       var
  241.         i, j: Integer;
  242.       begin
  243.         for i := 1 to 1000000 do
  244.           for j := 1 to count + 1 do
  245.             GetTok3(src, theDelim, j);
  246.       end);
  247.      
  248.       Run('GetTok4', procedure
  249.       var
  250.         i: Integer;
  251.       begin
  252.         for i := 1 to 1000000 do
  253.           GetTok4(src, theDelim); // extracts all tokens
  254.       end);
  255.      
  256.       Run('GetTokBK', procedure
  257.       var
  258.         i: Integer;
  259.         tok: TTokenizer;
  260.         s: string;
  261.       begin
  262.         for i := 1 to 1000000 do
  263.         begin
  264.           tok.Reset(src, theDelim);
  265.           while tok.GetToken(s) do // runs count times, uses retvalue for stop, not ''
  266.             ;
  267.         end;
  268.       end);
  269.     end;
  270.   except
  271.     on e: Exception do
  272.       Writeln(e.Message);
  273.   end;
  274. end.