Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- {$apptype console}
- uses SysUtils, StrUtils, Diagnostics;
- procedure Run(const AName: string; const Action: TProc);
- var
- sw: TStopwatch;
- begin
- sw := TStopwatch.Create;
- sw.Start;
- Action();
- sw.Stop;
- Writeln(Format('%s: %3d ms', [AName, sw.ElapsedMilliseconds]));
- Flush(Output);
- end;
- function MakeLargeDelimString(ACount: Integer; ADelim: Char): string;
- begin
- Result := StringOfChar('x', 50);
- while ACount > 0 do
- begin
- Dec(ACount);
- Result := Result + StringOfChar('y', 50);
- end;
- end;
- function GetTok1(const Line: string; const Delim: string; const TokenNum: Byte): string;
- { LK Feb 12, 2007 - This function has been optimized as best as possible }
- var
- I, P, P2: integer;
- begin
- P2 := Pos(Delim, Line);
- if TokenNum = 1 then begin
- if P2 = 0 then
- Result := Line
- else
- Result := copy(Line, 1, P2-1);
- end
- else begin
- P := 0; { To prevent warnings }
- for I := 2 to TokenNum do begin
- P := P2;
- if P = 0 then break;
- P2 := PosEx(Delim, Line, P+1);
- end;
- if P = 0 then
- Result := ''
- else if P2 = 0 then
- Result := copy(Line, P+1, MaxInt)
- else
- Result := copy(Line, P+1, P2-P-1);
- end;
- end; { GetTok }
- function GetTok2(const Line: string; const Delim: Char; const TokenNum: Byte): string;
- { LK Feb 12, 2007 - This function has been optimized as best as possible }
- { LK Nov 7, 2009 - Reoptimized using PChars instead of calls to Pos and PosEx }
- { See; http://stackoverflow.com/questions/1694001/is-there-a-fast-gettoken-routine-for-delphi }
- var
- I: integer;
- PLine, PStart: PChar;
- begin
- PLine := PChar(Line);
- PStart := PLine;
- inc(PLine);
- for I := 1 to TokenNum do begin
- while (PLine^ <> #0) and (PLine^ <> Delim) do
- inc(PLine);
- if I = TokenNum then begin
- SetString(Result, PStart, PLine - PStart);
- break;
- end;
- if PLine^ = #0 then begin
- Result := '';
- break;
- end;
- inc(PLine);
- PStart := PLine;
- end;
- end; { GetTok }
- // Using indexing to get delimiter character
- // this assumes that Delim has length(1); probably a worthwhile special case
- function GetTok3(const Line: string; const Delim: string; const TokenNum: Byte): string;
- var
- I: integer;
- PLine, PStart: PChar;
- begin
- PLine := PChar(Line);
- PStart := PLine;
- inc(PLine);
- for I := 1 to TokenNum do begin
- while (PLine^ <> #0) and (PLine^ <> Delim[1]) do
- inc(PLine);
- if I = TokenNum then begin
- SetString(Result, PStart, PLine - PStart);
- break;
- end;
- if PLine^ = #0 then begin
- Result := '';
- break;
- end;
- inc(PLine);
- PStart := PLine;
- end;
- end; { GetTok }
- // Do all tokenization up front.
- function GetTok4(const Line: string; const Delim: Char): TArray<string>;
- var
- cp, start: PChar;
- count: Integer;
- begin
- // Count sections
- count := 1;
- cp := PChar(Line);
- start := cp;
- while True do
- begin
- if cp^ <> #0 then
- begin
- if cp^ <> Delim then
- Inc(cp)
- else
- begin
- Inc(cp);
- Inc(count);
- end;
- end
- else
- begin
- Inc(count);
- Break;
- end;
- end;
- SetLength(Result, count);
- cp := start;
- count := 0;
- while True do
- begin
- if cp^ <> #0 then
- begin
- if cp^ <> Delim then
- Inc(cp)
- else
- begin
- SetString(Result[count], start, cp - start);
- Inc(cp);
- Inc(count);
- end;
- end
- else
- begin
- SetString(Result[count], start, cp - start);
- Break;
- end;
- end;
- end;
- type
- TTokenizer = record
- private
- FSource: string;
- FCurrPos: PChar;
- FDelim: Char;
- public
- procedure Reset(const ASource: string; ADelim: Char); inline;
- function GetToken(out AResult: string): Boolean; inline;
- end;
- procedure TTokenizer.Reset(const ASource: string; ADelim: Char);
- begin
- FSource := ASource; // keep reference alive
- FCurrPos := PChar(FSource);
- FDelim := ADelim;
- end;
- function TTokenizer.GetToken(out AResult: string): Boolean;
- var
- cp, start: PChar;
- delim: Char;
- begin
- // copy members to locals for better optimization
- cp := FCurrPos;
- delim := FDelim;
- if cp^ = #0 then
- begin
- AResult := '';
- Exit(False);
- end;
- start := cp;
- while (cp^ <> #0) and (cp^ <> Delim) do
- Inc(cp);
- SetString(AResult, start, cp - start);
- if cp^ = Delim then
- Inc(cp);
- FCurrPos := cp;
- Result := True;
- end;
- var
- n: Integer;
- src: string;
- theDelim: Char;
- count: Integer;
- begin
- try
- for n := 1 to 5 do
- begin
- theDelim := '|';
- count := 3 * n;
- src := MakeLargeDelimString(count, theDelim);
- Writeln(Format('*** count=%d, Length(src)=%d', [count, Length(src)]));
- Run('GetTok1', procedure
- var
- i, j: Integer;
- begin
- for i := 1 to 1000000 do
- for j := 1 to count + 1 do
- GetTok1(src, theDelim, j);
- end);
- Run('GetTok2', procedure
- var
- i, j: Integer;
- begin
- for i := 1 to 1000000 do
- for j := 1 to count + 1 do
- GetTok2(src, theDelim, j);
- end);
- Run('GetTok3', procedure
- var
- i, j: Integer;
- begin
- for i := 1 to 1000000 do
- for j := 1 to count + 1 do
- GetTok3(src, theDelim, j);
- end);
- Run('GetTok4', procedure
- var
- i: Integer;
- begin
- for i := 1 to 1000000 do
- GetTok4(src, theDelim); // extracts all tokens
- end);
- Run('GetTokBK', procedure
- var
- i: Integer;
- tok: TTokenizer;
- s: string;
- begin
- for i := 1 to 1000000 do
- begin
- tok.Reset(src, theDelim);
- while tok.GetToken(s) do // runs count times, uses retvalue for stop, not ''
- ;
- end;
- end);
- end;
- except
- on e: Exception do
- Writeln(e.Message);
- end;
- end.
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement