View difference between Paste ID: f5a456f97 and
SHOW: | | - or go back to the newest paste.
1
{$apptype console}
2
3
uses SysUtils, StrUtils, Diagnostics;
4
5
procedure Run(const AName: string; const Action: TProc);
6
var
7
  sw: TStopwatch;
8
begin
9
  sw := TStopwatch.Create;
10
  sw.Start;
11
  Action();
12
  sw.Stop;
13
  
14
  Writeln(Format('%s: %3d ms', [AName, sw.ElapsedMilliseconds]));
15
  Flush(Output);
16
end;
17
18
function MakeLargeDelimString(ACount: Integer; ADelim: Char): string;
19
begin
20
  Result := StringOfChar('x', 50);
21
  while ACount > 0 do
22
  begin
23
    Dec(ACount);
24
    Result := Result + StringOfChar('y', 50);
25
  end;
26
end;
27
28
function GetTok1(const Line: string; const Delim: string; const TokenNum: Byte): string;
29
{ LK Feb 12, 2007 - This function has been optimized as best as possible }
30
var
31
 I, P, P2: integer;
32
begin
33
  P2 := Pos(Delim, Line);
34
  if TokenNum = 1 then begin
35
    if P2 = 0 then
36
      Result := Line
37
    else
38
      Result := copy(Line, 1, P2-1);
39
  end
40
  else begin
41
    P := 0; { To prevent warnings }
42
    for I := 2 to TokenNum do begin
43
      P := P2;
44
      if P = 0 then break;
45
      P2 := PosEx(Delim, Line, P+1);
46
    end;
47
    if P = 0 then
48
      Result := ''
49
    else if P2 = 0 then
50
      Result := copy(Line, P+1, MaxInt)
51
    else
52
      Result := copy(Line, P+1, P2-P-1);
53
  end;
54
end; { GetTok }
55
56
function GetTok2(const Line: string; const Delim: Char; const TokenNum: Byte): string;
57
{ LK Feb 12, 2007 - This function has been optimized as best as possible }
58
{ LK Nov 7, 2009 - Reoptimized using PChars instead of calls to Pos and PosEx }
59
{ See; http://stackoverflow.com/questions/1694001/is-there-a-fast-gettoken-routine-for-delphi }
60
var
61
 I: integer;
62
 PLine, PStart: PChar;
63
begin
64
  PLine := PChar(Line);
65
  PStart := PLine;
66
  inc(PLine);
67
  for I := 1 to TokenNum do begin
68
    while (PLine^ <> #0) and (PLine^ <> Delim) do
69
      inc(PLine);
70
    if I = TokenNum then begin
71
      SetString(Result, PStart, PLine - PStart);
72
      break;
73
    end;
74
    if PLine^ = #0 then begin
75
      Result := '';
76
      break;
77
    end;
78
    inc(PLine);
79
    PStart := PLine;
80
  end;
81
end; { GetTok }
82
83
// Using indexing to get delimiter character
84
// this assumes that Delim has length(1); probably a worthwhile special case
85
function GetTok3(const Line: string; const Delim: string; const TokenNum: Byte): string;
86
var
87
 I: integer;
88
 PLine, PStart: PChar;
89
begin
90
  PLine := PChar(Line);
91
  PStart := PLine;
92
  inc(PLine);
93
  for I := 1 to TokenNum do begin
94
    while (PLine^ <> #0) and (PLine^ <> Delim[1]) do
95
      inc(PLine);
96
    if I = TokenNum then begin
97
      SetString(Result, PStart, PLine - PStart);
98
      break;
99
    end;
100
    if PLine^ = #0 then begin
101
      Result := '';
102
      break;
103
    end;
104
    inc(PLine);
105
    PStart := PLine;
106
  end;
107
end; { GetTok }
108
109
// Do all tokenization up front.
110
function GetTok4(const Line: string; const Delim: Char): TArray<string>;
111
var
112
  cp, start: PChar;
113
  count: Integer;
114
begin
115
  // Count sections
116
  count := 1;
117
  cp := PChar(Line);
118
  start := cp;
119
  while True do
120
  begin
121
    if cp^ <> #0 then
122
    begin
123
      if cp^ <> Delim then
124
        Inc(cp)
125
      else
126
      begin
127
        Inc(cp);
128
        Inc(count);
129
      end;
130
    end
131
    else
132
    begin
133
      Inc(count);
134
      Break;
135
    end;
136
  end;
137
  
138
  SetLength(Result, count);
139
  cp := start;
140
  count := 0;
141
  
142
  while True do
143
  begin
144
    if cp^ <> #0 then
145
    begin
146
      if cp^ <> Delim then
147
        Inc(cp)
148
      else
149
      begin
150
        SetString(Result[count], start, cp - start);
151
        Inc(cp);
152
        Inc(count);
153
      end;
154
    end
155
    else
156
    begin
157
      SetString(Result[count], start, cp - start);
158
      Break;
159
    end;
160
  end;
161
end;
162
163
type
164
  TTokenizer = record
165
  private
166
    FSource: string;
167
    FCurrPos: PChar;
168
    FDelim: Char;
169
  public
170
    procedure Reset(const ASource: string; ADelim: Char); inline;
171
    function GetToken(out AResult: string): Boolean; inline;
172
  end;
173
174
procedure TTokenizer.Reset(const ASource: string; ADelim: Char);
175
begin
176
  FSource := ASource; // keep reference alive
177
  FCurrPos := PChar(FSource);
178
  FDelim := ADelim;
179
end;
180
181
function TTokenizer.GetToken(out AResult: string): Boolean;
182
var
183
  cp, start: PChar;
184
  delim: Char;
185
begin
186
  // copy members to locals for better optimization
187
  cp := FCurrPos;
188
  delim := FDelim;
189
  
190
  if cp^ = #0 then
191
  begin
192
    AResult := '';
193
    Exit(False);
194
  end;
195
  
196
  start := cp;
197
  while (cp^ <> #0) and (cp^ <> Delim) do
198
    Inc(cp);
199
  
200
  SetString(AResult, start, cp - start);
201
  if cp^ = Delim then
202
    Inc(cp);
203
  FCurrPos := cp;
204
  Result := True;
205
end;
206
207
var
208
  n: Integer;
209
  src: string;
210
  theDelim: Char;
211
  count: Integer;
212
begin
213
  try
214
    for n := 1 to 5 do
215
    begin
216
      theDelim := '|';
217
      count := 3 * n;
218
      src := MakeLargeDelimString(count, theDelim);
219
      Writeln(Format('*** count=%d, Length(src)=%d', [count, Length(src)]));
220
      
221
      Run('GetTok1', procedure
222
      var
223
        i, j: Integer;
224
      begin
225
        for i := 1 to 1000000 do
226
          for j := 1 to count + 1 do
227
            GetTok1(src, theDelim, j);
228
      end);
229
      
230
      Run('GetTok2', procedure
231
      var
232
        i, j: Integer;
233
      begin
234
        for i := 1 to 1000000 do
235
          for j := 1 to count + 1 do
236
            GetTok2(src, theDelim, j);
237
      end);
238
      
239
      Run('GetTok3', procedure
240
      var
241
        i, j: Integer;
242
      begin
243
        for i := 1 to 1000000 do
244
          for j := 1 to count + 1 do
245
            GetTok3(src, theDelim, j);
246
      end);
247
      
248
      Run('GetTok4', procedure
249
      var
250
        i: Integer;
251
      begin
252
        for i := 1 to 1000000 do
253
          GetTok4(src, theDelim); // extracts all tokens
254
      end);
255
      
256
      Run('GetTokBK', procedure
257
      var
258
        i: Integer;
259
        tok: TTokenizer;
260
        s: string;
261
      begin
262
        for i := 1 to 1000000 do
263
        begin
264
          tok.Reset(src, theDelim);
265
          while tok.GetToken(s) do // runs count times, uses retvalue for stop, not ''
266
            ;
267
        end;
268
      end);
269
    end;
270
  except
271
    on e: Exception do
272
      Writeln(e.Message);
273
  end;
274
end.
275