SHOW:
|
|
- or go back to the newest paste.
1 | program Project28; | |
2 | ||
3 | {$APPTYPE CONSOLE} | |
4 | ||
5 | uses SysUtils, Windows, Generics.Collections; | |
6 | ||
7 | type TFindDup = function: Integer; | |
8 | ||
9 | function TimeProc(Proc: TProcedure): Int64; | |
10 | var EndCount: Int64; | |
11 | begin | |
12 | QueryPerformanceCounter(Result); | |
13 | Proc; | |
14 | QueryPerformanceCounter(EndCount); | |
15 | Result := EndCount - Result; | |
16 | end; | |
17 | ||
18 | var WorkArray: TArray<string>; | |
19 | ||
20 | function SortAndScan:Integer; | |
21 | var i: Integer; | |
22 | prevstr, cstr: string; | |
23 | begin | |
24 | TArray.Sort<string>(WorkArray); | |
25 | prevstr := WorkArray[0]; | |
26 | for i:=1 to High(WorkArray) do | |
27 | begin | |
28 | cstr := WorkArray[i]; | |
29 | if cstr = prevstr then | |
30 | Exit(-1); | |
31 | prevstr := cstr; | |
32 | end; | |
33 | Result := -1; | |
34 | end; | |
35 | ||
36 | type TZeroWidthRecord = record end; | |
37 | function TestUsingDict:Integer; | |
38 | var D: TDictionary<string, TZeroWidthRecord>; | |
39 | i: Integer; | |
40 | s: string; | |
41 | ZW: TZeroWidthRecord; | |
42 | begin | |
43 | D := TDictionary<string, TZeroWidthRecord>.Create(Length(WorkArray)); | |
44 | try | |
45 | for i:=0 to High(WorkArray) do | |
46 | begin | |
47 | s := WorkArray[i]; | |
48 | - | if D.ContainsKey(s) then |
48 | + | try |
49 | - | Exit(i) |
49 | + | |
50 | - | else |
50 | + | except on E:Exception do |
51 | Exit(i); | |
52 | end; | |
53 | end; | |
54 | finally D.Free; | |
55 | end; | |
56 | Result := -1; | |
57 | end; | |
58 | - | procedure TestNumberOfStrings(const Cnt:Integer); |
58 | + | |
59 | function TestNumberOfStrings(const Cnt, MaxWordCount:Integer): Boolean; // Returns TRUE if the dictionary was 25% faster then the sort-and-scan | |
60 | var Total_Sort: Int64; | |
61 | Total_Dict: Int64; | |
62 | i,j,k: Integer; | |
63 | TestArray: TArray<string>; | |
64 | UniqueDict: TDictionary<string, TZeroWidthRecord>; | |
65 | WordCount: Integer; | |
66 | s: string; | |
67 | ZW: TZeroWidthRecord; | |
68 | TotalStrLen, AvgStrLen: Integer; | |
69 | const distinct_arrays = 5; | |
70 | pass_per_array = 5; | |
71 | WordList:array[0..142] of string = | |
72 | ( | |
73 | 'test', 'find', 'duplicate', 'speed', 'for', 'using', 'a', 'number', 'of', 'separate', | |
74 | 'algorithms', 'designed', 'to', 'investigate', 'how', 'efficient', 'tdictionary', | |
75 | 'really', 'is', | |
76 | 'In', 'computing', 'a', 'hash', 'table', 'also', 'hash', 'map', 'is', 'a', 'data', 'structure', | |
77 | 'used', 'to', 'implement', 'an', 'associative', 'array', 'a', 'structure', 'that', 'can', 'map', | |
78 | 'keys', 'to', 'values', 'A', 'hash', 'table', 'uses', 'a', 'hash', 'function', 'to', 'compute', 'an', | |
79 | 'index', 'into', 'an', 'array', 'of', 'buckets', 'or', 'slots', 'from', 'which', 'the', 'correct', 'value', 'can', 'be', 'found', | |
80 | 'Ideally', 'the', 'hash', 'function', 'should', 'assign', 'each', 'possible', 'key', 'to', 'a', 'unique', 'bucket', | |
81 | 'but', 'this', 'ideal', 'situation', 'is', 'rarely', 'achievable', 'in', 'practice', 'unless', 'the', 'hash', 'keys', | |
82 | 'are', 'fixed', 'i.e.', 'new', 'entries', 'are', 'never', 'added', 'to', 'the', 'table', 'after', 'it', 'is', 'created', | |
83 | 'Instead', 'most', 'hash', 'table', 'designs', 'assume', 'that', 'hash', 'collisions', 'different', 'keys', | |
84 | 'that', 'are', 'assigned', 'by', 'the', 'hash', 'function', 'to', 'the', 'same', 'bucket', 'will', 'occur', 'and', 'must', 'be', 'accommodated', 'in', 'some', 'way' | |
85 | ); | |
86 | begin | |
87 | WriteLn('Testing with ', Cnt, ' strings.'); | |
88 | Total_Sort := 0; | |
89 | Total_Dict := 0; | |
90 | ||
91 | SetLength(TestArray, Cnt); | |
92 | SetLength(WorkArray, Cnt); | |
93 | ||
94 | TotalStrLen := 0; | |
95 | for i:=1 to distinct_arrays do | |
96 | begin | |
97 | UniqueDict := TDictionary<string, TZeroWidthRecord>.Create; | |
98 | try | |
99 | - | WordCount := Random(4); |
99 | + | |
100 | begin | |
101 | repeat | |
102 | WordCount := Random(MaxWordCount); | |
103 | s := ''; | |
104 | if WordCount > 0 then | |
105 | begin | |
106 | s := WordList[Random(High(WordList))]; | |
107 | for k:=2 to WordCount do | |
108 | s := s + ' ' + WordList[Random(High(WordList))]; | |
109 | end; | |
110 | until not UniqueDict.ContainsKey(s); | |
111 | ||
112 | UniqueDict.Add(s, ZW); | |
113 | TestArray[j] := s; | |
114 | Inc(TotalStrLen, Length(s)); | |
115 | ||
116 | end; | |
117 | ||
118 | // Make sure the array contains a duplicate: | |
119 | // TestArray[Cnt div 4] := TestArray[0]; | |
120 | ||
121 | finally UniqueDict.Free; | |
122 | end; | |
123 | ||
124 | for k:=0 to High(WorkArray) do WorkArray[k] := TestArray[k]; | |
125 | for j:=1 to pass_per_array do | |
126 | begin | |
127 | - | WriteLn(': Dict=', Total_Dict, '; Sort=', Total_Sort); |
127 | + | |
128 | end; | |
129 | ||
130 | for j:=1 to pass_per_array do | |
131 | begin | |
132 | for k:=0 to High(WorkArray) do WorkArray[k] := TestArray[k]; | |
133 | Total_Sort := Total_Sort + TimeProc(@SortAndScan); | |
134 | - | TestNumberOfStrings(50); |
134 | + | |
135 | - | TestNumberOfStrings(100); |
135 | + | |
136 | - | TestNumberOfStrings(500); |
136 | + | |
137 | - | TestNumberOfStrings(1000); |
137 | + | AvgStrLen := TotalStrLen div (Cnt * distinct_arrays); |
138 | - | TestNumberOfStrings(5000); |
138 | + | if Total_Dict < Total_Sort then |
139 | - | TestNumberOfStrings(50000); |
139 | + | |
140 | - | TestNumberOfStrings(100000); |
140 | + | Result := True; |
141 | - | TestNumberOfStrings(200000); |
141 | + | s := 'Dict was ' + IntToStr(100 - (Total_Dict * 100) div Total_Sort) + '% faster'; |
142 | end | |
143 | else | |
144 | begin | |
145 | Result := False; | |
146 | s := 'Sort was ' + IntToStr(100 - (Total_Sort * 100) div Total_Dict) + '% faster'; | |
147 | end; | |
148 | WriteLn(': Dict=', Total_Dict, '; Sort=', Total_Sort, ';AvgStrLen=', AvgStrLen, ': ', s); | |
149 | end; | |
150 | ||
151 | var WordCount: Integer; | |
152 | N: Integer; | |
153 | ||
154 | begin | |
155 | try | |
156 | Randomize; | |
157 | ||
158 | for WordCount := 11 to 20 do | |
159 | begin | |
160 | WriteLn('Testing with MaxWordCount=', WordCount); | |
161 | N := 5; | |
162 | while (N < 200000) and not TestNumberOfStrings(N, WordCount) do | |
163 | begin | |
164 | N := (N * 3) div 2; | |
165 | end; | |
166 | WriteLn; | |
167 | WriteLn; | |
168 | end; | |
169 | ||
170 | ReadLn; | |
171 | except | |
172 | on E: Exception do Writeln(E.ClassName, ': ', E.Message); | |
173 | end; | |
174 | end. |