Skip to content

Commit 760539e

Browse files
committed
own dictionary implementation, apparently has better collision resolution. 35s -> 30s
1 parent 9bec7dd commit 760539e

1 file changed

Lines changed: 90 additions & 4 deletions

File tree

entries/ghatem-fpc/src/onebrc.pas

Lines changed: 90 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ interface
1010

1111
function RoundExDouble(const ATemp: Double): Double; inline;
1212

13+
const
14+
cDictSize: Integer = 45000;
15+
1316
type
1417

1518
// record is packed to minimize its size
@@ -24,6 +27,24 @@ function RoundExDouble(const ATemp: Double): Double; inline;
2427
PStationData = ^TStationData;
2528
TStationsDict = specialize TDictionary<Cardinal, PStationData>;
2629

30+
TKeys = array of Cardinal;
31+
TValues = array of PStationData;
32+
33+
{ TMyDictionary }
34+
35+
TMyDictionary = class
  // Fixed-capacity hash table that maps a Cardinal key to a PStationData pointer.
  // FHashes and FData are parallel arrays: FData[i] is the value stored for the
  // key held in FHashes[i]. Both are sized to cDictSize by the constructor.
  private
    FHashes: TKeys;   // key per slot; a slot holding 0 is treated as empty by InternalFind
    FData  : TValues; // value per slot, same index as FHashes
    // Probes for aKey. Reports whether it was found, and the slot index where it
    // lives (found) or where it would be inserted (not found).
    procedure InternalFind(const aKey: Cardinal; out aFound: Boolean; out aIndex: Integer); inline;
  public
    // Allocates both slot arrays with cDictSize entries.
    constructor Create;
    // Looks up aKey; returns True and the stored pointer, or False and nil.
    function TryGetValue (const aKey: Cardinal; out aValue: PStationData): Boolean; inline;
    // Stores aValue under aKey; raises Exception if the key is already present.
    procedure Add (const aKey: Cardinal; const aValue: PStationData); inline;
    // Raw slot arrays, exposed for iteration. They include unused slots, so
    // callers must skip entries that are 0 (Keys) / nil (Values).
    property Keys: TKeys read FHashes;
    property Values: TValues read FData;
end;
47+
2748
{ TOneBRC }
2849

2950
TOneBRC = class
@@ -36,7 +57,7 @@ TOneBRC = class
3657

3758
FThreadCount: UInt16;
3859
FThreads: array of TThread;
39-
FStationsDicts: array of TStationsDict;
60+
FStationsDicts: array of TMyDictionary;
4061

4162
procedure ExtractLineData(const aStart: Int64; const aEnd: Int64; out aLength: ShortInt; out aTemp: SmallInt); inline;
4263

@@ -106,6 +127,69 @@ function Compare(AList: TStringList; AIndex1, AIndex2: Integer): Integer;
106127
Result := CompareStr(Str1, Str2);
107128
end;
108129

130+
{ TMyDictionary }
131+
132+
procedure TMyDictionary.InternalFind(const aKey: Cardinal; out aFound: Boolean; out aIndex: Integer);
// Locates aKey in the open-addressing table using linear probing.
//
// aKey   : key to search for.
// aFound : True when a slot holding aKey was found, False when an empty slot
//          was reached first.
// aIndex : slot that holds aKey (aFound = True), or the empty slot where aKey
//          should be inserted (aFound = False).
//
// Raises Exception when all cDictSize slots were probed without finding aKey
// or an empty slot, i.e. the table is full. Without this bound the probe loop
// would spin forever.
//
// NOTE: a slot value of 0 marks "empty", so aKey = 0 cannot be told apart from
// an empty slot and is always reported as found. Callers must not use key 0.
var
  vIdx: Integer;
  vProbe: Integer;
begin
  aFound := False;
  aIndex := -1;
  // home slot of the key
  vIdx := aKey mod cDictSize;

  // at most one probe per slot: guarantees termination on a full table
  for vProbe := 1 to cDictSize do begin
    if FHashes[vIdx] = aKey then begin
      aIndex := vIdx;
      aFound := True;
      Exit;
    end;

    // the home slot is tested for emptiness too, so a key can occupy its own
    // home slot and a miss on an empty home slot ends after a single probe
    if FHashes[vIdx] = 0 then begin
      aIndex := vIdx;
      Exit;
    end;

    // step to the next slot, wrapping around at the end of the table
    Inc (vIdx);
    if vIdx >= cDictSize then
      vIdx := 0;
  end;

  raise Exception.Create ('TMyDict: table is full');
end;
160+
161+
constructor TMyDictionary.Create;
// Allocates the two parallel slot arrays with cDictSize entries each.
// InternalFind treats a key slot holding 0 as empty, so the table relies on
// SetLength handing back zero-filled arrays.
begin
  SetLength (FData, cDictSize);
  SetLength (FHashes, cDictSize);
end;
166+
167+
function TMyDictionary.TryGetValue(const aKey: Cardinal; out aValue: PStationData): Boolean;
// Looks up aKey.
// Returns True and sets aValue to the stored pointer when the key is present;
// returns False and sets aValue to nil otherwise.
var
  vSlot: Integer;
begin
  // default for the "not found" case
  aValue := nil;

  InternalFind (aKey, Result, vSlot);
  if not Result then
    Exit;

  aValue := FData[vSlot];
end;
178+
179+
procedure TMyDictionary.Add(const aKey: Cardinal; const aValue: PStationData);
// Inserts aValue under aKey.
// Raises Exception when InternalFind reports that aKey is already present.
var
  vSlot: Integer;
  vExists: Boolean;
begin
  InternalFind (aKey, vExists, vSlot);

  if vExists then
    raise Exception.Create ('TMyDict: cannot add, duplicate key');

  // vSlot is the empty slot InternalFind stopped at: fill key and value in parallel
  FHashes[vSlot] := aKey;
  FData[vSlot] := aValue;
end;
192+
109193
procedure TOneBRC.ExtractLineData(const aStart: Int64; const aEnd: Int64; out aLength: ShortInt; out aTemp: SmallInt);
110194
// given a line of data, extract the length of station name, and temperature as Integer.
111195
var
@@ -155,8 +239,8 @@ constructor TOneBRC.Create (const aThreadCount: UInt16);
155239
SetLength (FThreads, aThreadCount);
156240

157241
for I := 0 to aThreadCount - 1 do begin
158-
FStationsDicts[I] := TStationsDict.Create;
159-
FStationsDicts[I].Capacity := 45000;
242+
FStationsDicts[I] := TMyDictionary.Create;
243+
//FStationsDicts[I].Capacity := 45000;
160244
end;
161245
end;
162246

@@ -280,6 +364,7 @@ procedure TOneBRC.Merge(aLeft: UInt16; aRight: UInt16);
280364
vDataL: PStationData;
281365
begin
282366
for iHash in FStationsDicts[aRight].Keys do begin
367+
if iHash = 0 then continue;
283368
FStationsDicts[aRight].TryGetValue(iHash, vDataR);
284369

285370
if FStationsDicts[aLeft].TryGetValue(iHash, vDataL) then begin
@@ -322,7 +407,8 @@ procedure TOneBRC.GenerateOutput;
322407
try
323408
vStations.BeginUpdate;
324409
for vData in FStationsDicts[0].Values do begin
325-
vStations.Add(vData^.Name);
410+
if vData <> nil then
411+
vStations.Add(vData^.Name);
326412
end;
327413
vStations.EndUpdate;
328414

0 commit comments

Comments
 (0)