Merge pull request #151 from georges-hatem/main

gcarreno · web-flow · commit b6b4be610c8f · 2024-05-04T16:33:23.000+01:00
replace FormatFloat with own implementation
diff --git a/entries/ghatem-fpc/README.md b/entries/ghatem-fpc/README.md
@@ -247,3 +247,31 @@ Another trial with various hash functions, a simple modulus vs. a slightly more
 Can be tested with the HASHMULT build option
 
 Finally, it seems choosing a dictionary size that is a prime number is also recommended: shaves 1 second out of 20 on my PC.
+
+## v.6 (2024-05-04)
+
+As of the latest results executed by Paweld, there are two main bottlenecks throttling the entire implementation, according to CallGrind and KCacheGrind:
+ - function ExtractLineData, 23% of total cost, of which 9% is due to `fpc_stackcheck`
+ - the hash lookup function, at 40% of total cost
+
+Currently, the hash lookup is done on an array of records. Increasing the array size causes slowness, and reducing it causes further collisions.
+Will try to see how to reduce collisions (increase array size), all while minimizing the cost of cache misses.
+
+Edit:
+The goal is to both:
+ - minimize collisions on the hashes (keys) by having a good hash function, but also increase the size of the keys storage
+ - minimize the size of the array of packed records
+
+The idea:
+ - the dictionary will no longer point to a PStationData pointer, but rather to an index between 0 and StationCount, where the record is stored in the array.
+ - -> data about the same station will be stored at the same index for all threads' data-arrays
+ - -> names will also be stored at that same index upon first encounter, and is common to all threads
+ - no locking needs to occur when the key is already found, since there is no multiple-write occurring
+ - the data-arrays are pre-allocated, and a atomic-counter will be incremented to know where the next element will be stored.
+
+Thinking again, this is likely similar to the approach mentioned by @synopse in one of his comments.
+
+For the ExtractLineData, three ideas to try implementing:
+ - avoid using a function, to get rid of the cost of stack checking
+ - reduce branching, I think it should be possible to go from 3 if-statements, to only 1
+ - unroll the loop (although I had tried this in the past, did not show any improvements)
diff --git a/entries/ghatem-fpc/src/OneBRC-nosharedname.lpr b/entries/ghatem-fpc/src/OneBRC-nosharedname.lpr
@@ -151,12 +151,12 @@ function Ceiling (const ANumber: Double): Integer; inline;
   Result := Trunc(ANumber) + Ord(Frac(ANumber) > 0);
 end;
 
-function RoundExDouble (const aTemp: Double): Double; inline;
+function RoundExInteger (const aTemp: Double): Integer; inline;
 var
   vTmp: Double;
 begin
   vTmp := aTemp * 10;
-  Result := Ceiling (vTmp) / 10;
+  Result := Ceiling (vTmp);
 end;
 
 //---------------------------------------------------
@@ -473,8 +473,24 @@ procedure TOneBRC.MergeAll;
 
 //---------------------------------------------------
 
+function MyFormatInt (const aIn: SmallInt): AnsiString; inline;
+begin
+  Result := IntToStr(aIn);
+  Insert ('.', Result, Length(Result));
+
+  if Result[1] = '.' then begin
+    Insert ('0', Result, 1);
+    exit;
+  end;
+
+  if (Result[1] = '-') and (Result[2] = '.') then
+    Insert('0', Result, 2);
+end;
+
+//---------------------------------------------------
+
 procedure TOneBRC.GenerateOutput;
-var vMin, vMean, vMax: Double;
+var vMean: Integer;
     vStream: TStringStream;
     I, N: Int64;
     vData: PStationData;
@@ -507,14 +523,12 @@ procedure TOneBRC.GenerateOutput;
       // debatable, and the whole output generation is < 0.3 seconds, so not exactly worth it
       vHash := crc32c(0, @vStations[i][1], Length (vStations[i]));
       FStationsDicts[0].TryGetValue(vHash, vData);
-      vMin := vData^.Min/10;
-      vMax := vData^.Max/10;
-      vMean := RoundExDouble(vData^.Sum/vData^.Count/10);
+      vMean := RoundExInteger(vData^.Sum/vData^.Count/10);
 
       vStream.WriteString(
-        vStations[i] + '=' + FormatFloat('0.0', vMin)
-                     + '/' + FormatFloat('0.0', vMean)
-                     + '/' + FormatFloat('0.0', vMax) + ', '
+        vStations[i] + '=' + MyFormatInt(vData^.Min)
+                     + '/' + MyFormatInt(vMean)
+                     + '/' + MyFormatInt(vData^.Max) + ', '
       );
       Inc(I);
     end;
@@ -645,4 +659,3 @@ procedure TOneBRCApp.WriteHelp;
 end.
 
 {$ENDREGION}
-
diff --git a/entries/ghatem-fpc/src/OneBRC-smallrec.lpr b/entries/ghatem-fpc/src/OneBRC-smallrec.lpr
@@ -154,12 +154,12 @@ function Ceiling (const ANumber: Double): Integer; inline;
   Result := Trunc(ANumber) + Ord(Frac(ANumber) > 0);
 end;
 
-function RoundExDouble (const aTemp: Double): Double; inline;
+function RoundExInteger (const aTemp: Double): Integer; inline;
 var
   vTmp: Double;
 begin
   vTmp := aTemp * 10;
-  Result := Ceiling (vTmp) / 10;
+  Result := Ceiling (vTmp);
 end;
 
 //---------------------------------------------------
@@ -487,8 +487,24 @@ procedure TOneBRC.MergeAll;
 
 //---------------------------------------------------
 
+function MyFormatInt (const aIn: SmallInt): AnsiString; inline;
+begin
+  Result := IntToStr(aIn);
+  Insert ('.', Result, Length(Result));
+
+  if Result[1] = '.' then begin
+    Insert ('0', Result, 1);
+    exit;
+  end;
+
+  if (Result[1] = '-') and (Result[2] = '.') then
+    Insert('0', Result, 2);
+end;
+
+//---------------------------------------------------
+
 procedure TOneBRC.GenerateOutput;
-var vMin, vMean, vMax: Double;
+var vMean: Integer;
     vStream: TStringStream;
     I, N: Int64;
     vData: PStationData;
@@ -521,14 +537,12 @@ procedure TOneBRC.GenerateOutput;
       // debatable, and the whole output generation is < 0.3 seconds, so not exactly worth it
       vHash := crc32c(0, @vStations[i][1], Length (vStations[i]));
       FStationsDicts[0].TryGetValue(vHash, vData);
-      vMin := vData^.Min/10;
-      vMax := vData^.Max/10;
-      vMean := RoundExDouble(vData^.Sum/vData^.Count/10);
+      vMean := RoundExInteger(vData^.Sum/vData^.Count/10);
 
       vStream.WriteString(
-        vStations[i] + '=' + FormatFloat('0.0', vMin)
-                     + '/' + FormatFloat('0.0', vMean)
-                     + '/' + FormatFloat('0.0', vMax) + ', '
+        vStations[i] + '=' + MyFormatInt(vData^.Min)
+                     + '/' + MyFormatInt(vMean)
+                     + '/' + MyFormatInt(vData^.Max) + ', '
       );
       Inc(I);
     end;
@@ -659,4 +673,3 @@ procedure TOneBRCApp.WriteHelp;
 end.
 
 {$ENDREGION}
-
diff --git a/entries/ghatem-fpc/src/OneBRC.lpr b/entries/ghatem-fpc/src/OneBRC.lpr
@@ -154,12 +154,12 @@ function Ceiling (const ANumber: Double): Integer; inline;
   Result := Trunc(ANumber) + Ord(Frac(ANumber) > 0);
 end;
 
-function RoundExDouble (const aTemp: Double): Double; inline;
+function RoundExInteger (const aTemp: Double): Integer; inline;
 var
   vTmp: Double;
 begin
   vTmp := aTemp * 10;
-  Result := Ceiling (vTmp) / 10;
+  Result := Ceiling (vTmp);
 end;
 
 //---------------------------------------------------
@@ -487,8 +487,24 @@ procedure TOneBRC.MergeAll;
 
 //---------------------------------------------------
 
+function MyFormatInt (const aIn: SmallInt): AnsiString; inline;
+begin
+  Result := IntToStr(aIn);
+  Insert ('.', Result, Length(Result));
+
+  if Result[1] = '.' then begin
+    Insert ('0', Result, 1);
+    exit;
+  end;
+
+  if (Result[1] = '-') and (Result[2] = '.') then
+    Insert('0', Result, 2);
+end;
+
+//---------------------------------------------------
+
 procedure TOneBRC.GenerateOutput;
-var vMin, vMean, vMax: Double;
+var vMean: Integer;
     vStream: TStringStream;
     I, N: Int64;
     vData: PStationData;
@@ -521,14 +537,12 @@ procedure TOneBRC.GenerateOutput;
       // debatable, and the whole output generation is < 0.3 seconds, so not exactly worth it
       vHash := crc32c(0, @vStations[i][1], Length (vStations[i]));
       FStationsDicts[0].TryGetValue(vHash, vData);
-      vMin := vData^.Min/10;
-      vMax := vData^.Max/10;
-      vMean := RoundExDouble(vData^.Sum/vData^.Count/10);
+      vMean := RoundExInteger(vData^.Sum/vData^.Count/10);
 
       vStream.WriteString(
-        vStations[i] + '=' + FormatFloat('0.0', vMin)
-                     + '/' + FormatFloat('0.0', vMean)
-                     + '/' + FormatFloat('0.0', vMax) + ', '
+        vStations[i] + '=' + MyFormatInt(vData^.Min)
+                     + '/' + MyFormatInt(vMean)
+                     + '/' + MyFormatInt(vData^.Max) + ', '
       );
       Inc(I);
     end;
@@ -659,4 +673,3 @@ procedure TOneBRCApp.WriteHelp;
 end.
 
 {$ENDREGION}
-