Skip to content

Commit 9465399

Browse files
committed
Update - Increased chunk size prior to parsing.
1 parent ccf42f2 commit 9465399

2 files changed

Lines changed: 36 additions & 9 deletions

File tree

entries/ikelaiah/README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,11 @@ Iwan Kelaiah
117117
* Encapsulate process in a class.
118118
* Updated the rounding method as per the latest `README.md` in the 1BRC GitHub page.
119119

120+
121+
* 1.6
122+
* Revision release - Sequential approach. 6-8 mins on my Inspiron 15 7510 laptop (a small improvement in speed).
123+
* Read input file in chunks and process each chunk line by line. This saves approx 30 - 40 seconds.
124+
120125
## License
121126

122127
This project is licensed under the MIT License - see the LICENSE.md file for details

entries/ikelaiah/src/weatherstation.pas

Lines changed: 31 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ interface
99
, SysUtils
1010
, Math
1111
, streamex
12-
, bufstream
1312
, lgHashMap
1413
{$IFDEF DEBUG}
1514
, Stopwatch
@@ -35,7 +34,7 @@ TStat = record
3534
PStat = ^TStat;
3635

3736
type
38-
// Create a dictionary
37+
// Create a dictionary, now approx 4 mins faster than Generics.Collections.TDictionary
3938
TWeatherDictionaryLG = specialize TGHashMapQP<string, TStat>;
4039

4140
type
@@ -146,6 +145,11 @@ procedure TWeatherStation.PrintSortedWeatherStationAndStats;
146145
index: int64;
147146
begin
148147

148+
{$IFDEF DEBUG}
149+
// Log the time at which printing starts.
150+
WriteLn('Printing now: ', DateTimeToStr(Now));
151+
{$ENDIF DEBUG}
152+
149153
if self.weatherStationList.Count = 0 then
150154
begin
151155
WriteLn('Nothing to print. The list is empty.');
@@ -160,12 +164,23 @@ procedure TWeatherStation.PrintSortedWeatherStationAndStats;
160164
// Remove last comma and space; ', ', a neat trick from Gus.
161165
SetLength(outputList, Length(outputList) - 2);
162166
WriteLn('{', outputList, '}');
167+
168+
{$IFDEF DEBUG}
169+
// Log the time at which printing finishes.
170+
WriteLn('Printing done: ', DateTimeToStr(Now));
171+
{$ENDIF DEBUG}
163172
end;
164173

165174
procedure TWeatherStation.SortWeatherStationAndStats;
166175
var
167176
wsKey: string;
168177
begin
178+
179+
{$IFDEF DEBUG}
180+
// Log the time at which sorting starts.
181+
WriteLn('Sorting now: ', DateTimeToStr(Now));
182+
{$ENDIF DEBUG}
183+
169184
wsKey := '';
170185

171186
if self.weatherDictionary.GetCapacity = 0 then
@@ -180,6 +195,12 @@ procedure TWeatherStation.SortWeatherStationAndStats;
180195
end;
181196

182197
self.weatherStationList.CustomSort(@CustomTStringListComparer);
198+
199+
200+
{$IFDEF DEBUG}
201+
// Log the time at which sorting finishes.
202+
WriteLn('Sorting done: ', DateTimeToStr(Now));
203+
{$ENDIF DEBUG}
183204
end;
184205

185206
procedure TWeatherStation.AddCityTemperatureLG(const cityName: string;
@@ -359,12 +380,12 @@ procedure TWeatherStation.ParseStationAndTempFromChunk(const chunkData: pansicha
359380
lineLength := index - lineStart;
360381

361382
// Remove potential CR before LF (for Windows)
362-
if (chunkData[index-1] = #13) and (index < dataSize - 1) then
383+
if (chunkData[index - 1] = #13) and (index < dataSize - 1) then
363384
Dec(LineLength);
364385

365386
// The current line is now: Buffer[LineStart..LineStart+LineLength-1]
366387
// WriteLn(chunkData[lineStart..lineStart + lineLength - 1], '.');
367-
self.ParseStationAndTemp(chunkData[lineStart..lineStart+lineLength - 1]);
388+
self.ParseStationAndTemp(chunkData[lineStart..lineStart + lineLength - 1]);
368389
// Skip to the next 'line' in the buffer
369390
lineStart := index + 1;
370391
end;
@@ -373,16 +394,17 @@ procedure TWeatherStation.ParseStationAndTempFromChunk(const chunkData: pansicha
373394

374395
procedure TWeatherStation.ReadMeasurementsInChunks(const filename: string);
375396
const
376-
defaultChunkSize: integer = 536870912; // 512MB in bytes
397+
defaultChunkSize: int64 = 67108864; // 64MB in bytes
377398
var
378399
fileStream: TFileStream;
379400
buffer: pansichar;
380401
bytesRead, totalBytesRead, chunkSize, lineBreakPos, chunkIndex: int64;
381402
begin
382-
chunkSize := defaultChunkSize * 1;
403+
chunkSize := defaultChunkSize * 4 * 4; // Now 1GB in bytes ~ 5:53 :D
404+
// chunkSize := defaultChunkSize * 4; // Now 256MB in bytes ~ 5.50 :D
383405

384406
// Open the file for reading
385-
fileStream := TFileStream.Create(filename, fmOpenRead);
407+
fileStream := TFileStream.Create(filename, fmOpenRead or fmShareDenyWrite);
386408
try
387409
// Allocate memory buffer for reading chunks
388410
// Ref: https://www.freepascal.org/docs-html/rtl/system/getmem.html
@@ -395,7 +417,7 @@ procedure TWeatherStation.ReadMeasurementsInChunks(const filename: string);
395417
while totalBytesRead < fileStream.Size do
396418
begin
397419
{$IFDEF DEBUG}
398-
WriteLn('Processing chunk index: ', IntToStr(chunkIndex));
420+
WriteLn('Processing chunk index: ', IntToStr(chunkIndex));
399421
{$ENDIF DEBUG}
400422

401423
bytesRead := fileStream.Read(buffer^, chunkSize);
@@ -443,7 +465,7 @@ procedure TWeatherStation.ProcessMeasurements;
443465
begin
444466
// self.ReadMeasurements;
445467
// self.ReadMeasurementsClassic;
446-
self.ReadMeasurementsInChunks(self.fname); // This method cuts approx 30 seconds of processing time
468+
self.ReadMeasurementsInChunks(self.fname); {This method cuts ~ 30 - 40 seconds of processing time from ~6.45 to 6.00}
447469
self.SortWeatherStationAndStats;
448470
self.PrintSortedWeatherStationAndStats;
449471
end;

0 commit comments

Comments
 (0)