@@ -19,7 +19,7 @@ interface
1919 { Create a record of temperature stats.
2020 Borrowed the concept from go's approach, save floats as int64.
2121 This saved ~2 mins processing time for processing 1 billion rows.
22- 2024-04-05. Using pointers saves ~ 30 seconds.}
22+ 2024-04-05. Using pointers saves approx. 30 seconds for processing 1BRC.}
2323 PStat = ^TStat;
2424 TStat = record
2525 var
@@ -31,9 +31,8 @@ TStat = record
3131 function ToString : string;
3232 end ;
3333
34-
3534type
36- // Create a dictionary, now approx 4 mins faster than Generics.Collections.TDictionary
35+ // Create a dictionary, now approx. 4 mins faster than Generics.Collections.TDictionary
3736 TWeatherDictionaryLG = specialize TGHashMapQP<string, PStat>;
3837
3938type
@@ -230,7 +229,7 @@ procedure TWeatherStation.AddCityTemperatureLG(const cityName: string; const new
230229 // If city name doesn't exist add a new entry
231230 if not self.weatherDictionary.Contains(cityName) then
232231 begin
233- // self.weatherDictionary.Add(cityName, TStat. Create(newTemp, newTemp, newTemp, 1));
232+ // Create a new Stat record
234233 New(stat);
235234 stat^.min := newTemp;
236235 stat^.max := newTemp;
@@ -334,35 +333,7 @@ procedure TWeatherStation.ReadMeasurementsClassic;
334333 end ;
335334end ;
336335
337- { procedure TWeatherStation.ParseStationAndTempFromChunk(const chunkData: pansichar;
338- const dataSize: int64; const chunkIndex: int64);
339- var
340- mStream: TMemoryStream;
341- streamReader: TStreamReader;
342- currentString: string;
343- begin
344- mStream:=TMemoryStream.Create;
345- try
346- mStream.WriteBuffer(chunkData^, dataSize);
347- mStream.Position:=0;
348-
349- streamReader:=TStreamReader.Create(mStream, 1048576, False);
350- try
351- while not streamReader.Eof do
352- begin
353- currentString:=streamReader.ReadLine;
354- self.ParseStationAndTemp(currentString);
355- end;
356- finally
357- streamReader.Free;
358- end;
359- finally
360- mStream.Free;
361- end;
362- end;}
363-
364- procedure TWeatherStation.ParseStationAndTempFromChunk (const chunkData: pansichar;
365- const dataSize: int64; const chunkIndex: int64);
336+ procedure TWeatherStation.ParseStationAndTempFromChunk (const chunkData: pansichar; const dataSize: int64; const chunkIndex: int64);
366337var
367338 index, lineStart, lineLength: int64;
368339 line:string;
@@ -398,9 +369,8 @@ procedure TWeatherStation.ReadMeasurementsInChunks(const filename: string);
398369 bytesRead, totalBytesRead, chunkSize, lineBreakPos, chunkIndex: int64;
399370begin
400371
401- // chunkSize := defaultChunkSize * 2; // Now 128MB in bytes ~ 5.52 :D
402- chunkSize := defaultChunkSize * 4 ; // Now 256MB in bytes ~ 5.50 :D
403- // chunkSize := defaultChunkSize * 4 * 4; // Now 1GB in bytes ~ 5:53 :D
372+ // Set chunk size
373+ chunkSize := defaultChunkSize * 4 ; // Now 256MB in bytes
404374
405375 // Open the file for reading
406376 fileStream := TFileStream.Create(filename, fmOpenRead or fmShareDenyWrite);
@@ -464,7 +434,7 @@ procedure TWeatherStation.ProcessMeasurements;
464434begin
465435 // self.ReadMeasurements;
466436 // self.ReadMeasurementsClassic;
467- { chunking cuts ~ 30 - 40 seconds of processing time from ~6.45 to 6.00 }
437+ { Reading in chunks cuts ~30-40 seconds off the processing time. }
468438 self.ReadMeasurementsInChunks(self.fname);
469439 self.SortWeatherStationAndStats;
470440 self.PrintSortedWeatherStationAndStats;
0 commit comments