@@ -17,23 +17,25 @@ interface
1717
type
  { TStat accumulates the temperature statistics of one weather station.

    Following the approach used by the Go solutions, temperatures are stored
    as int64 rather than as floats; this saved roughly 2 minutes when
    processing 1 billion rows. }
  TStat = record
  var
    min, max: int64;  // lowest / highest temperature accumulated so far
    sum: int64;       // running total of the accumulated temperatures
    cnt: int64;       // number of temperatures accumulated
  public
    // Initialises min, max, sum and cnt from the four arguments, in that order.
    constructor Create(const newMin: int64; const newMax: int64;
      const newSum: int64; const newCount: int64);
    // Returns the textual form of the statistics.
    function ToString: string;
  end;

  // Typed pointer to a TStat record.
  PStat = ^TStat;
3335
type
  // Hash map from a string key to the TStat stored for that key.
  // Approx. 4 mins faster than Generics.Collections.TDictionary.
  TWeatherDictionaryLG = specialize TGHashMapQP<string, TStat>;
3739
3840type
3941 // Create a class to encapsulate the temperature observations of each weather station.
@@ -45,8 +47,7 @@ TWeatherStation = class
4547 procedure ReadMeasurements ;
4648 procedure ReadMeasurementsClassic ;
4749 procedure ReadMeasurementsInChunks (const filename: string);
48- procedure ParseStationAndTempFromChunk (const chunkData: pansichar;
49- const dataSize: int64; const chunkIndex: int64);
50+ procedure ParseStationAndTempFromChunk (const chunkData: pansichar; const dataSize: int64; const chunkIndex: int64);
5051 procedure ParseStationAndTemp (const line: string);
5152 procedure AddCityTemperatureLG (const cityName: string; const newTemp: int64);
5253 procedure SortWeatherStationAndStats ;
@@ -67,8 +68,7 @@ implementation
6768 The following procedure Written by Székely Balázs for the 1BRC for Object Pascal.
6869 URL: https://github.com/gcarreno/1brc-ObjectPascal/tree/main
6970}
70- function CustomTStringListComparer (AList: TStringList;
71- AIndex1, AIndex2: integer): integer;
71+ function CustomTStringListComparer (AList: TStringList; AIndex1, AIndex2: integer): integer;
7272var
7373 Pos1, Pos2: integer;
7474 Str1, Str2: string;
@@ -99,6 +99,15 @@ function RemoveDots(const line: string): string;
9999 end ;
100100end ;
101101
{ Builds a TStat whose fields are copied verbatim from the arguments.

  newMin   - value stored in min
  newMax   - value stored in max
  newSum   - value stored in sum
  newCount - value stored in cnt }
constructor TStat.Create(const newMin: int64; const newMax: int64;
  const newSum: int64; const newCount: int64);
begin
  // The four assignments are independent of each other.
  Self.cnt := newCount;
  Self.sum := newSum;
  Self.max := newMax;
  Self.min := newMin;
end;
110+
102111function TStat.ToString : string;
103112var
104113 minR, meanR, maxR: double; // Store the rounded values prior saving to TStringList.
@@ -121,19 +130,13 @@ constructor TWeatherStation.Create(const filename: string);
121130end ;
122131
{ Releases the objects owned by this weather station.

  Only the two containers are freed here. TWeatherDictionaryLG stores TStat
  records by value, so (assuming weatherDictionary is of that type) there are
  no per-entry pointers to Dispose before freeing the container. }
destructor TWeatherStation.Destroy;
begin
  // Free the TStringList that holds the sortable output lines.
  weatherStationList.Free;
  // Free the dictionary.
  weatherDictionary.Free;
  // Chain to the ancestor last, after this class's own fields are released,
  // so any clean-up performed by the ancestor still runs.
  inherited Destroy;
end;
135139
136-
137140procedure TWeatherStation.PrintSortedWeatherStationAndStats ;
138141var
139142 outputList: string;
@@ -186,7 +189,7 @@ procedure TWeatherStation.SortWeatherStationAndStats;
186189
187190 for wsKey in weatherDictionary.Keys do
188191 begin
189- self.weatherStationList.Add(wsKey + ' =' + weatherDictionary[wsKey]^ .ToString + ' , ' );
192+ self.weatherStationList.Add(wsKey + ' =' + weatherDictionary[wsKey].ToString + ' , ' );
190193 end ;
191194
192195 self.weatherStationList.CustomSort(@CustomTStringListComparer);
@@ -198,9 +201,10 @@ procedure TWeatherStation.SortWeatherStationAndStats;
198201 { $ENDIF DEBUG}
199202end ;
200203
201- procedure TWeatherStation.AddCityTemperatureLG (const cityName: string; const newTemp: int64);
204+ procedure TWeatherStation.AddCityTemperatureLG (const cityName: string;
205+ const newTemp: int64);
202206var
203- stat: PStat ;
207+ stat: TStat ;
204208begin
205209 // If city name exists, modify temp as needed
206210 if self.weatherDictionary.Contains(cityName) then
@@ -209,17 +213,21 @@ procedure TWeatherStation.AddCityTemperatureLG(const cityName: string; const new
209213 stat := self.weatherDictionary[cityName];
210214
211215 // If the temp is lower than min, set the new min.
212- if newTemp < stat^.min then stat^.min := newTemp;
216+ if newTemp < stat.min then
217+ stat.min := newTemp;
213218
214219 // If the temp higher than max, set the new max.
215- if newTemp > stat^.max then stat^.max := newTemp;
220+ if newTemp > stat.max then
221+ stat.max := newTemp;
216222
217223 // Add the new temperature to this city's running sum.
218- stat^ .sum := stat^ .sum + newTemp;
224+ stat.sum := stat.sum + newTemp;
219225
220226 // Increase the counter
221- stat^ .cnt := stat^ .cnt + 1 ;
227+ stat.cnt := stat.cnt + 1 ;
222228
229+ // Update the stat of this city
230+ self.weatherDictionary.AddOrSetValue(cityName, stat);
223231 { $IFDEF DEBUG}
224232 // Display the line.
225233 WriteLn(' Updated: ' , cityName);
@@ -229,13 +237,7 @@ procedure TWeatherStation.AddCityTemperatureLG(const cityName: string; const new
229237 // If city name doesn't exist add a new entry
230238 if not self.weatherDictionary.Contains(cityName) then
231239 begin
232- // Create a new Stat record
233- New(stat);
234- stat^.min := newTemp;
235- stat^.max := newTemp;
236- stat^.sum := newTemp;
237- stat^.cnt := 1 ;
238- self.weatherDictionary.Add(cityName, stat);
240+ self.weatherDictionary.Add(cityName, TStat.Create(newTemp, newTemp, newTemp, 1 ));
239241
240242 { $IFDEF DEBUG}
241243 // Display the line.
@@ -336,28 +338,28 @@ procedure TWeatherStation.ReadMeasurementsClassic;
{ Splits a chunk of raw file data into lines and hands each line to
  ParseStationAndTemp.

  chunkData  - buffer holding the chunk; read at indexes 0 .. dataSize - 1.
  dataSize   - number of bytes of chunkData to scan.
  chunkIndex - not used by this routine.

  Every LF (#10) terminates a line. A CR (#13) directly in front of that LF is
  dropped so Windows line endings parse like Unix ones. Bytes that follow the
  last LF in the chunk are not processed here. }
procedure TWeatherStation.ParseStationAndTempFromChunk(const chunkData: pansichar; const dataSize: int64; const chunkIndex: int64);
var
  index, lineStart, lineLength: int64;
begin
  lineStart := 0;

  // Scan for Line Feed (LF)
  for index := 0 to dataSize - 1 do
  begin
    if chunkData[index] = #10 then
    begin
      lineLength := index - lineStart;

      // Remove a potential CR before the LF (for Windows).
      // Only look at chunkData[index - 1] when the current line holds at
      // least one character: this avoids reading chunkData[-1] when the
      // chunk starts with LF, and keeps lineLength from going negative.
      // The check applies to every line, including the last one of the chunk.
      if lineLength > 0 then
        if chunkData[index - 1] = #13 then
          Dec(lineLength);

      // The current line is now: chunkData[lineStart..lineStart+lineLength-1]
      self.ParseStationAndTemp(chunkData[lineStart..lineStart + lineLength - 1]);

      // Skip to the next 'line' in the buffer
      lineStart := index + 1;
    end;
  end;
end;
362364
363365procedure TWeatherStation.ReadMeasurementsInChunks (const filename: string);
@@ -369,8 +371,7 @@ procedure TWeatherStation.ReadMeasurementsInChunks(const filename: string);
369371 bytesRead, totalBytesRead, chunkSize, lineBreakPos, chunkIndex: int64;
370372begin
371373
372- // Set chunk size
373- chunkSize := defaultChunkSize * 4 ; // Now 256MB in bytes
374+ chunkSize := defaultChunkSize * 4 ; // 256MB in bytes
374375
375376 // Open the file for reading
376377 fileStream := TFileStream.Create(filename, fmOpenRead or fmShareDenyWrite);
@@ -434,7 +435,7 @@ procedure TWeatherStation.ProcessMeasurements;
434435begin
435436 // self.ReadMeasurements;
436437 // self.ReadMeasurementsClassic;
437- { Read in chunks cuts ~ 30 - 40 seconds of processing time. }
438+ { This chunking method cuts ~ 30 - 40 seconds of processing time from ~6.45 to 6.00 }
438439 self.ReadMeasurementsInChunks(self.fname);
439440 self.SortWeatherStationAndStats;
440441 self.PrintSortedWeatherStationAndStats;
0 commit comments