@@ -17,25 +17,24 @@ interface
1717
1818type
1919 { Create a record of temperature stats.
20-
21- Borrowed the concept from go's approach to improve performance, save floats as int64.
22- This saved ~2 mins processing time for processing 1 billion rows.}
20+ Borrowed the concept from go's approach, save floats as int64.
21+ This saved ~2 mins processing time for processing 1 billion rows.
22+ 2024-04-05. Using pointers saves ~ 30 seconds.}
// PStat points at a TStat allocated on the heap; the weather dictionary
// stores these pointers so a station's stats can be updated in place.
PStat = ^TStat;

// TStat accumulates the observations of one weather station.
// Temperatures are kept as int64 rather than floating point.
//   min - lowest temperature seen so far
//   max - highest temperature seen so far
//   sum - running total of all temperatures
//   cnt - number of observations added
TStat = record
  min, max, sum, cnt: int64;
public
  // Renders the stats as text (implementation lives further down the unit).
  function ToString: string;
end;
33+
3534
3635type
3736 // Create a dictionary, now approx 4 mins faster than Generics.Collections.TDictionary
38- TWeatherDictionaryLG = specialize TGHashMapQP<string, TStat >;
37+ TWeatherDictionaryLG = specialize TGHashMapQP<string, PStat >;
3938
4039type
4140 // Create a class to encapsulate the temperature observations of each weather station.
@@ -101,15 +100,6 @@ function RemoveDots(const line: string): string;
101100 end ;
102101end ;
103102
104- constructor TStat.Create(const newMin: int64; const newMax: int64;
105- const newSum: int64; const newCount: int64);
106- begin
107- self.min := newMin;
108- self.max := newMax;
109- self.sum := newSum;
110- self.cnt := newCount;
111- end ;
112-
113103function TStat.ToString : string;
114104var
115105 minR, meanR, maxR: double; // Store the rounded values prior saving to TStringList.
@@ -132,13 +122,19 @@ constructor TWeatherStation.Create(const filename: string);
132122end ;
133123
{ Releases everything owned by the weather station.

  The dictionary holds PStat pointers that were allocated with New, so each
  one is disposed explicitly before the container itself is freed.

  The destructor also runs automatically when the constructor raises, in
  which case the dictionary may never have been created; that case is
  guarded so clean-up does not fail on a nil reference. }
destructor TWeatherStation.Destroy;
var
  stationName: string;
begin
  // Free the sorted TStringList of stations (Free is safe on nil).
  weatherStationList.Free;

  if Assigned(self.weatherDictionary) then
  begin
    // 1. Release every heap-allocated stat record first...
    for stationName in self.weatherDictionary.Keys do
      Dispose(PStat(self.weatherDictionary.Items[stationName]));

    // 2. ...then the container that referenced them.
    self.weatherDictionary.Free;
  end;

  // Let the ancestor class release whatever it owns.
  inherited Destroy;
end;
141136
137+
142138procedure TWeatherStation.PrintSortedWeatherStationAndStats ;
143139var
144140 outputList: string;
@@ -191,7 +187,7 @@ procedure TWeatherStation.SortWeatherStationAndStats;
191187
192188 for wsKey in weatherDictionary.Keys do
193189 begin
194- self.weatherStationList.Add(wsKey + ' =' + weatherDictionary[wsKey].ToString + ' , ' );
190+ self.weatherStationList.Add(wsKey + ' =' + weatherDictionary[wsKey]^ .ToString + ' , ' );
195191 end ;
196192
197193 self.weatherStationList.CustomSort(@CustomTStringListComparer);
@@ -206,7 +202,7 @@ procedure TWeatherStation.SortWeatherStationAndStats;
206202procedure TWeatherStation.AddCityTemperatureLG (const cityName: string;
207203 const newTemp: int64);
208204var
209- stat: TStat ;
205+ stat: PStat ;
210206begin
211207 // If city name exists, modify temp as needed
212208 if self.weatherDictionary.Contains(cityName) then
@@ -215,21 +211,21 @@ procedure TWeatherStation.AddCityTemperatureLG(const cityName: string;
215211 stat := self.weatherDictionary[cityName];
216212
216212 // If the temp is lower than min, set the new min.
218- if newTemp < stat.min then
219- stat.min := newTemp;
214+ if newTemp < stat^ .min then
215+ stat^ .min := newTemp;
220216
221217 // If the temp higher than max, set the new max.
222- if newTemp > stat.max then
223- stat.max := newTemp;
218+ if newTemp > stat^ .max then
219+ stat^ .max := newTemp;
224220
224220 // Add the new temperature to this city's running sum.
226- stat.sum := stat.sum + newTemp;
222+ stat^ .sum := stat^ .sum + newTemp;
227223
228224 // Increase the counter
229- stat.cnt := stat.cnt + 1 ;
225+ stat^ .cnt := stat^ .cnt + 1 ;
230226
231227 // Update the stat of this city
232- self.weatherDictionary.AddOrSetValue(cityName, stat);
228+ // self.weatherDictionary.AddOrSetValue(cityName, stat);
233229 { $IFDEF DEBUG}
234230 // Display the line.
235231 WriteLn(' Updated: ' , cityName);
@@ -239,7 +235,13 @@ procedure TWeatherStation.AddCityTemperatureLG(const cityName: string;
239235 // If city name doesn't exist add a new entry
240236 if not self.weatherDictionary.Contains(cityName) then
241237 begin
242- self.weatherDictionary.Add(cityName, TStat.Create(newTemp, newTemp, newTemp, 1 ));
238+ // self.weatherDictionary.Add(cityName, TStat.Create(newTemp, newTemp, newTemp, 1));
239+ New(stat);
240+ stat^.min := newTemp;
241+ stat^.max := newTemp;
242+ stat^.sum := newTemp;
243+ stat^.cnt := 1 ;
244+ self.weatherDictionary.Add(cityName, stat);
243245
244246 { $IFDEF DEBUG}
245247 // Display the line.
{ Splits one raw chunk of the measurements file into lines and passes each
  line to ParseStationAndTemp.

  chunkData  - buffer holding the chunk, indexed from 0.
  dataSize   - number of valid bytes in chunkData.
  chunkIndex - not used by this routine.

  Only text terminated by a line feed (#10) is forwarded; any bytes after
  the last line feed in the chunk are not processed here.

  NOTE(review): the parameter type is assumed to be pansichar - confirm
  against the declaration in the class. }
procedure TWeatherStation.ParseStationAndTempFromChunk(const chunkData: pansichar;
  const dataSize: int64; const chunkIndex: int64);
var
  index, lineStart, lineLength: int64;
begin
  lineStart := 0;

  for index := 0 to dataSize - 1 do
  begin
    // Only a line feed closes a line; everything else is line content.
    if chunkData[index] <> #10 then
      Continue;

    lineLength := index - lineStart;

    // Drop the CR of a Windows CRLF pair. Testing lineLength first keeps
    // index - 1 inside the current line, so the byte before the buffer is
    // never read when the chunk starts with a line feed, and the last line
    // of the chunk gets its CR stripped like every other line.
    if (lineLength > 0) and (chunkData[index - 1] = #13) then
      Dec(lineLength);

    // The current line is chunkData[lineStart..lineStart + lineLength - 1].
    self.ParseStationAndTemp(chunkData[lineStart..lineStart + lineLength - 1]);

    // Continue with the byte after the line feed.
    lineStart := index + 1;
  end;
end;
394403
395404procedure TWeatherStation.ReadMeasurementsInChunks (const filename: string);
@@ -402,8 +411,8 @@ procedure TWeatherStation.ReadMeasurementsInChunks(const filename: string);
402411begin
403412
404413 // chunkSize := defaultChunkSize * 2; // Now 128MB in bytes ~ 5.52 :D
405- // chunkSize := defaultChunkSize * 4; // Now 256MB in bytes ~ 5.50 :D
406- chunkSize := defaultChunkSize * 4 * 4 ; // Now 1GB in bytes ~ 5:53 :D
414+ chunkSize := defaultChunkSize * 4 ; // Now 256MB in bytes ~ 5.50 :D
415+ // chunkSize := defaultChunkSize * 4 * 4; // Now 1GB in bytes ~ 5:53 :D
407416
408417 // Open the file for reading
409418 fileStream := TFileStream.Create(filename, fmOpenRead or fmShareDenyWrite);
@@ -467,8 +476,9 @@ procedure TWeatherStation.ProcessMeasurements;
467476begin
468477 // self.ReadMeasurements;
469478 // self.ReadMeasurementsClassic;
479+ { chunking cuts ~ 30 - 40 seconds of processing time from ~6.45 to 6.00}
470480 self.ReadMeasurementsInChunks(self.fname);
471- { This method cuts ~ 30 - 40 seconds of processing time from ~6.45 to 6.00 }
481+
472482 self.SortWeatherStationAndStats;
473483 self.PrintSortedWeatherStationAndStats;
474484end ;
0 commit comments