@@ -11,7 +11,6 @@ interface
1111 , streamex
1212 , bufstream
1313 , lgHashMap
14- , StrUtils
1514 { $IFDEF DEBUG}
1615 , Stopwatch
1716 { $ENDIF}
@@ -52,8 +51,6 @@ TWeatherStation = class
5251 lookupStrFloatToIntList: TValidTemperatureDictionary;
5352 procedure CreateLookupTemp ;
5453 procedure ReadMeasurements ;
55- procedure ReadMeasurementsBuf ;
56- procedure ReadMeasurementsBufSL ;
5754 procedure ParseStationAndTemp (const line: string);
5855 procedure AddCityTemperatureLG (const cityName: string; const newTemp: int64);
5956 procedure SortWeatherStationAndStats ;
@@ -243,9 +240,6 @@ procedure TWeatherStation.AddCityTemperatureLG(const cityName: string;
243240 // If city name exists, modify temp as needed
244241 if self.weatherDictionary.TryGetValue(cityName, stat) then
245242 begin
246- // Get the temp record
247- // stat := self.weatherDictionary[cityName];
248-
249243 // Update min and max temps if needed
250244 // Re-arranged the if statement, to achieve minimal if checks.
251245 // This saves approx 15 seconds when processing 1 billion rows.
@@ -303,7 +297,7 @@ procedure TWeatherStation.ParseStationAndTemp(const line: string);
303297 if delimiterPos > 0 then
304298 begin
305299 // Get the weather station name
306- // Using Copy and POS - as suggested by Gemini AI.
300+ // Using Copy and POS instead of SplitString - as suggested by Gemini AI.
307301 // This part saves 3 minutes when processing 1 billion rows.
308302
309303 // No need to create a string
@@ -321,15 +315,13 @@ procedure TWeatherStation.ParseStationAndTemp(const line: string);
321315procedure TWeatherStation.ReadMeasurements ;
322316var
323317 fileStream: TFileStream;
324- bufStream: TReadBufStream;
325318 streamReader: TStreamReader;
326- line: string;
327319begin
328320
329321 // Open the file for reading
330322 fileStream := TFileStream.Create(self.fname, fmOpenRead);
331323 try
332- streamReader := TStreamReader.Create(fileStream, 65536 * 16 , False);
324+ streamReader := TStreamReader.Create(fileStream, 65536 * 32 , False);
333325 try
334326 // Read and parse chunks of data until EOF -------------------------------
335327 while not streamReader.EOF do
@@ -346,183 +338,11 @@ procedure TWeatherStation.ReadMeasurements;
346338 end ;
347339end ;
348340
349- procedure TWeatherStation.ReadMeasurementsBuf ;
350- var
351- fileStream: TFileStream;
352- memStream: TMemoryStream;
353- streamReader: TStreamReader;
354- buffer: TBytes;
355- bytesRead, totalBytesRead, chunkSize, lineBreakPos, chunkIndex,
356- index, lineCount: int64;
357- begin
358-
359- chunksize := 4194304 * 1 ;
360-
361- // Open the file for reading
362- fileStream := TFileStream.Create(self.fname, fmOpenRead);
363- SetLength(buffer, chunkSize);
364- try
365- memStream := TMemoryStream.Create;
366- try
367- totalBytesRead := 0 ;
368- chunkIndex := 0 ;
369- lineCount := 0 ;
370-
371- // Read and parse chunks of data until EOF
372- while totalBytesRead < fileStream.Size do
373- begin
374- // Read more bytes and keep track on bytes read
375- bytesRead := fileStream.Read(buffer[0 ], chunkSize);
376- Inc(totalBytesRead, bytesRead);
377-
378- // Find the position of the last newline character in the chunk
379- lineBreakPos := BytesRead;
380- while (lineBreakPos > 0 ) and (Buffer[lineBreakPos - 1 ] <> Ord(#10 )) do
381- Dec(lineBreakPos);
382-
383- { Now, must ensure that if the last byte read in the current chunk
384- is not a newline character, the file pointer is moved back to include
385- that byte and any preceding bytes of the partial line in the next
386- chunk's read operation.
387-
388- Also, no need to update the BytesRead variable in this context because
389- it represents the actual number of bytes read from the file, including
390- any partial line that may have been included due to moving the file
391- pointer back.
392- Ref: https://www.freepascal.org/docs-html/rtl/classes/tstream.seek.html}
393- if lineBreakPos < bytesRead then
394- fileStream.Seek(-(bytesRead - lineBreakPos), soCurrent);
395- { $IFDEF DEBUG}
396- // Do something with the chunk here
397- // Like counting line
398- for index := 0 to lineBreakPos - 1 do
399- if buffer[index] = Ord(#10 ) then
400- lineCount := lineCount + 1 ;
401- { $ENDIF DEBUG}
402-
403- // Use memory stream & stream reader
404- memStream.Write(buffer[0 ], lineBreakPos - 1 );
405- memStream.Position := 0 ;
406- streamReader := TStreamReader.Create(memStream);
407- try
408- while not streamReader.EOF do
409- begin
410- // WriteLn(streamReader.ReadLine);
411- self.ParseStationAndTemp(streamReader.ReadLine);
412- end ;
413- finally
414- streamReader.Free;
415- end ;
416- { $IFDEF DEBUG}
417- // Display user feedback
418- WriteLn(' Line count: ' , IntToStr(lineCount));
419- WriteLn(' Chunk ' , chunkIndex, ' , Total bytes read:' , IntToStr(totalBytesRead));
420- { $ENDIF DEBUG}
421-
422- { $IFDEF DEBUG}
423- // Increase chunk index - a counter
424- Inc(chunkIndex);
425- { $ENDIF DEBUG}
426- end ;
427- finally
428- memStream.Free;
429- end ;
430- finally
431- // Close the file
432- fileStream.Free;
433- end ;
434- end ;
435-
436- procedure TWeatherStation.ReadMeasurementsBufSL ;
437- var
438- fileStream: TFileStream;
439- strList: TStringList;
440- streamReader: TStreamReader;
441- buffer, trimmedBuffer: TBytes;
442- bytesRead, totalBytesRead, chunkSize, lineBreakPos, chunkIndex,
443- slIndex, lineCount: int64;
444- begin
445-
446- chunksize := 8192 * 1 ;
447-
448- // Open the file for reading
449- fileStream := TFileStream.Create(self.fname, fmOpenRead);
450- SetLength(buffer, chunkSize);
451- try
452- strList := TStringList.Create;
453- try
454- totalBytesRead := 0 ;
455- chunkIndex := 0 ;
456- lineCount := 0 ;
457-
458- // Read and parse chunks of data until EOF
459- while totalBytesRead < fileStream.Size do
460- begin
461- // Read more bytes and keep track on bytes read
462- bytesRead := fileStream.Read(buffer[0 ], chunkSize);
463- Inc(totalBytesRead, bytesRead);
464-
465- // Find the position of the last newline character in the chunk
466- lineBreakPos := BytesRead;
467- while (lineBreakPos > 0 ) and (Buffer[lineBreakPos - 1 ] <> Ord(#10 )) do
468- Dec(lineBreakPos);
469-
470- { Now, must ensure that if the last byte read in the current chunk
471- is not a newline character, the file pointer is moved back to include
472- that byte and any preceding bytes of the partial line in the next
473- chunk's read operation.
474-
475- Also, no need to update the BytesRead variable in this context because
476- it represents the actual number of bytes read from the file, including
477- any partial line that may have been included due to moving the file
478- pointer back.
479- Ref: https://www.freepascal.org/docs-html/rtl/classes/tstream.seek.html}
480- if lineBreakPos < bytesRead then
481- fileStream.Seek(-(bytesRead - lineBreakPos), soCurrent);
482- { $IFDEF DEBUG}
483- // Do something with the chunk here
484- // Like counting line
485- for index := 0 to lineBreakPos - 1 do
486- if buffer[index] = Ord(#10 ) then
487- lineCount := lineCount + 1 ;
488- { $ENDIF DEBUG}
489-
490- // Use TStringList and a sub-TBytes array up to lineBreakPos
491- SetLength(trimmedBuffer, lineBreakPos);
492- // Index 'n' is inclusive, so add 1 to the length
493- Move(buffer[0 ], trimmedBuffer[0 ], Length(trimmedBuffer)); // Copy the bytes
494- strList.Clear;
495- strList.Text := ansistring(trimmedBuffer);
496- for slIndex := 0 to strList.Count - 1 do
497- self.ParseStationAndTemp(strList[slIndex]);
498-
499- { $IFDEF DEBUG}
500- // Display user feedback
501- WriteLn(' Line count: ' , IntToStr(lineCount));
502- WriteLn(' Chunk ' , chunkIndex, ' , Total bytes read:' , IntToStr(totalBytesRead));
503- { $ENDIF DEBUG}
504-
505- { $IFDEF DEBUG}
506- // Increase chunk index - a counter
507- Inc(chunkIndex);
508- { $ENDIF DEBUG}
509- end ;
510- finally
511- strList.Free;
512- end ;
513- finally
514- // Close the file
515- fileStream.Free;
516- end ;
517- end ;
518-
519341// The main algorithm
520342procedure TWeatherStation.ProcessMeasurements ;
521343begin
522344 self.CreateLookupTemp;
523345 self.ReadMeasurements;
524- // self.ReadMeasurementsBuf;
525- // self.ReadMeasurementsBufSL;
526346 self.SortWeatherStationAndStats;
527347 self.PrintSortedWeatherStationAndStats;
528348end ;
0 commit comments