Skip to content

Commit 001f712

Browse files
committed
Update - Tidy up.
1 parent 886db84 commit 001f712

File tree

1 file changed

+2
-182
lines changed

1 file changed

+2
-182
lines changed

entries/ikelaiah/src/weatherstation.pas

Lines changed: 2 additions & 182 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ interface
1111
, streamex
1212
, bufstream
1313
, lgHashMap
14-
, StrUtils
1514
{$IFDEF DEBUG}
1615
, Stopwatch
1716
{$ENDIF}
@@ -52,8 +51,6 @@ TWeatherStation = class
5251
lookupStrFloatToIntList: TValidTemperatureDictionary;
5352
procedure CreateLookupTemp;
5453
procedure ReadMeasurements;
55-
procedure ReadMeasurementsBuf;
56-
procedure ReadMeasurementsBufSL;
5754
procedure ParseStationAndTemp(const line: string);
5855
procedure AddCityTemperatureLG(const cityName: string; const newTemp: int64);
5956
procedure SortWeatherStationAndStats;
@@ -243,9 +240,6 @@ procedure TWeatherStation.AddCityTemperatureLG(const cityName: string;
243240
// If city name exists, modify temp as needed
244241
if self.weatherDictionary.TryGetValue(cityName, stat) then
245242
begin
246-
// Get the temp record
247-
// stat := self.weatherDictionary[cityName];
248-
249243
// Update min and max temps if needed
250244
// Re-arranged the if statement, to achieve minimal if checks.
251245
// This saves approx 15 seconds when processing 1 billion rows.
@@ -303,7 +297,7 @@ procedure TWeatherStation.ParseStationAndTemp(const line: string);
303297
if delimiterPos > 0 then
304298
begin
305299
// Get the weather station name
306-
// Using Copy and POS - as suggested by Gemini AI.
300+
// Using Copy and POS instead of SplitString - as suggested by Gemini AI.
307301
// This part saves 3 mins when processing 1 billion rows.
308302

309303
// No need to create a string
@@ -321,15 +315,13 @@ procedure TWeatherStation.ParseStationAndTemp(const line: string);
321315
procedure TWeatherStation.ReadMeasurements;
322316
var
323317
fileStream: TFileStream;
324-
bufStream: TReadBufStream;
325318
streamReader: TStreamReader;
326-
line: string;
327319
begin
328320

329321
// Open the file for reading
330322
fileStream := TFileStream.Create(self.fname, fmOpenRead);
331323
try
332-
streamReader := TStreamReader.Create(fileStream, 65536 * 16, False);
324+
streamReader := TStreamReader.Create(fileStream, 65536 * 32, False);
333325
try
334326
// Read and parse chunks of data until EOF -------------------------------
335327
while not streamReader.EOF do
@@ -346,183 +338,11 @@ procedure TWeatherStation.ReadMeasurements;
346338
end;
347339
end;
348340

349-
procedure TWeatherStation.ReadMeasurementsBuf;
350-
var
351-
fileStream: TFileStream;
352-
memStream: TMemoryStream;
353-
streamReader: TStreamReader;
354-
buffer: TBytes;
355-
bytesRead, totalBytesRead, chunkSize, lineBreakPos, chunkIndex,
356-
index, lineCount: int64;
357-
begin
358-
359-
chunksize := 4194304 * 1;
360-
361-
// Open the file for reading
362-
fileStream := TFileStream.Create(self.fname, fmOpenRead);
363-
SetLength(buffer, chunkSize);
364-
try
365-
memStream := TMemoryStream.Create;
366-
try
367-
totalBytesRead := 0;
368-
chunkIndex := 0;
369-
lineCount := 0;
370-
371-
// Read and parse chunks of data until EOF
372-
while totalBytesRead < fileStream.Size do
373-
begin
374-
// Read more bytes and keep track on bytes read
375-
bytesRead := fileStream.Read(buffer[0], chunkSize);
376-
Inc(totalBytesRead, bytesRead);
377-
378-
// Find the position of the last newline character in the chunk
379-
lineBreakPos := BytesRead;
380-
while (lineBreakPos > 0) and (Buffer[lineBreakPos - 1] <> Ord(#10)) do
381-
Dec(lineBreakPos);
382-
383-
{ Now, must ensure that if the last byte read in the current chunk
384-
is not a newline character, the file pointer is moved back to include
385-
that byte and any preceding bytes of the partial line in the next
386-
chunk's read operation.
387-
388-
Also, no need to update the BytesRead variable in this context because
389-
it represents the actual number of bytes read from the file, including
390-
any partial line that may have been included due to moving the file
391-
pointer back.
392-
Ref: https://www.freepascal.org/docs-html/rtl/classes/tstream.seek.html}
393-
if lineBreakPos < bytesRead then
394-
fileStream.Seek(-(bytesRead - lineBreakPos), soCurrent);
395-
{$IFDEF DEBUG}
396-
// Do something with the chunk here
397-
// Like counting line
398-
for index := 0 to lineBreakPos - 1 do
399-
if buffer[index] = Ord(#10) then
400-
lineCount := lineCount + 1;
401-
{$ENDIF DEBUG}
402-
403-
// Use memory stream & stream reader
404-
memStream.Write(buffer[0], lineBreakPos - 1);
405-
memStream.Position := 0;
406-
streamReader := TStreamReader.Create(memStream);
407-
try
408-
while not streamReader.EOF do
409-
begin
410-
// WriteLn(streamReader.ReadLine);
411-
self.ParseStationAndTemp(streamReader.ReadLine);
412-
end;
413-
finally
414-
streamReader.Free;
415-
end;
416-
{$IFDEF DEBUG}
417-
// Display user feedback
418-
WriteLn('Line count: ', IntToStr(lineCount));
419-
WriteLn('Chunk ', chunkIndex, ', Total bytes read:', IntToStr(totalBytesRead));
420-
{$ENDIF DEBUG}
421-
422-
{$IFDEF DEBUG}
423-
// Increase chunk index - a counter
424-
Inc(chunkIndex);
425-
{$ENDIF DEBUG}
426-
end;
427-
finally
428-
memStream.Free;
429-
end;
430-
finally
431-
// Close the file
432-
fileStream.Free;
433-
end;
434-
end;
435-
436-
procedure TWeatherStation.ReadMeasurementsBufSL;
437-
var
438-
fileStream: TFileStream;
439-
strList: TStringList;
440-
streamReader: TStreamReader;
441-
buffer, trimmedBuffer: TBytes;
442-
bytesRead, totalBytesRead, chunkSize, lineBreakPos, chunkIndex,
443-
slIndex, lineCount: int64;
444-
begin
445-
446-
chunksize := 8192 * 1;
447-
448-
// Open the file for reading
449-
fileStream := TFileStream.Create(self.fname, fmOpenRead);
450-
SetLength(buffer, chunkSize);
451-
try
452-
strList := TStringList.Create;
453-
try
454-
totalBytesRead := 0;
455-
chunkIndex := 0;
456-
lineCount := 0;
457-
458-
// Read and parse chunks of data until EOF
459-
while totalBytesRead < fileStream.Size do
460-
begin
461-
// Read more bytes and keep track on bytes read
462-
bytesRead := fileStream.Read(buffer[0], chunkSize);
463-
Inc(totalBytesRead, bytesRead);
464-
465-
// Find the position of the last newline character in the chunk
466-
lineBreakPos := BytesRead;
467-
while (lineBreakPos > 0) and (Buffer[lineBreakPos - 1] <> Ord(#10)) do
468-
Dec(lineBreakPos);
469-
470-
{ Now, must ensure that if the last byte read in the current chunk
471-
is not a newline character, the file pointer is moved back to include
472-
that byte and any preceding bytes of the partial line in the next
473-
chunk's read operation.
474-
475-
Also, no need to update the BytesRead variable in this context because
476-
it represents the actual number of bytes read from the file, including
477-
any partial line that may have been included due to moving the file
478-
pointer back.
479-
Ref: https://www.freepascal.org/docs-html/rtl/classes/tstream.seek.html}
480-
if lineBreakPos < bytesRead then
481-
fileStream.Seek(-(bytesRead - lineBreakPos), soCurrent);
482-
{$IFDEF DEBUG}
483-
// Do something with the chunk here
484-
// Like counting line
485-
for index := 0 to lineBreakPos - 1 do
486-
if buffer[index] = Ord(#10) then
487-
lineCount := lineCount + 1;
488-
{$ENDIF DEBUG}
489-
490-
// Use TStringList and a sub-TBytes array up to lineBreakPos
491-
SetLength(trimmedBuffer, lineBreakPos);
492-
// Index 'n' is inclusive, so add 1 to the length
493-
Move(buffer[0], trimmedBuffer[0], Length(trimmedBuffer)); // Copy the bytes
494-
strList.Clear;
495-
strList.Text := ansistring(trimmedBuffer);
496-
for slIndex := 0 to strList.Count - 1 do
497-
self.ParseStationAndTemp(strList[slIndex]);
498-
499-
{$IFDEF DEBUG}
500-
// Display user feedback
501-
WriteLn('Line count: ', IntToStr(lineCount));
502-
WriteLn('Chunk ', chunkIndex, ', Total bytes read:', IntToStr(totalBytesRead));
503-
{$ENDIF DEBUG}
504-
505-
{$IFDEF DEBUG}
506-
// Increase chunk index - a counter
507-
Inc(chunkIndex);
508-
{$ENDIF DEBUG}
509-
end;
510-
finally
511-
strList.Free;
512-
end;
513-
finally
514-
// Close the file
515-
fileStream.Free;
516-
end;
517-
end;
518-
519341
// The main algorithm
520342
procedure TWeatherStation.ProcessMeasurements;
521343
begin
522344
self.CreateLookupTemp;
523345
self.ReadMeasurements;
524-
// self.ReadMeasurementsBuf;
525-
//self.ReadMeasurementsBufSL;
526346
self.SortWeatherStationAndStats;
527347
self.PrintSortedWeatherStationAndStats;
528348
end;

0 commit comments

Comments
 (0)