diff --git a/src/BufferedFileParseTrait.php b/src/BufferedFileParseTrait.php
new file mode 100644
index 0000000..e6f707f
--- /dev/null
+++ b/src/BufferedFileParseTrait.php
@@ -0,0 +1,112 @@
+): array{?string, ?string} $processMatch
+	 * @return Iterator
+	 * @throws RuntimeException if the file cannot be opened or its content cannot be parsed completely;
+	 *         raised lazily, once the returned generator is iterated
+	 */
+	private function parseFileBuffered(string $path, string $pattern, callable $processMatch): Iterator
+	{
+		$handle = @fopen($path, 'rb');
+		if ($handle === false) {
+			throw new RuntimeException("Cannot open file '$path'.");
+		}
+
+		try {
+			$buffer = '';
+			$offset = 0;
+			$eof = false;
+			$chunkSize = 65536; // 64 KiB
+
+			while (true) {
+				// Read more data if buffer is running low and file is not exhausted
+				while (!$eof && strlen($buffer) - $offset < $chunkSize) {
+					$chunk = fread($handle, $chunkSize);
+					if ($chunk === false || $chunk === '') {
+						$eof = feof($handle);
+						break;
+					}
+					$buffer .= $chunk;
+					$eof = feof($handle);
+				}
+
+				if ($offset >= strlen($buffer)) {
+					break;
+				}
+
+				$result = preg_match($pattern, $buffer, $match, 0, $offset);
+				if ($result === false) {
+					// PCRE itself failed (e.g. a backtrack limit); more input cannot help,
+					// so stop here and let the completeness check below raise the error
+					break;
+				}
+
+				$matched = $result === 1;
+				$matchEnd = $matched ? $offset + strlen($match[0]) : $offset;
+
+				// A result is only conclusive once the file is exhausted or the match ends
+				// strictly inside the buffer. Otherwise unread data could change it: \z may
+				// have matched at a chunk boundary, or (depending on the pattern) a construct
+				// cut off by the boundary may have made the match fail. Read more and retry.
+				if (!$eof && (!$matched || $matchEnd >= strlen($buffer))) {
+					// Grow by at least the pending amount so that re-matching one very long
+					// statement stays amortised linear instead of quadratic
+					$chunk = fread($handle, max($chunkSize, strlen($buffer) - $offset));
+					if ($chunk !== false && $chunk !== '') {
+						$buffer .= $chunk;
+						$eof = feof($handle);
+						continue; // retry the match with more data
+					}
+					$eof = true;
+				}
+
+				if (!$matched) {
+					break;
+				}
+
+				$offset = $matchEnd;
+
+				[$query, $newPattern] = $processMatch($match);
+
+				if ($newPattern !== null) {
+					$pattern = $newPattern;
+				}
+
+				if ($query !== null) {
+					yield $query;
+				} elseif ($newPattern === null) {
+					// No query and no pattern change means we hit the \z end-of-content branch
+					break;
+				}
+
+				// Trim consumed content from the buffer to free memory
+				if ($offset > $chunkSize) {
+					$buffer = substr($buffer, $offset);
+					$offset = 0;
+				}
+			}
+
+			if ($offset !== strlen($buffer)) {
+				throw new RuntimeException("Failed to parse file '$path', please report an issue.");
+			}
+		} finally {
+			fclose($handle);
+		}
+	}
+}
diff --git a/src/MySqlMultiQueryParser.php b/src/MySqlMultiQueryParser.php
index 508c572..0033d77 100644
--- a/src/MySqlMultiQueryParser.php
+++ b/src/MySqlMultiQueryParser.php
@@ -3,40 +3,27 @@
 namespace Nextras\MultiQueryParser;
 
 use Iterator;
-use Nextras\MultiQueryParser\Exception\RuntimeException;
-use function file_get_contents;
-use function preg_match;
 use function preg_quote;
-use function strlen;
 
 
 class MySqlMultiQueryParser implements IMultiQueryParser
 {
-	public function parseFile(string $path): Iterator
-	{
-		$content = @file_get_contents($path);
-		if ($content === false) {
-			throw new RuntimeException("Cannot open file '$path'.");
-		}
-
-		$offset = 0;
-		$pattern = $this->getQueryPattern(';');
+	use BufferedFileParseTrait;
 
-		while (preg_match($pattern, $content, $match, 0, $offset) === 1) {
-			$offset += strlen($match[0]);
-			if (isset($match['delimiter']) && $match['delimiter'] !== '') {
-				$pattern = $this->getQueryPattern($match['delimiter']);
-			} elseif (isset($match['query']) && $match['query'] !== '') {
-				yield $match['query'];
-			} else {
-				break;
+	public function parseFile(string $path): Iterator
+	{
+		return $this->parseFileBuffered(
+			$path,
+			$this->getQueryPattern(';'),
+			function (array $match): array {
+				if (isset($match['delimiter']) && $match['delimiter'] !== '') {
+					return [null, $this->getQueryPattern($match['delimiter'])];
+				}
+				$query = (isset($match['query']) && $match['query'] !== '') ? $match['query'] : null;
+				return [$query, null];
 			}
-		}
-
-		if ($offset !== strlen($content)) {
-			throw new RuntimeException("Failed to parse file '$path', please report an issue.");
-		}
+		);
 	}
diff --git a/src/PostgreSqlMultiQueryParser.php b/src/PostgreSqlMultiQueryParser.php
index 0620027..ba4ca00 100644
--- a/src/PostgreSqlMultiQueryParser.php
+++ b/src/PostgreSqlMultiQueryParser.php
@@ -3,37 +3,23 @@
 namespace Nextras\MultiQueryParser;
 
 use Iterator;
-use Nextras\MultiQueryParser\Exception\RuntimeException;
-use function file_get_contents;
-use function preg_match;
-use function strlen;
 
 
 class PostgreSqlMultiQueryParser implements IMultiQueryParser
 {
-	public function parseFile(string $path): Iterator
-	{
-		$content = @file_get_contents($path);
-		if ($content === false) {
-			throw new RuntimeException("Cannot open file '$path'.");
-		}
-
-		$offset = 0;
-		$pattern = $this->getQueryPattern();
+	use BufferedFileParseTrait;
 
-		while (preg_match($pattern, $content, $match, 0, $offset)) {
-			$offset += strlen($match[0]);
-			if (isset($match['query']) && $match['query'] !== '') {
-				yield $match['query'];
-			} else {
-				break;
+	public function parseFile(string $path): Iterator
+	{
+		return $this->parseFileBuffered(
+			$path,
+			$this->getQueryPattern(),
+			static function (array $match): array {
+				$query = (isset($match['query']) && $match['query'] !== '') ? $match['query'] : null;
+				return [$query, null];
 			}
-		}
-
-		if ($offset !== strlen($content)) {
-			throw new RuntimeException("Failed to parse file '$path', please report an issue.");
-		}
+		);
 	}
diff --git a/src/SqlServerMultiQueryParser.php b/src/SqlServerMultiQueryParser.php
index 72f51bc..6647f0b 100644
--- a/src/SqlServerMultiQueryParser.php
+++ b/src/SqlServerMultiQueryParser.php
@@ -3,37 +3,23 @@
 namespace Nextras\MultiQueryParser;
 
 use Iterator;
-use Nextras\MultiQueryParser\Exception\RuntimeException;
-use function file_get_contents;
-use function preg_match;
-use function strlen;
 
 
 class SqlServerMultiQueryParser implements IMultiQueryParser
 {
-	public function parseFile(string $path): Iterator
-	{
-		$content = @file_get_contents($path);
-		if ($content === false) {
-			throw new RuntimeException("Cannot open file '$path'.");
-		}
-
-		$offset = 0;
-		$pattern = $this->getQueryPattern();
+	use BufferedFileParseTrait;
 
-		while (preg_match($pattern, $content, $match, 0, $offset)) {
-			$offset += strlen($match[0]);
-			if (isset($match['query']) && $match['query'] !== '') {
-				yield $match['query'];
-			} else {
-				break;
+	public function parseFile(string $path): Iterator
+	{
+		return $this->parseFileBuffered(
+			$path,
+			$this->getQueryPattern(),
+			static function (array $match): array {
+				$query = (isset($match['query']) && $match['query'] !== '') ? $match['query'] : null;
+				return [$query, null];
 			}
-		}
-
-		if ($offset !== strlen($content)) {
-			throw new RuntimeException("Failed to parse file '$path', please report an issue.");
-		}
+		);
 	}