diff --git a/src/Type/Regex/RegexGroupParser.php b/src/Type/Regex/RegexGroupParser.php index e2ab45d31d..a9066f5b6b 100644 --- a/src/Type/Regex/RegexGroupParser.php +++ b/src/Type/Regex/RegexGroupParser.php @@ -448,18 +448,38 @@ private function getRootAlternation(TreeNode $group): ?TreeNode if ( $group->getId() === '#capturing' && count($group->getChildren()) === 1 - && $group->getChild(0)->getId() === '#alternation' ) { - return $group->getChild(0); + $child = $group->getChild(0); + if ($child->getId() === '#alternation') { + return $child; + } + + if ( + $child->getId() === '#noncapturing' + && count($child->getChildren()) === 1 + && $child->getChild(0)->getId() === '#alternation' + ) { + return $child->getChild(0); + } } // 1st token within a named capturing group is a token holding the group-name if ( $group->getId() === '#namedcapturing' && count($group->getChildren()) === 2 - && $group->getChild(1)->getId() === '#alternation' ) { - return $group->getChild(1); + $child = $group->getChild(1); + if ($child->getId() === '#alternation') { + return $child; + } + + if ( + $child->getId() === '#noncapturing' + && count($child->getChildren()) === 1 + && $child->getChild(0)->getId() === '#alternation' + ) { + return $child->getChild(0); + } } return null; @@ -606,6 +626,10 @@ private function walkGroupAst( $walkResult = $walkResult->numeric(TrinaryLogic::createNo()); } + if (in_array($ast->getId(), ['#lookahead', '#negativelookahead', '#lookbehind', '#negativelookbehind'], true)) { + return $walkResult; + } + foreach ($children as $child) { $walkResult = $this->walkGroupAst( $child, @@ -620,6 +644,10 @@ private function walkGroupAst( private function isMaybeEmptyNode(TreeNode $node, string $patternModifiers, bool &$isNonFalsy): bool { + if (in_array($node->getId(), ['#lookahead', '#negativelookahead', '#lookbehind', '#negativelookbehind'], true)) { + return true; + } + if ($node->getId() === '#quantification') { [$min] = $this->getQuantificationRange($node); @@ -640,6 +668,25 @@ private function isMaybeEmptyNode(TreeNode $node, string $patternModifiers, bool return $literal === ''; } + if ($node->getId() === '#alternation') { + $allNonFalsy = true; + foreach ($node->getChildren() as $child) { + $childNonFalsy = false; + if ($this->isMaybeEmptyNode($child, $patternModifiers, $childNonFalsy)) { + return true; + } + if ($childNonFalsy) { + continue; + } + + $allNonFalsy = false; + } + if ($allNonFalsy) { + $isNonFalsy = true; + } + return false; + } + foreach ($node->getChildren() as $child) { if (!$this->isMaybeEmptyNode($child, $patternModifiers, $isNonFalsy)) { return false; diff --git a/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php index 8dac3aa80c..e710ea4572 100644 --- a/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php +++ b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php @@ -1074,6 +1074,53 @@ function bug12792(string $string): void { } } +function bug12840(string $s): void { + // Empty alternation in non-capturing group + if (preg_match('~^((?:|\d+))x$~s', $s, $matches) === 1) { + assertType("array{non-falsy-string, ''|numeric-string}", $matches); + } + + // Lookahead in alternation - lookahead is zero-width + if (preg_match('~^(a?(?:(?=x)|y))x$~s', $s, $matches) === 1) { + assertType("array{non-falsy-string, string}", $matches); + } + + // Lookbehind in alternation - lookbehind is zero-width + if (preg_match('~^(a?(?:(?<=x)|y))x$~s', $s, $matches) === 1) { + assertType("array{non-falsy-string, string}", $matches); + } + + // Negative lookahead - zero-width + if (preg_match('~^(a?(?:(?!z)|y))x$~s', $s, $matches) === 1) { + assertType("array{non-falsy-string, string}", $matches); + } + + // Negative lookbehind - zero-width + if (preg_match('~^(a?(?:(?(?:|\d+))x$~s', $s, $matches) === 1) { + assertType("array{0: non-falsy-string, g: ''|numeric-string, 1: ''|numeric-string}", $matches); + } + + // Lookahead in concatenation with literal - group still captures the literal + if (preg_match('~^((?=x)a)x$~s', $s, $matches) === 1) { + assertType("array{non-falsy-string, non-falsy-string}", $matches); + } +} + function testExtendedSyntaxEscapedHash(string $string): void { // \# in extended mode should be treated as literal hash, not comment if (preg_match('/^ ([\#.]) $/x', $string, $matches)) {