From 8e964cf0f935723480481ff75770187bb3befc1d Mon Sep 17 00:00:00 2001 From: artpi Date: Fri, 15 May 2026 13:54:12 +0200 Subject: [PATCH 1/5] Fix toolkit function redeclare handling --- components/DataLiberation/URL/functions.php | 174 ++--- components/Encoding/compat-utf8.php | 660 +++++++++--------- components/Encoding/utf8-encoder.php | 54 +- components/Encoding/utf8.php | 320 +++++---- components/Filesystem/functions.php | 351 +++++----- components/Git/functions.php | 32 +- components/Zip/functions.php | 22 +- .../Tests/BootstrapCompatibilityTest.php | 77 ++ plugins/push-md/push-md-dev-bootstrap.php | 4 +- plugins/push-md/push-md-phar-bootstrap.php | 4 +- plugins/push-md/push-md-toolkit-bootstrap.php | 6 +- plugins/push-md/push-md-toolkit-loader.php | 19 + 12 files changed, 939 insertions(+), 784 deletions(-) create mode 100644 plugins/push-md/Tests/BootstrapCompatibilityTest.php create mode 100644 plugins/push-md/push-md-toolkit-loader.php diff --git a/components/DataLiberation/URL/functions.php b/components/DataLiberation/URL/functions.php index bc8b9a62..9596b017 100644 --- a/components/DataLiberation/URL/functions.php +++ b/components/DataLiberation/URL/functions.php @@ -36,33 +36,35 @@ * then it would be nice to re-encode that block markup also without the space character. This is similar * to how the tag processor avoids changing parts of the tag it doesn't need to change. */ -function wp_rewrite_urls( $options ) { - if ( empty( $options['base_url'] ) ) { - // Use first from-url as base_url if not specified. - $from_urls = array_keys( $options['url-mapping'] ); - $options['base_url'] = $from_urls[0]; - } +if ( ! function_exists( __NAMESPACE__ . '\\wp_rewrite_urls' ) ) { + function wp_rewrite_urls( $options ) { + if ( empty( $options['base_url'] ) ) { + // Use first from-url as base_url if not specified. + $from_urls = array_keys( $options['url-mapping'] ); + $options['base_url'] = $from_urls[0]; + } - $url_mapping = array(); - foreach ( $options['url-mapping'] as $from_url_string => $to_url_string ) { - $url_mapping[] = array( - 'from_url' => WPURL::parse( $from_url_string ), - 'to_url' => WPURL::parse( $to_url_string ), - ); - } + $url_mapping = array(); + foreach ( $options['url-mapping'] as $from_url_string => $to_url_string ) { + $url_mapping[] = array( + 'from_url' => WPURL::parse( $from_url_string ), + 'to_url' => WPURL::parse( $to_url_string ), + ); + } - $p = new BlockMarkupUrlProcessor( $options['block_markup'], $options['base_url'] ); - while ( $p->next_url() ) { - $parsed_url = $p->get_parsed_url(); - foreach ( $url_mapping as $mapping ) { - if ( is_child_url_of( $parsed_url, $mapping['from_url'] ) ) { - $p->replace_base_url( $mapping['to_url'] ); - break; + $p = new BlockMarkupUrlProcessor( $options['block_markup'], $options['base_url'] ); + while ( $p->next_url() ) { + $parsed_url = $p->get_parsed_url(); + foreach ( $url_mapping as $mapping ) { + if ( is_child_url_of( $parsed_url, $mapping['from_url'] ) ) { + $p->replace_base_url( $mapping['to_url'] ); + break; + } } } - } - return $p->get_updated_html(); + return $p->get_updated_html(); + } } /** @@ -73,32 +75,34 @@ function wp_rewrite_urls( $options ) { * * @return bool Whether the URL matches the current site URL. */ -function is_child_url_of( $child, $parent_url ) { - $parent_url = is_string( $parent_url ) ? WPURL::parse( $parent_url ) : $parent_url; - $child = is_string( $child ) ? WPURL::parse( $child ) : $child; - $child_pathname_no_trailing_slash = rtrim( urldecode( $child->pathname ), '/' ); - - if ( false === $child || false === $parent_url ) { - return false; - } +if ( ! function_exists( __NAMESPACE__ . '\\is_child_url_of' ) ) { + function is_child_url_of( $child, $parent_url ) { + $parent_url = is_string( $parent_url ) ? WPURL::parse( $parent_url ) : $parent_url; + $child = is_string( $child ) ? WPURL::parse( $child ) : $child; + $child_pathname_no_trailing_slash = rtrim( urldecode( $child->pathname ), '/' ); + + if ( false === $child || false === $parent_url ) { + return false; + } - if ( $parent_url->hostname !== $child->hostname ) { - return false; - } + if ( $parent_url->hostname !== $child->hostname ) { + return false; + } - if ( $parent_url->protocol !== $child->protocol ) { - return false; - } + if ( $parent_url->protocol !== $child->protocol ) { + return false; + } - $parent_pathname = urldecode( $parent_url->pathname ); + $parent_pathname = urldecode( $parent_url->pathname ); - return ( - // Direct match. - $parent_pathname === $child_pathname_no_trailing_slash || - $parent_pathname === $child_pathname_no_trailing_slash . '/' || - // Path prefix. - 0 === strncmp( $child_pathname_no_trailing_slash . '/', $parent_pathname, strlen( $parent_pathname ) ) - ); + return ( + // Direct match. + $parent_pathname === $child_pathname_no_trailing_slash || + $parent_pathname === $child_pathname_no_trailing_slash . '/' || + // Path prefix. + 0 === strncmp( $child_pathname_no_trailing_slash . '/', $parent_pathname, strlen( $parent_pathname ) ) + ); + } } /** @@ -112,53 +116,55 @@ function is_child_url_of( $child, $parent_url ) { * * @return string The decoded string. */ -function urldecode_n( $input, $decode_n ) { - // Fast paths: nothing to do. - if ( $decode_n <= 0 || false === strpos( $input, '%' ) ) { +if ( ! function_exists( __NAMESPACE__ . '\\urldecode_n' ) ) { + function urldecode_n( $input, $decode_n ) { + // Fast paths: nothing to do. + if ( $decode_n <= 0 || false === strpos( $input, '%' ) ) { return $input; - } - - $result = ''; - $at = 0; - while ( true ) { - if ( $at + 3 > strlen( $input ) ) { - break; } - $last_at = $at; - $at += strcspn( $input, '%', $at ); - // Consume bytes except for the percent sign. - $result .= substr( $input, $last_at, $at - $last_at ); + $result = ''; + $at = 0; + while ( true ) { + if ( $at + 3 > strlen( $input ) ) { + break; + } - // If we've already decoded the requested number of bytes, stop. - if ( strlen( $result ) >= $decode_n ) { - break; - } + $last_at = $at; + $at += strcspn( $input, '%', $at ); + // Consume bytes except for the percent sign. + $result .= substr( $input, $last_at, $at - $last_at ); - ++$at; - if ( $at > strlen( $input ) ) { - break; - } + // If we've already decoded the requested number of bytes, stop. + if ( strlen( $result ) >= $decode_n ) { + break; + } - $decodable_length = strspn( - $input, - '0123456789ABCDEFabcdef', - $at, - 2 - ); + ++$at; + if ( $at > strlen( $input ) ) { + break; + } - if ( 2 === $decodable_length ) { - // Decodes the urlencoded hex sequence from URL. - // Note: This decodes bytes, not characters. It will recover the original byte sequence, - // not necessarily any valid UTF-8 characters. - $result .= chr( hexdec( $input[ $at ] . $input[ $at + 1 ] ) ); - $at += 2; - } else { - // Consume the next byte and move on. - $result .= '%'; + $decodable_length = strspn( + $input, + '0123456789ABCDEFabcdef', + $at, + 2 + ); + + if ( 2 === $decodable_length ) { + // Decodes the urlencoded hex sequence from URL. + // Note: This decodes bytes, not characters. It will recover the original byte sequence, + // not necessarily any valid UTF-8 characters. + $result .= chr( hexdec( $input[ $at ] . $input[ $at + 1 ] ) ); + $at += 2; + } else { + // Consume the next byte and move on. + $result .= '%'; + } } - } - $result .= substr( $input, $at ); + $result .= substr( $input, $at ); - return $result; + return $result; + } } diff --git a/components/Encoding/compat-utf8.php b/components/Encoding/compat-utf8.php index 89dafb5c..1a731def 100644 --- a/components/Encoding/compat-utf8.php +++ b/components/Encoding/compat-utf8.php @@ -46,204 +46,206 @@ * @param bool|null $has_noncharacters Set to indicate if scanned string contained noncharacters. * @return int How many code points were successfully scanned. */ -function _wp_scan_utf8( string $bytes, int &$at, int &$invalid_length, ?int $max_bytes = null, ?int $max_code_points = null, ?bool &$has_noncharacters = null ): int { - $byte_length = strlen( $bytes ); - $end = min( $byte_length, $at + ( $max_bytes ?? PHP_INT_MAX ) ); - $invalid_length = 0; - $count = 0; - $max_count = $max_code_points ?? PHP_INT_MAX; - $has_noncharacters = false; - - for ( $i = $at; $i < $end && $count <= $max_count; $i++ ) { - /* - * Quickly skip past US-ASCII bytes, all of which are valid UTF-8. - * - * This optimization step improves the speed from 10x to 100x - * depending on whether the JIT has optimized the function. - */ - $ascii_byte_count = strspn( - $bytes, - "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" . - "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" . - " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f", - $i, - $end - $i - ); - - if ( $count + $ascii_byte_count >= $max_count ) { - $at = $i + ( $max_count - $count ); - $count = $max_count; - return $count; - } +if ( ! function_exists( __NAMESPACE__ . '\\_wp_scan_utf8' ) ) { + function _wp_scan_utf8( string $bytes, int &$at, int &$invalid_length, ?int $max_bytes = null, ?int $max_code_points = null, ?bool &$has_noncharacters = null ): int { + $byte_length = strlen( $bytes ); + $end = min( $byte_length, $at + ( $max_bytes ?? PHP_INT_MAX ) ); + $invalid_length = 0; + $count = 0; + $max_count = $max_code_points ?? PHP_INT_MAX; + $has_noncharacters = false; + + for ( $i = $at; $i < $end && $count <= $max_count; $i++ ) { + /* + * Quickly skip past US-ASCII bytes, all of which are valid UTF-8. + * + * This optimization step improves the speed from 10x to 100x + * depending on whether the JIT has optimized the function. + */ + $ascii_byte_count = strspn( + $bytes, + "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" . + "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" . + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f", + $i, + $end - $i + ); - $count += $ascii_byte_count; - $i += $ascii_byte_count; + if ( $count + $ascii_byte_count >= $max_count ) { + $at = $i + ( $max_count - $count ); + $count = $max_count; + return $count; + } - if ( $i >= $end ) { - $at = $end; - return $count; - } + $count += $ascii_byte_count; + $i += $ascii_byte_count; - /** - * The above fast-track handled all single-byte UTF-8 characters. What - * follows MUST be a multibyte sequence otherwise there’s invalid UTF-8. - * - * Therefore everything past here is checking those multibyte sequences. - * - * It may look like there’s a need to check against the max bytes here, - * but since each match of a single character returns, this functions will - * bail already if crossing the max-bytes threshold. This function SHALL - * NOT return in the middle of a multi-byte character, so if a character - * falls on each side of the max bytes, the entire character will be scanned. - * - * Because it’s possible that there are truncated characters, the use of - * the null-coalescing operator with "\xC0" is a convenience for skipping - * length checks on every continuation bytes. This works because 0xC0 is - * always invalid in a UTF-8 string, meaning that if the string has been - * truncated, it will find 0xC0 and reject as invalid UTF-8. - * - * > [The following table] lists all of the byte sequences that are well-formed - * > in UTF-8. A range of byte values such as A0..BF indicates that any byte - * > from A0 to BF (inclusive) is well-formed in that position. Any byte value - * > outside of the ranges listed is ill-formed. - * - * > Table 3-7. Well-Formed UTF-8 Byte Sequences - * ╭─────────────────────┬────────────┬──────────────┬─────────────┬──────────────╮ - * │ Code Points │ First Byte │ Second Byte │ Third Byte │ Fourth Byte │ - * ├─────────────────────┼────────────┼──────────────┼─────────────┼──────────────┤ - * │ U+0000..U+007F │ 00..7F │ │ │ │ - * │ U+0080..U+07FF │ C2..DF │ 80..BF │ │ │ - * │ U+0800..U+0FFF │ E0 │ A0..BF │ 80..BF │ │ - * │ U+1000..U+CFFF │ E1..EC │ 80..BF │ 80..BF │ │ - * │ U+D000..U+D7FF │ ED │ 80..9F │ 80..BF │ │ - * │ U+E000..U+FFFF │ EE..EF │ 80..BF │ 80..BF │ │ - * │ U+10000..U+3FFFF │ F0 │ 90..BF │ 80..BF │ 80..BF │ - * │ U+40000..U+FFFFF │ F1..F3 │ 80..BF │ 80..BF │ 80..BF │ - * │ U+100000..U+10FFFF │ F4 │ 80..8F │ 80..BF │ 80..BF │ - * ╰─────────────────────┴────────────┴──────────────┴─────────────┴──────────────╯ - * - * @see https://www.unicode.org/versions/Unicode16.0.0/core-spec/chapter-3/#G27506 - */ - - // Valid two-byte code points. - $b1 = ord( $bytes[ $i ] ); - $b2 = ord( $bytes[ $i + 1 ] ?? "\xC0" ); - - if ( $b1 >= 0xC2 && $b1 <= 0xDF && $b2 >= 0x80 && $b2 <= 0xBF ) { - ++$count; - ++$i; - continue; - } + if ( $i >= $end ) { + $at = $end; + return $count; + } + + /** + * The above fast-track handled all single-byte UTF-8 characters. What + * follows MUST be a multibyte sequence otherwise there’s invalid UTF-8. + * + * Therefore everything past here is checking those multibyte sequences. + * + * It may look like there’s a need to check against the max bytes here, + * but since each match of a single character returns, this functions will + * bail already if crossing the max-bytes threshold. This function SHALL + * NOT return in the middle of a multi-byte character, so if a character + * falls on each side of the max bytes, the entire character will be scanned. + * + * Because it’s possible that there are truncated characters, the use of + * the null-coalescing operator with "\xC0" is a convenience for skipping + * length checks on every continuation bytes. This works because 0xC0 is + * always invalid in a UTF-8 string, meaning that if the string has been + * truncated, it will find 0xC0 and reject as invalid UTF-8. + * + * > [The following table] lists all of the byte sequences that are well-formed + * > in UTF-8. A range of byte values such as A0..BF indicates that any byte + * > from A0 to BF (inclusive) is well-formed in that position. Any byte value + * > outside of the ranges listed is ill-formed. + * + * > Table 3-7. Well-Formed UTF-8 Byte Sequences + * ╭─────────────────────┬────────────┬──────────────┬─────────────┬──────────────╮ + * │ Code Points │ First Byte │ Second Byte │ Third Byte │ Fourth Byte │ + * ├─────────────────────┼────────────┼──────────────┼─────────────┼──────────────┤ + * │ U+0000..U+007F │ 00..7F │ │ │ │ + * │ U+0080..U+07FF │ C2..DF │ 80..BF │ │ │ + * │ U+0800..U+0FFF │ E0 │ A0..BF │ 80..BF │ │ + * │ U+1000..U+CFFF │ E1..EC │ 80..BF │ 80..BF │ │ + * │ U+D000..U+D7FF │ ED │ 80..9F │ 80..BF │ │ + * │ U+E000..U+FFFF │ EE..EF │ 80..BF │ 80..BF │ │ + * │ U+10000..U+3FFFF │ F0 │ 90..BF │ 80..BF │ 80..BF │ + * │ U+40000..U+FFFFF │ F1..F3 │ 80..BF │ 80..BF │ 80..BF │ + * │ U+100000..U+10FFFF │ F4 │ 80..8F │ 80..BF │ 80..BF │ + * ╰─────────────────────┴────────────┴──────────────┴─────────────┴──────────────╯ + * + * @see https://www.unicode.org/versions/Unicode16.0.0/core-spec/chapter-3/#G27506 + */ + + // Valid two-byte code points. + $b1 = ord( $bytes[ $i ] ); + $b2 = ord( $bytes[ $i + 1 ] ?? "\xC0" ); + + if ( $b1 >= 0xC2 && $b1 <= 0xDF && $b2 >= 0x80 && $b2 <= 0xBF ) { + ++$count; + ++$i; + continue; + } - // Valid three-byte code points. - $b3 = ord( $bytes[ $i + 2 ] ?? "\xC0" ); + // Valid three-byte code points. + $b3 = ord( $bytes[ $i + 2 ] ?? "\xC0" ); - if ( $b3 < 0x80 || $b3 > 0xBF ) { - goto invalid_utf8; - } + if ( $b3 < 0x80 || $b3 > 0xBF ) { + goto invalid_utf8; + } - if ( + if ( ( 0xE0 === $b1 && $b2 >= 0xA0 && $b2 <= 0xBF ) || ( $b1 >= 0xE1 && $b1 <= 0xEC && $b2 >= 0x80 && $b2 <= 0xBF ) || ( 0xED === $b1 && $b2 >= 0x80 && $b2 <= 0x9F ) || ( $b1 >= 0xEE && $b1 <= 0xEF && $b2 >= 0x80 && $b2 <= 0xBF ) - ) { - ++$count; - $i += 2; + ) { + ++$count; + $i += 2; - // Covers the range U+FDD0–U+FDEF, U+FFFE, U+FFFF. - if ( 0xEF === $b1 ) { - $has_noncharacters |= ( + // Covers the range U+FDD0–U+FDEF, U+FFFE, U+FFFF. + if ( 0xEF === $b1 ) { + $has_noncharacters |= ( ( 0xB7 === $b2 && $b3 >= 0x90 && $b3 <= 0xAF ) || ( 0xBF === $b2 && ( 0xBE === $b3 || 0xBF === $b3 ) ) - ); - } + ); + } - continue; - } + continue; + } - // Valid four-byte code points. - $b4 = ord( $bytes[ $i + 3 ] ?? "\xC0" ); + // Valid four-byte code points. + $b4 = ord( $bytes[ $i + 3 ] ?? "\xC0" ); - if ( $b4 < 0x80 || $b4 > 0xBF ) { - goto invalid_utf8; - } + if ( $b4 < 0x80 || $b4 > 0xBF ) { + goto invalid_utf8; + } - if ( + if ( ( 0xF0 === $b1 && $b2 >= 0x90 && $b2 <= 0xBF ) || ( $b1 >= 0xF1 && $b1 <= 0xF3 && $b2 >= 0x80 && $b2 <= 0xBF ) || ( 0xF4 === $b1 && $b2 >= 0x80 && $b2 <= 0x8F ) - ) { - ++$count; - $i += 3; + ) { + ++$count; + $i += 3; - // Covers U+1FFFE, U+1FFFF, U+2FFFE, U+2FFFF, …, U+10FFFE, U+10FFFF. - $has_noncharacters |= ( + // Covers U+1FFFE, U+1FFFF, U+2FFFE, U+2FFFF, …, U+10FFFE, U+10FFFF. + $has_noncharacters |= ( ( 0x0F === ( $b2 & 0x0F ) ) && 0xBF === $b3 && ( 0xBE === $b4 || 0xBF === $b4 ) - ); + ); - continue; - } + continue; + } - /** - * When encountering invalid byte sequences, Unicode suggests finding the - * maximal subpart of a text and replacing that subpart with a single - * replacement character. - * - * > This practice is more secure because it does not result in the - * > conversion consuming parts of valid sequences as though they were - * > invalid. It also guarantees at least one replacement character will - * > occur for each instance of an invalid sequence in the original text. - * > Furthermore, this practice can be defined consistently for better - * > interoperability between different implementations of conversion. - * - * @see https://www.unicode.org/versions/Unicode16.0.0/core-spec/chapter-5/#G40630 - */ - invalid_utf8: - $at = $i; - $invalid_length = 1; - - // Single-byte and two-byte characters. - if ( ( 0x00 === ( $b1 & 0x80 ) ) || ( 0xC0 === ( $b1 & 0xE0 ) ) ) { - return $count; - } + /** + * When encountering invalid byte sequences, Unicode suggests finding the + * maximal subpart of a text and replacing that subpart with a single + * replacement character. + * + * > This practice is more secure because it does not result in the + * > conversion consuming parts of valid sequences as though they were + * > invalid. It also guarantees at least one replacement character will + * > occur for each instance of an invalid sequence in the original text. + * > Furthermore, this practice can be defined consistently for better + * > interoperability between different implementations of conversion. + * + * @see https://www.unicode.org/versions/Unicode16.0.0/core-spec/chapter-5/#G40630 + */ + invalid_utf8: + $at = $i; + $invalid_length = 1; + + // Single-byte and two-byte characters. + if ( ( 0x00 === ( $b1 & 0x80 ) ) || ( 0xC0 === ( $b1 & 0xE0 ) ) ) { + return $count; + } - $b2 = ord( $bytes[ $i + 1 ] ?? "\xC0" ); - $b3 = ord( $bytes[ $i + 2 ] ?? "\xC0" ); + $b2 = ord( $bytes[ $i + 1 ] ?? "\xC0" ); + $b3 = ord( $bytes[ $i + 2 ] ?? "\xC0" ); - // Find the maximal subpart and skip past it. - if ( 0xE0 === ( $b1 & 0xF0 ) ) { - // Three-byte characters. - $b2_valid = ( + // Find the maximal subpart and skip past it. + if ( 0xE0 === ( $b1 & 0xF0 ) ) { + // Three-byte characters. + $b2_valid = ( ( 0xE0 === $b1 && $b2 >= 0xA0 && $b2 <= 0xBF ) || ( $b1 >= 0xE1 && $b1 <= 0xEC && $b2 >= 0x80 && $b2 <= 0xBF ) || ( 0xED === $b1 && $b2 >= 0x80 && $b2 <= 0x9F ) || ( $b1 >= 0xEE && $b1 <= 0xEF && $b2 >= 0x80 && $b2 <= 0xBF ) - ); + ); - $invalid_length = min( $end - $i, $b2_valid ? 2 : 1 ); - return $count; - } elseif ( 0xF0 === ( $b1 & 0xF8 ) ) { - // Four-byte characters. - $b2_valid = ( + $invalid_length = min( $end - $i, $b2_valid ? 2 : 1 ); + return $count; + } elseif ( 0xF0 === ( $b1 & 0xF8 ) ) { + // Four-byte characters. + $b2_valid = ( ( 0xF0 === $b1 && $b2 >= 0x90 && $b2 <= 0xBF ) || ( $b1 >= 0xF1 && $b1 <= 0xF3 && $b2 >= 0x80 && $b2 <= 0xBF ) || ( 0xF4 === $b1 && $b2 >= 0x80 && $b2 <= 0x8F ) - ); + ); - $b3_valid = $b3 >= 0x80 && $b3 <= 0xBF; + $b3_valid = $b3 >= 0x80 && $b3 <= 0xBF; + + $invalid_length = min( $end - $i, $b2_valid ? ( $b3_valid ? 3 : 2 ) : 1 ); + return $count; + } - $invalid_length = min( $end - $i, $b2_valid ? ( $b3_valid ? 3 : 2 ) : 1 ); return $count; } + $at = $i; return $count; } - - $at = $i; - return $count; } /** @@ -257,18 +259,20 @@ function _wp_scan_utf8( string $bytes, int &$at, int &$invalid_length, ?int $max * @param string $bytes String which might contain text encoded as UTF-8. * @return bool Whether the provided bytes can decode as valid UTF-8. */ -function _wp_is_valid_utf8_fallback( string $bytes ): bool { - $bytes_length = strlen( $bytes ); - if ( 0 === $bytes_length ) { - return true; - } +if ( ! function_exists( __NAMESPACE__ . '\\_wp_is_valid_utf8_fallback' ) ) { + function _wp_is_valid_utf8_fallback( string $bytes ): bool { + $bytes_length = strlen( $bytes ); + if ( 0 === $bytes_length ) { + return true; + } - $next_byte_at = 0; - $invalid_length = 0; + $next_byte_at = 0; + $invalid_length = 0; - _wp_scan_utf8( $bytes, $next_byte_at, $invalid_length ); + _wp_scan_utf8( $bytes, $next_byte_at, $invalid_length ); - return $bytes_length === $next_byte_at && 0 === $invalid_length; + return $bytes_length === $next_byte_at && 0 === $invalid_length; + } } /** @@ -286,32 +290,34 @@ function _wp_is_valid_utf8_fallback( string $bytes ): bool { * @param string $bytes UTF-8 encoded string which might contain spans of invalid bytes. * @return string Input string with spans of invalid bytes swapped with the replacement character. */ -function _wp_scrub_utf8_fallback( string $bytes ): string { - $bytes_length = strlen( $bytes ); - $next_byte_at = 0; - $was_at = 0; - $invalid_length = 0; - $scrubbed = ''; - - while ( $next_byte_at <= $bytes_length ) { - _wp_scan_utf8( $bytes, $next_byte_at, $invalid_length ); +if ( ! function_exists( __NAMESPACE__ . '\\_wp_scrub_utf8_fallback' ) ) { + function _wp_scrub_utf8_fallback( string $bytes ): string { + $bytes_length = strlen( $bytes ); + $next_byte_at = 0; + $was_at = 0; + $invalid_length = 0; + $scrubbed = ''; + + while ( $next_byte_at <= $bytes_length ) { + _wp_scan_utf8( $bytes, $next_byte_at, $invalid_length ); + + if ( $next_byte_at >= $bytes_length ) { + if ( 0 === $was_at ) { + return $bytes; + } - if ( $next_byte_at >= $bytes_length ) { - if ( 0 === $was_at ) { - return $bytes; + return $scrubbed . substr( $bytes, $was_at, $next_byte_at - $was_at - $invalid_length ); } - return $scrubbed . substr( $bytes, $was_at, $next_byte_at - $was_at - $invalid_length ); - } + $scrubbed .= substr( $bytes, $was_at, $next_byte_at - $was_at ); + $scrubbed .= "\u{FFFD}"; - $scrubbed .= substr( $bytes, $was_at, $next_byte_at - $was_at ); - $scrubbed .= "\u{FFFD}"; + $next_byte_at += $invalid_length; + $was_at = $next_byte_at; + } - $next_byte_at += $invalid_length; - $was_at = $next_byte_at; + return $scrubbed; } - - return $scrubbed; } /** @@ -340,24 +346,26 @@ function _wp_scrub_utf8_fallback( string $bytes ): string { * Default is to scan until the end of the string. Must be positive. * @return int How many code points were found. */ -function _wp_utf8_codepoint_count( string $text, ?int $byte_offset = 0, ?int $max_byte_length = PHP_INT_MAX ): int { - if ( $byte_offset < 0 ) { - return 0; - } +if ( ! function_exists( __NAMESPACE__ . '\\_wp_utf8_codepoint_count' ) ) { + function _wp_utf8_codepoint_count( string $text, ?int $byte_offset = 0, ?int $max_byte_length = PHP_INT_MAX ): int { + if ( $byte_offset < 0 ) { + return 0; + } - $count = 0; - $at = $byte_offset; - $end = strlen( $text ); - $invalid_length = 0; - $max_byte_length = min( $end - $at, $max_byte_length ); + $count = 0; + $at = $byte_offset; + $end = strlen( $text ); + $invalid_length = 0; + $max_byte_length = min( $end - $at, $max_byte_length ); - while ( $at < $end && ( $at - $byte_offset ) < $max_byte_length ) { - $count += _wp_scan_utf8( $text, $at, $invalid_length, $max_byte_length - ( $at - $byte_offset ) ); - $count += $invalid_length > 0 ? 1 : 0; - $at += $invalid_length; - } + while ( $at < $end && ( $at - $byte_offset ) < $max_byte_length ) { + $count += _wp_scan_utf8( $text, $at, $invalid_length, $max_byte_length - ( $at - $byte_offset ) ); + $count += $invalid_length > 0 ? 1 : 0; + $at += $invalid_length; + } - return $count; + return $count; + } } /** @@ -380,26 +388,28 @@ function _wp_utf8_codepoint_count( string $text, ?int $byte_offset = 0, ?int $ma * the string is not long enough. * @return int Number of bytes spanned by the code points. */ -function _wp_utf8_codepoint_span( string $text, int $byte_offset, int $max_code_points, ?int &$found_code_points = 0 ): int { - $was_at = $byte_offset; - $invalid_length = 0; - $end = strlen( $text ); - $found_code_points = 0; - - while ( $byte_offset < $end && $found_code_points < $max_code_points ) { - $needed = $max_code_points - $found_code_points; - $chunk_count = _wp_scan_utf8( $text, $byte_offset, $invalid_length, null, $needed ); - - $found_code_points += $chunk_count; - - // Invalid spans only convey one code point count regardless of how long they are. - if ( 0 !== $invalid_length && $found_code_points < $max_code_points ) { - ++$found_code_points; - $byte_offset += $invalid_length; +if ( ! function_exists( __NAMESPACE__ . '\\_wp_utf8_codepoint_span' ) ) { + function _wp_utf8_codepoint_span( string $text, int $byte_offset, int $max_code_points, ?int &$found_code_points = 0 ): int { + $was_at = $byte_offset; + $invalid_length = 0; + $end = strlen( $text ); + $found_code_points = 0; + + while ( $byte_offset < $end && $found_code_points < $max_code_points ) { + $needed = $max_code_points - $found_code_points; + $chunk_count = _wp_scan_utf8( $text, $byte_offset, $invalid_length, null, $needed ); + + $found_code_points += $chunk_count; + + // Invalid spans only convey one code point count regardless of how long they are. + if ( 0 !== $invalid_length && $found_code_points < $max_code_points ) { + ++$found_code_points; + $byte_offset += $invalid_length; + } } - } - return $byte_offset - $was_at; + return $byte_offset - $was_at; + } } /** @@ -413,18 +423,20 @@ function _wp_utf8_codepoint_span( string $text, int $byte_offset, int $max_code_ * @param string $text Are there noncharacters in this string? * @return bool Whether noncharacters were found in the string. */ -function _wp_has_noncharacters_fallback( string $text ): bool { - $at = 0; - $invalid_length = 0; - $has_noncharacters = false; - $end = strlen( $text ); - - while ( $at < $end && ! $has_noncharacters ) { - _wp_scan_utf8( $text, $at, $invalid_length, null, null, $has_noncharacters ); - $at += $invalid_length; - } +if ( ! function_exists( __NAMESPACE__ . '\\_wp_has_noncharacters_fallback' ) ) { + function _wp_has_noncharacters_fallback( string $text ): bool { + $at = 0; + $invalid_length = 0; + $has_noncharacters = false; + $end = strlen( $text ); + + while ( $at < $end && ! $has_noncharacters ) { + _wp_scan_utf8( $text, $at, $invalid_length, null, null, $has_noncharacters ); + $at += $invalid_length; + } - return $has_noncharacters; + return $has_noncharacters; + } } /** @@ -439,46 +451,48 @@ function _wp_has_noncharacters_fallback( string $text ): bool { * @param string $iso_8859_1_text Text treated as ISO-8859-1 (latin1) bytes. * @return string Text converted into UTF-8. */ -function _wp_utf8_encode_fallback( $iso_8859_1_text ) { - $iso_8859_1_text = (string) $iso_8859_1_text; - $at = 0; - $was_at = 0; - $end = strlen( $iso_8859_1_text ); - $utf8 = ''; - - while ( $at < $end ) { - // US-ASCII bytes are identical in ISO-8859-1 and UTF-8. These are 0x00–0x7F. - $ascii_byte_count = strspn( - $iso_8859_1_text, - "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" . - "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" . - " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f", - $at - ); - - if ( $ascii_byte_count > 0 ) { - $at += $ascii_byte_count; - continue; - } +if ( ! function_exists( __NAMESPACE__ . '\\_wp_utf8_encode_fallback' ) ) { + function _wp_utf8_encode_fallback( $iso_8859_1_text ) { + $iso_8859_1_text = (string) $iso_8859_1_text; + $at = 0; + $was_at = 0; + $end = strlen( $iso_8859_1_text ); + $utf8 = ''; + + while ( $at < $end ) { + // US-ASCII bytes are identical in ISO-8859-1 and UTF-8. These are 0x00–0x7F. + $ascii_byte_count = strspn( + $iso_8859_1_text, + "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" . + "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" . + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f", + $at + ); - // All other bytes transform into two-byte UTF-8 sequences. - $code_point = ord( $iso_8859_1_text[ $at ] ); - $byte1 = chr( 0xC0 | ( $code_point >> 6 ) ); - $byte2 = chr( 0x80 | ( $code_point & 0x3F ) ); + if ( $ascii_byte_count > 0 ) { + $at += $ascii_byte_count; + continue; + } - $utf8 .= substr( $iso_8859_1_text, $was_at, $at - $was_at ); - $utf8 .= "{$byte1}{$byte2}"; + // All other bytes transform into two-byte UTF-8 sequences. + $code_point = ord( $iso_8859_1_text[ $at ] ); + $byte1 = chr( 0xC0 | ( $code_point >> 6 ) ); + $byte2 = chr( 0x80 | ( $code_point & 0x3F ) ); - ++$at; - $was_at = $at; - } + $utf8 .= substr( $iso_8859_1_text, $was_at, $at - $was_at ); + $utf8 .= "{$byte1}{$byte2}"; - if ( 0 === $was_at ) { - return $iso_8859_1_text; - } + ++$at; + $was_at = $at; + } - $utf8 .= substr( $iso_8859_1_text, $was_at ); - return $utf8; + if ( 0 === $was_at ) { + return $iso_8859_1_text; + } + + $utf8 .= substr( $iso_8859_1_text, $was_at ); + return $utf8; + } } /** @@ -493,75 +507,77 @@ function _wp_utf8_encode_fallback( $iso_8859_1_text ) { * @param string $utf8_text Text treated as UTF-8 bytes. * @return string Text converted into ISO-8859-1. */ -function _wp_utf8_decode_fallback( $utf8_text ) { - $utf8_text = (string) $utf8_text; - $at = 0; - $was_at = 0; - $end = strlen( $utf8_text ); - $iso_8859_1_text = ''; - - while ( $at < $end ) { - // US-ASCII bytes are identical in ISO-8859-1 and UTF-8. These are 0x00–0x7F. - $ascii_byte_count = strspn( - $utf8_text, - "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" . - "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" . - " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f", - $at - ); - - if ( $ascii_byte_count > 0 ) { - $at += $ascii_byte_count; - continue; - } - - $next_at = $at; - $invalid_length = 0; - $found = _wp_scan_utf8( $utf8_text, $next_at, $invalid_length, null, 1 ); - $span_length = $next_at - $at; - $next_byte = '?'; +if ( ! function_exists( __NAMESPACE__ . '\\_wp_utf8_decode_fallback' ) ) { + function _wp_utf8_decode_fallback( $utf8_text ) { + $utf8_text = (string) $utf8_text; + $at = 0; + $was_at = 0; + $end = strlen( $utf8_text ); + $iso_8859_1_text = ''; + + while ( $at < $end ) { + // US-ASCII bytes are identical in ISO-8859-1 and UTF-8. These are 0x00–0x7F. + $ascii_byte_count = strspn( + $utf8_text, + "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" . + "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" . + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f", + $at + ); - if ( 1 !== $found ) { - if ( $invalid_length > 0 ) { - $next_byte = ''; - goto flush_sub_part; + if ( $ascii_byte_count > 0 ) { + $at += $ascii_byte_count; + continue; } - break; - } + $next_at = $at; + $invalid_length = 0; + $found = _wp_scan_utf8( $utf8_text, $next_at, $invalid_length, null, 1 ); + $span_length = $next_at - $at; + $next_byte = '?'; - // All convertible code points are two-bytes long. - $byte1 = ord( $utf8_text[ $at ] ); - if ( 0xC0 !== ( $byte1 & 0xE0 ) ) { - goto flush_sub_part; - } + if ( 1 !== $found ) { + if ( $invalid_length > 0 ) { + $next_byte = ''; + goto flush_sub_part; + } - // All convertible code points are not greater than U+FF. - $byte2 = ord( $utf8_text[ $at + 1 ] ); - $code_point = ( ( $byte1 & 0x1F ) << 6 ) | ( ( $byte2 & 0x3F ) ); - if ( $code_point > 0xFF ) { - goto flush_sub_part; - } + break; + } - $next_byte = chr( $code_point ); + // All convertible code points are two-bytes long. + $byte1 = ord( $utf8_text[ $at ] ); + if ( 0xC0 !== ( $byte1 & 0xE0 ) ) { + goto flush_sub_part; + } - flush_sub_part: - $iso_8859_1_text .= substr( $utf8_text, $was_at, $at - $was_at ); - $iso_8859_1_text .= $next_byte; - $at += $span_length; - $was_at = $at; + // All convertible code points are not greater than U+FF. + $byte2 = ord( $utf8_text[ $at + 1 ] ); + $code_point = ( ( $byte1 & 0x1F ) << 6 ) | ( ( $byte2 & 0x3F ) ); + if ( $code_point > 0xFF ) { + goto flush_sub_part; + } + + $next_byte = chr( $code_point ); - if ( $invalid_length > 0 ) { - $iso_8859_1_text .= '?'; - $at += $invalid_length; + flush_sub_part: + $iso_8859_1_text .= substr( $utf8_text, $was_at, $at - $was_at ); + $iso_8859_1_text .= $next_byte; + $at += $span_length; $was_at = $at; + + if ( $invalid_length > 0 ) { + $iso_8859_1_text .= '?'; + $at += $invalid_length; + $was_at = $at; + } } - } - if ( 0 === $was_at ) { - return $utf8_text; - } + if ( 0 === $was_at ) { + return $utf8_text; + } - $iso_8859_1_text .= substr( $utf8_text, $was_at ); - return $iso_8859_1_text; + $iso_8859_1_text .= substr( $utf8_text, $was_at ); + return $iso_8859_1_text; + } } diff --git a/components/Encoding/utf8-encoder.php b/components/Encoding/utf8-encoder.php index 3406b1ea..833f9737 100644 --- a/components/Encoding/utf8-encoder.php +++ b/components/Encoding/utf8-encoder.php @@ -30,40 +30,42 @@ * @param int $codepoint Which code point to convert. * @return string Converted code point, or `�` if invalid. */ -function codepoint_to_utf8_bytes( $codepoint ) { - // Pre-check to ensure a valid code point. - if ( +if ( ! function_exists( __NAMESPACE__ . '\\codepoint_to_utf8_bytes' ) ) { + function codepoint_to_utf8_bytes( $codepoint ) { + // Pre-check to ensure a valid code point. + if ( $codepoint <= 0 || ( $codepoint >= 0xD800 && $codepoint <= 0xDFFF ) || $codepoint > 0x10FFFF - ) { - return '�'; - } + ) { + return '�'; + } - if ( $codepoint <= 0x7F ) { - return chr( $codepoint ); - } + if ( $codepoint <= 0x7F ) { + return chr( $codepoint ); + } - if ( $codepoint <= 0x7FF ) { - $byte1 = chr( ( 0xC0 | ( ( $codepoint >> 6 ) & 0x1F ) ) ); - $byte2 = chr( $codepoint & 0x3F | 0x80 ); + if ( $codepoint <= 0x7FF ) { + $byte1 = chr( ( 0xC0 | ( ( $codepoint >> 6 ) & 0x1F ) ) ); + $byte2 = chr( $codepoint & 0x3F | 0x80 ); - return "{$byte1}{$byte2}"; - } + return "{$byte1}{$byte2}"; + } - if ( $codepoint <= 0xFFFF ) { - $byte1 = chr( ( $codepoint >> 12 ) | 0xE0 ); - $byte2 = chr( ( $codepoint >> 6 ) & 0x3F | 0x80 ); - $byte3 = chr( $codepoint & 0x3F | 0x80 ); + if ( $codepoint <= 0xFFFF ) { + $byte1 = chr( ( $codepoint >> 12 ) | 0xE0 ); + $byte2 = chr( ( $codepoint >> 6 ) & 0x3F | 0x80 ); + $byte3 = chr( $codepoint & 0x3F | 0x80 ); - return "{$byte1}{$byte2}{$byte3}"; - } + return "{$byte1}{$byte2}{$byte3}"; + } - // Any values above U+10FFFF are eliminated above in the pre-check. - $byte1 = chr( ( $codepoint >> 18 ) | 0xF0 ); - $byte2 = chr( ( $codepoint >> 12 ) & 0x3F | 0x80 ); - $byte3 = chr( ( $codepoint >> 6 ) & 0x3F | 0x80 ); - $byte4 = chr( $codepoint & 0x3F | 0x80 ); + // Any values above U+10FFFF are eliminated above in the pre-check. + $byte1 = chr( ( $codepoint >> 18 ) | 0xF0 ); + $byte2 = chr( ( $codepoint >> 12 ) & 0x3F | 0x80 ); + $byte3 = chr( ( $codepoint >> 6 ) & 0x3F | 0x80 ); + $byte4 = chr( $codepoint & 0x3F | 0x80 ); - return "{$byte1}{$byte2}{$byte3}{$byte4}"; + return "{$byte1}{$byte2}{$byte3}{$byte4}"; + } } diff --git a/components/Encoding/utf8.php b/components/Encoding/utf8.php index 2c266703..47584030 100644 --- a/components/Encoding/utf8.php +++ b/components/Encoding/utf8.php @@ -6,45 +6,46 @@ use function WordPress\Encoding\compat\_wp_scrub_utf8_fallback; use function WordPress\Encoding\compat\_wp_has_noncharacters_fallback; -if ( extension_loaded( 'mbstring' ) ) : - /** - * Determines if a given byte string represents a valid UTF-8 encoding. - * - * Note that it’s unlikely for non-UTF-8 data to validate as UTF-8, but - * it is still possible. Many texts are simultaneously valid UTF-8, - * valid US-ASCII, and valid ISO-8859-1 (`latin1`). - * - * Example: - * - * true === wp_is_valid_utf8( '' ); - * true === wp_is_valid_utf8( 'just a test' ); - * true === wp_is_valid_utf8( "\xE2\x9C\x8F" ); // Pencil, U+270F. - * true === wp_is_valid_utf8( "\u{270F}" ); // Pencil, U+270F. - * true === wp_is_valid_utf8( '✏' ); // Pencil, U+270F. - * - * false === wp_is_valid_utf8( "just \xC0 test" ); // Invalid bytes. - * false === wp_is_valid_utf8( "\xE2\x9C" ); // Invalid/incomplete sequences. - * false === wp_is_valid_utf8( "\xC1\xBF" ); // Overlong sequences. - * false === wp_is_valid_utf8( "\xED\xB0\x80" ); // Surrogate halves. - * false === wp_is_valid_utf8( "B\xFCch" ); // ISO-8859-1 high-bytes. - * // E.g. The “ü” in ISO-8859-1 is a single byte 0xFC, - * // but in UTF-8 is the two-byte sequence 0xC3 0xBC. - * - * A “valid” string consists of “well-formed UTF-8 code unit sequence[s],” meaning - * that the bytes conform to the UTF-8 encoding scheme, all characters use the minimal - * byte sequence required by UTF-8, and that no sequence encodes a UTF-16 surrogate - * code point or any character above the representable range. - * - * @see https://www.unicode.org/versions/Unicode16.0.0/core-spec/chapter-3/#G32860 - * - * @since 6.9.0 - * - * @param string $bytes String which might contain text encoded as UTF-8. - * @return bool Whether the provided bytes can decode as valid UTF-8. - */ - function wp_is_valid_utf8( string $bytes ): bool { - return mb_check_encoding( $bytes, 'UTF-8' ); - } +if ( ! function_exists( __NAMESPACE__ . '\\wp_is_valid_utf8' ) ) { + if ( extension_loaded( 'mbstring' ) ) : + /** + * Determines if a given byte string represents a valid UTF-8 encoding. + * + * Note that it’s unlikely for non-UTF-8 data to validate as UTF-8, but + * it is still possible. Many texts are simultaneously valid UTF-8, + * valid US-ASCII, and valid ISO-8859-1 (`latin1`). + * + * Example: + * + * true === wp_is_valid_utf8( '' ); + * true === wp_is_valid_utf8( 'just a test' ); + * true === wp_is_valid_utf8( "\xE2\x9C\x8F" ); // Pencil, U+270F. + * true === wp_is_valid_utf8( "\u{270F}" ); // Pencil, U+270F. + * true === wp_is_valid_utf8( '✏' ); // Pencil, U+270F. + * + * false === wp_is_valid_utf8( "just \xC0 test" ); // Invalid bytes. + * false === wp_is_valid_utf8( "\xE2\x9C" ); // Invalid/incomplete sequences. + * false === wp_is_valid_utf8( "\xC1\xBF" ); // Overlong sequences. + * false === wp_is_valid_utf8( "\xED\xB0\x80" ); // Surrogate halves. + * false === wp_is_valid_utf8( "B\xFCch" ); // ISO-8859-1 high-bytes. + * // E.g. The “ü” in ISO-8859-1 is a single byte 0xFC, + * // but in UTF-8 is the two-byte sequence 0xC3 0xBC. + * + * A “valid” string consists of “well-formed UTF-8 code unit sequence[s],” meaning + * that the bytes conform to the UTF-8 encoding scheme, all characters use the minimal + * byte sequence required by UTF-8, and that no sequence encodes a UTF-16 surrogate + * code point or any character above the representable range. + * + * @see https://www.unicode.org/versions/Unicode16.0.0/core-spec/chapter-3/#G32860 + * + * @since 6.9.0 + * + * @param string $bytes String which might contain text encoded as UTF-8. + * @return bool Whether the provided bytes can decode as valid UTF-8. + */ + function wp_is_valid_utf8( string $bytes ): bool { + return mb_check_encoding( $bytes, 'UTF-8' ); + } else : /** * Fallback function for validating UTF-8. @@ -59,74 +60,76 @@ function wp_is_valid_utf8( string $string ): bool { return _wp_is_valid_utf8_fallback( $string ); } endif; +} -if ( +if ( ! function_exists( __NAMESPACE__ . '\\wp_scrub_utf8' ) ) { + if ( extension_loaded( 'mbstring' ) && // Maximal subpart substitution introduced by php/php-src@04e59c916f12b322ac55f22314e31bd0176d01cb. version_compare( PHP_VERSION, '8.1.6', '>=' ) -) : - /** - * Replaces ill-formed UTF-8 byte sequences with the Unicode Replacement Character. - * - * Knowing what to do in the presence of text encoding issues can be complicated. - * This function replaces invalid spans of bytes to neutralize any corruption that - * may be there and prevent it from causing further problems downstream. - * - * However, it’s not always ideal to replace those bytes. In some settings it may - * be best to leave the invalid bytes in the string so that downstream code can handle - * them in a specific way. Replacing the bytes too early, like escaping for HTML too - * early, can introduce other forms of corruption and data loss. - * - * When in doubt, use this function to replace spans of invalid bytes. - * - * Replacement follows the “maximal subpart” algorithm for secure and interoperable - * strings. This can lead to sequences of multiple replacement characters in a row. - * - * Example: - * - * // Valid strings come through unchanged. - * 'test' === wp_scrub_utf8( 'test' ); - * - * // Invalid sequences of bytes are replaced. - * $invalid = "the byte \xC0 is never allowed in a UTF-8 string."; - * "the byte \u{FFFD} is never allowed in a UTF-8 string." === wp_scrub_utf8( $invalid, true ); - * 'the byte � is never allowed in a UTF-8 string.' === wp_scrub_utf8( $invalid, true ); - * - * // Maximal subparts are replaced individually. - * '.�.' === wp_scrub_utf8( ".\xC0." ); // C0 is never valid. - * '.�.' === wp_scrub_utf8( ".\xE2\x8C." ); // Missing A3 at end. - * '.��.' === wp_scrub_utf8( ".\xE2\x8C\xE2\x8C." ); // Maximal subparts replaced separately. - * '.��.' === wp_scrub_utf8( ".\xC1\xBF." ); // Overlong sequence. - * '.���.' === wp_scrub_utf8( ".\xED\xA0\x80." ); // Surrogate half. - * - * Note! The Unicode Replacement Character is itself a Unicode character (U+FFFD). - * Once a span of invalid bytes has been replaced by one, it will not be possible - * to know whether the replacement character was originally intended to be there - * or if it is the result of scrubbing bytes. It is ideal to leave replacement for - * display only, but some contexts (e.g. generating XML or passing data into a - * large language model) require valid input strings. - * - * @since 6.9.0 - * - * @see https://www.unicode.org/versions/Unicode16.0.0/core-spec/chapter-5/#G40630 - * - * @param string $text String which is assumed to be UTF-8 but may contain invalid sequences of bytes. - * @return string Input text with invalid sequences of bytes replaced with the Unicode replacement character. - */ - function wp_scrub_utf8( $text ) { - /* - * While it looks like setting the substitute character could fail, - * the internal PHP code will never fail when provided a valid - * code point as a number. In this case, there’s no need to check - * its return value to see if it succeeded. + ) : + /** + * Replaces ill-formed UTF-8 byte sequences with the Unicode Replacement Character. + * + * Knowing what to do in the presence of text encoding issues can be complicated. + * This function replaces invalid spans of bytes to neutralize any corruption that + * may be there and prevent it from causing further problems downstream. + * + * However, it’s not always ideal to replace those bytes. In some settings it may + * be best to leave the invalid bytes in the string so that downstream code can handle + * them in a specific way. Replacing the bytes too early, like escaping for HTML too + * early, can introduce other forms of corruption and data loss. + * + * When in doubt, use this function to replace spans of invalid bytes. + * + * Replacement follows the “maximal subpart” algorithm for secure and interoperable + * strings. This can lead to sequences of multiple replacement characters in a row. + * + * Example: + * + * // Valid strings come through unchanged. + * 'test' === wp_scrub_utf8( 'test' ); + * + * // Invalid sequences of bytes are replaced. + * $invalid = "the byte \xC0 is never allowed in a UTF-8 string."; + * "the byte \u{FFFD} is never allowed in a UTF-8 string." === wp_scrub_utf8( $invalid, true ); + * 'the byte � is never allowed in a UTF-8 string.' === wp_scrub_utf8( $invalid, true ); + * + * // Maximal subparts are replaced individually. + * '.�.' === wp_scrub_utf8( ".\xC0." ); // C0 is never valid. + * '.�.' === wp_scrub_utf8( ".\xE2\x8C." ); // Missing A3 at end. + * '.��.' === wp_scrub_utf8( ".\xE2\x8C\xE2\x8C." ); // Maximal subparts replaced separately. + * '.��.' === wp_scrub_utf8( ".\xC1\xBF." ); // Overlong sequence. + * '.���.' === wp_scrub_utf8( ".\xED\xA0\x80." ); // Surrogate half. + * + * Note! The Unicode Replacement Character is itself a Unicode character (U+FFFD). + * Once a span of invalid bytes has been replaced by one, it will not be possible + * to know whether the replacement character was originally intended to be there + * or if it is the result of scrubbing bytes. It is ideal to leave replacement for + * display only, but some contexts (e.g. generating XML or passing data into a + * large language model) require valid input strings. + * + * @since 6.9.0 + * + * @see https://www.unicode.org/versions/Unicode16.0.0/core-spec/chapter-5/#G40630 + * + * @param string $text String which is assumed to be UTF-8 but may contain invalid sequences of bytes. + * @return string Input text with invalid sequences of bytes replaced with the Unicode replacement character. */ - $prev_replacement_character = mb_substitute_character(); - mb_substitute_character( 0xFFFD ); - $scrubbed = mb_scrub( $text, 'UTF-8' ); - mb_substitute_character( $prev_replacement_character ); + function wp_scrub_utf8( $text ) { + /* + * While it looks like setting the substitute character could fail, + * the internal PHP code will never fail when provided a valid + * code point as a number. In this case, there’s no need to check + * its return value to see if it succeeded. + */ + $prev_replacement_character = mb_substitute_character(); + mb_substitute_character( 0xFFFD ); + $scrubbed = mb_scrub( $text, 'UTF-8' ); + mb_substitute_character( $prev_replacement_character ); - return $scrubbed; - } + return $scrubbed; + } else : /** * Fallback function for scrubbing UTF-8. @@ -140,50 +143,54 @@ function wp_scrub_utf8( $text ) { return _wp_scrub_utf8_fallback( $text ); } endif; +} -function _wp_can_use_pcre_u( $set = null ) { - static $utf8_pcre = 'reset'; +if ( ! function_exists( __NAMESPACE__ . '\\_wp_can_use_pcre_u' ) ) { + function _wp_can_use_pcre_u( $set = null ) { + static $utf8_pcre = 'reset'; - if ( null !== $set ) { - $utf8_pcre = $set; - } + if ( null !== $set ) { + $utf8_pcre = $set; + } - if ( 'reset' === $utf8_pcre ) { - // phpcs:ignore WordPress.PHP.NoSilencedErrors.Discouraged -- intentional error generated to detect PCRE/u support. - $utf8_pcre = @preg_match( '/^./u', 'a' ); - } + if ( 'reset' === $utf8_pcre ) { + // phpcs:ignore WordPress.PHP.NoSilencedErrors.Discouraged -- intentional error generated to detect PCRE/u support. + $utf8_pcre = @preg_match( '/^./u', 'a' ); + } - return $utf8_pcre; + return $utf8_pcre; + } } -if ( _wp_can_use_pcre_u() ) : - /** - * Returns whether the given string contains Unicode noncharacters. - * - * XML recommends against using noncharacters and HTML forbids their - * use in attribute names. Unicode recommends that they not be used - * in open exchange of data. - * - * Noncharacters are code points within the following ranges: - * - U+FDD0–U+FDEF - * - U+FFFE–U+FFFF - * - U+1FFFE, U+1FFFF, U+2FFFE, U+2FFFF, …, U+10FFFE, U+10FFFF - * - * @see https://www.unicode.org/versions/Unicode17.0.0/core-spec/chapter-23/#G12612 - * @see https://www.w3.org/TR/xml/#charsets - * @see https://html.spec.whatwg.org/#attributes-2 - * - * @since 6.9.0 - * - * @param string $text Are there noncharacters in this string? - * @return bool Whether noncharacters were found in the string. - */ - function wp_has_noncharacters( string $text ): bool { - return 1 === preg_match( - '/[\x{FDD0}-\x{FDEF}\x{FFFE}\x{FFFF}\x{1FFFE}\x{1FFFF}\x{2FFFE}\x{2FFFF}\x{3FFFE}\x{3FFFF}\x{4FFFE}\x{4FFFF}\x{5FFFE}\x{5FFFF}\x{6FFFE}\x{6FFFF}\x{7FFFE}\x{7FFFF}\x{8FFFE}\x{8FFFF}\x{9FFFE}\x{9FFFF}\x{AFFFE}\x{AFFFF}\x{BFFFE}\x{BFFFF}\x{CFFFE}\x{CFFFF}\x{DFFFE}\x{DFFFF}\x{EFFFE}\x{EFFFF}\x{FFFFE}\x{FFFFF}\x{10FFFE}\x{10FFFF}]/u', - $text - ); - } +if ( ! function_exists( __NAMESPACE__ . '\\wp_has_noncharacters' ) ) { + if ( _wp_can_use_pcre_u() ) : + /** + * Returns whether the given string contains Unicode noncharacters. + * + * XML recommends against using noncharacters and HTML forbids their + * use in attribute names. Unicode recommends that they not be used + * in open exchange of data. + * + * Noncharacters are code points within the following ranges: + * - U+FDD0–U+FDEF + * - U+FFFE–U+FFFF + * - U+1FFFE, U+1FFFF, U+2FFFE, U+2FFFF, …, U+10FFFE, U+10FFFF + * + * @see https://www.unicode.org/versions/Unicode17.0.0/core-spec/chapter-23/#G12612 + * @see https://www.w3.org/TR/xml/#charsets + * @see https://html.spec.whatwg.org/#attributes-2 + * + * @since 6.9.0 + * + * @param string $text Are there noncharacters in this string? + * @return bool Whether noncharacters were found in the string. + */ + function wp_has_noncharacters( string $text ): bool { + return 1 === preg_match( + '/[\x{FDD0}-\x{FDEF}\x{FFFE}\x{FFFF}\x{1FFFE}\x{1FFFF}\x{2FFFE}\x{2FFFF}\x{3FFFE}\x{3FFFF}\x{4FFFE}\x{4FFFF}\x{5FFFE}\x{5FFFF}\x{6FFFE}\x{6FFFF}\x{7FFFE}\x{7FFFF}\x{8FFFE}\x{8FFFF}\x{9FFFE}\x{9FFFF}\x{AFFFE}\x{AFFFF}\x{BFFFE}\x{BFFFF}\x{CFFFE}\x{CFFFF}\x{DFFFE}\x{DFFFF}\x{EFFFE}\x{EFFFF}\x{FFFFE}\x{FFFFF}\x{10FFFE}\x{10FFFF}]/u', + $text + ); + } else : /** * Fallback function for detecting noncharacters in a text. @@ -197,6 +204,7 @@ function wp_has_noncharacters( string $text ): bool { return _wp_has_noncharacters_fallback( $text ); } endif; +} /** * Convert a UTF-8 byte sequence to its Unicode codepoint. @@ -205,23 +213,25 @@ function wp_has_noncharacters( string $text ): bool { * * @return int Unicode codepoint. */ -function utf8_ord( string $character ): int { - // Convert the byte sequence to its binary representation. - $bytes = unpack( 'C*', $character ); +if ( ! function_exists( __NAMESPACE__ . '\\utf8_ord' ) ) { + function utf8_ord( string $character ): int { + // Convert the byte sequence to its binary representation. + $bytes = unpack( 'C*', $character ); - // Initialize the codepoint. - $codepoint = 0; + // Initialize the codepoint. + $codepoint = 0; - // Calculate the codepoint based on the number of bytes. - if ( 1 === count( $bytes ) ) { - $codepoint = $bytes[1]; - } elseif ( 2 === count( $bytes ) ) { - $codepoint = ( ( $bytes[1] & 0x1F ) << 6 ) | ( $bytes[2] & 0x3F ); - } elseif ( 3 === count( $bytes ) ) { - $codepoint = ( ( $bytes[1] & 0x0F ) << 12 ) | ( ( $bytes[2] & 0x3F ) << 6 ) | ( $bytes[3] & 0x3F ); - } elseif ( 4 === count( $bytes ) ) { - $codepoint = ( ( $bytes[1] & 0x07 ) << 18 ) | ( ( $bytes[2] & 0x3F ) << 12 ) | ( ( $bytes[3] & 0x3F ) << 6 ) | ( $bytes[4] & 0x3F ); - } + // Calculate the codepoint based on the number of bytes. + if ( 1 === count( $bytes ) ) { + $codepoint = $bytes[1]; + } elseif ( 2 === count( $bytes ) ) { + $codepoint = ( ( $bytes[1] & 0x1F ) << 6 ) | ( $bytes[2] & 0x3F ); + } elseif ( 3 === count( $bytes ) ) { + $codepoint = ( ( $bytes[1] & 0x0F ) << 12 ) | ( ( $bytes[2] & 0x3F ) << 6 ) | ( $bytes[3] & 0x3F ); + } elseif ( 4 === count( $bytes ) ) { + $codepoint = ( ( $bytes[1] & 0x07 ) << 18 ) | ( ( $bytes[2] & 0x3F ) << 12 ) | ( ( $bytes[3] & 0x3F ) << 6 ) | ( $bytes[4] & 0x3F ); + } - return $codepoint; + return $codepoint; + } } diff --git a/components/Filesystem/functions.php b/components/Filesystem/functions.php index df04f65a..7c0a2567 100644 --- a/components/Filesystem/functions.php +++ b/components/Filesystem/functions.php @@ -4,24 +4,26 @@ use ValueError; -function ls_recursive( Filesystem $filesystem, $path = '/' ) { - $tree = array(); - foreach ( $filesystem->ls( $path ) as $item ) { - if ( $filesystem->is_dir( $item ) ) { - $tree[] = array( - 'name' => $item, - 'type' => 'dir', - 'children' => ls_recursive( $filesystem, $item ), - ); - } else { - $tree[] = array( - 'name' => $item, - 'type' => 'file', - ); +if ( ! function_exists( __NAMESPACE__ . '\\ls_recursive' ) ) { + function ls_recursive( Filesystem $filesystem, $path = '/' ) { + $tree = array(); + foreach ( $filesystem->ls( $path ) as $item ) { + if ( $filesystem->is_dir( $item ) ) { + $tree[] = array( + 'name' => $item, + 'type' => 'dir', + 'children' => ls_recursive( $filesystem, $item ), + ); + } else { + $tree[] = array( + 'name' => $item, + 'type' => 'file', + ); + } } - } - return $tree; + return $tree; + } } /** @@ -35,75 +37,77 @@ function ls_recursive( Filesystem $filesystem, $path = '/' ) { * @type bool $recursive Whether to copy the file or directory recursively. * } */ -function copy_between_filesystems( array $args ) { - $source = $args['source_filesystem']; - $source_path = $args['source_path'] ?? '/'; - $destination = $args['target_filesystem']; - $destination_path = $args['target_path'] ?? '/'; - $recursive = $args['recursive'] ?? true; +if ( ! function_exists( __NAMESPACE__ . '\\copy_between_filesystems' ) ) { + function copy_between_filesystems( array $args ) { + $source = $args['source_filesystem']; + $source_path = $args['source_path'] ?? '/'; + $destination = $args['target_filesystem']; + $destination_path = $args['target_path'] ?? '/'; + $recursive = $args['recursive'] ?? true; - if ( $source->is_file( $source_path ) ) { - $destination_dir = wp_unix_dirname( $destination_path ); - if ( ! $destination->is_dir( $destination_dir ) ) { - $destination->mkdir( - $destination_dir, - array( - 'recursive' => true, - ) - ); - } + if ( $source->is_file( $source_path ) ) { + $destination_dir = wp_unix_dirname( $destination_path ); + if ( ! $destination->is_dir( $destination_dir ) ) { + $destination->mkdir( + $destination_dir, + array( + 'recursive' => true, + ) + ); + } - $to_stream = $destination->open_write_stream( $destination_path ); - try { - $from_stream = $source->open_read_stream( $source_path ); + $to_stream = $destination->open_write_stream( $destination_path ); try { - $chunks_written = 0; - while ( ! $from_stream->reached_end_of_data() ) { - $available = $from_stream->pull( 65536 ); - $to_stream->append_bytes( $from_stream->consume( $available ), $to_stream ); - ++$chunks_written; - } - if ( 0 === $chunks_written ) { - // Make sure the file receives at least one chunk. - // so we can be sure it gets created in case the. - // destination filesystem is lazy. - $to_stream->append_bytes( '' ); + $from_stream = $source->open_read_stream( $source_path ); + try { + $chunks_written = 0; + while ( ! $from_stream->reached_end_of_data() ) { + $available = $from_stream->pull( 65536 ); + $to_stream->append_bytes( $from_stream->consume( $available ), $to_stream ); + ++$chunks_written; + } + if ( 0 === $chunks_written ) { + // Make sure the file receives at least one chunk. + // so we can be sure it gets created in case the. + // destination filesystem is lazy. + $to_stream->append_bytes( '' ); + } + } finally { + $from_stream->close_reading(); } } finally { - $from_stream->close_reading(); + $to_stream->close_writing(); } - } finally { - $to_stream->close_writing(); - } - } elseif ( $source->is_dir( $source_path ) ) { - if ( ! $recursive ) { - throw new FilesystemException( 'Cannot copy a directory. Set the option `recursive` to true to copy directories recursively.' ); - } - if ( ! $destination->is_dir( $destination_path ) ) { - $destination->mkdir( - $destination_path, - array( - 'recursive' => true, - ) - ); - } - foreach ( $source->ls( $source_path ) as $item ) { - copy_between_filesystems( - array( - 'source_filesystem' => $source, - 'source_path' => wp_join_unix_paths( $source_path, $item ), - 'target_filesystem' => $destination, - 'target_path' => wp_join_unix_paths( $destination_path, $item ), - ) - ); + } elseif ( $source->is_dir( $source_path ) ) { + if ( ! $recursive ) { + throw new FilesystemException( 'Cannot copy a directory. Set the option `recursive` to true to copy directories recursively.' ); + } + if ( ! $destination->is_dir( $destination_path ) ) { + $destination->mkdir( + $destination_path, + array( + 'recursive' => true, + ) + ); + } + foreach ( $source->ls( $source_path ) as $item ) { + copy_between_filesystems( + array( + 'source_filesystem' => $source, + 'source_path' => wp_join_unix_paths( $source_path, $item ), + 'target_filesystem' => $destination, + 'target_path' => wp_join_unix_paths( $destination_path, $item ), + ) + ); + } + } elseif ( $source->exists( $source_path ) ) { + // For now ignore paths that are neither files nor directories. + // For example, in GitFilesystem that could be a submodule. + return; // No-op, intentionally ignore. + } else { + // When a path does not exist, throw a clear error. + throw new FilesystemException( 'Path does not exist in the source filesystem: ' . $source_path ); } - } elseif ( $source->exists( $source_path ) ) { - // For now ignore paths that are neither files nor directories. - // For example, in GitFilesystem that could be a submodule. - return; // No-op, intentionally ignore. - } else { - // When a path does not exist, throw a clear error. - throw new FilesystemException( 'Path does not exist in the source filesystem: ' . $source_path ); } } @@ -117,31 +121,34 @@ function copy_between_filesystems( array $args ) { * @return int The number of chunks written. * @throws FilesystemException If there's an error during the transfer. */ -function pipe_stream( $from_stream, $to_stream, $chunk_size = 65536 ) { - $chunks_written = 0; - while ( ! $from_stream->reached_end_of_data() ) { - $available = $from_stream->pull( $chunk_size ); - $to_stream->append_bytes( $from_stream->consume( $available ) ); - ++$chunks_written; - } +if ( ! function_exists( __NAMESPACE__ . '\\pipe_stream' ) ) { + function pipe_stream( $from_stream, $to_stream, $chunk_size = 65536 ) { + $chunks_written = 0; + while ( ! $from_stream->reached_end_of_data() ) { + $available = $from_stream->pull( $chunk_size ); + $to_stream->append_bytes( $from_stream->consume( $available ) ); + ++$chunks_written; + } - if ( 0 === $chunks_written ) { - // Make sure the file receives at least one chunk - // so we can be sure it gets created in case the - // destination filesystem is lazy. - $to_stream->append_bytes( '' ); - $chunks_written = 1; - } + if ( 0 === $chunks_written ) { + // Make sure the file receives at least one chunk + // so we can be sure it gets created in case the + // destination filesystem is lazy. + $to_stream->append_bytes( '' ); + $chunks_written = 1; + } - return $chunks_written; + return $chunks_written; + } } +if ( ! function_exists( __NAMESPACE__ . '\\wp_unix_path_segments' ) ) { + function wp_unix_path_segments( $path ) { + $without_dots = wp_unix_path_resolve_dots( $path ); + $without_slashes = trim( $without_dots, '/' ); -function wp_unix_path_segments( $path ) { - $without_dots = wp_unix_path_resolve_dots( $path ); - $without_slashes = trim( $without_dots, '/' ); - - return explode( '/', $without_slashes ); + return explode( '/', $without_slashes ); + } } /** @@ -149,26 +156,28 @@ function wp_unix_path_segments( $path ) { * * Removes any double slashes between path segments. */ -function wp_join_unix_paths( ...$path_segments ) { - $input_starts_with_slash = null; +if ( ! function_exists( __NAMESPACE__ . '\\wp_join_unix_paths' ) ) { + function wp_join_unix_paths( ...$path_segments ) { + $input_starts_with_slash = null; - $paths = array(); - foreach ( $path_segments as $path_segment ) { - if ( '' !== $path_segment ) { - $paths[] = $path_segment; - if ( null === $input_starts_with_slash ) { - $input_starts_with_slash = 0 === strncmp( $path_segment, '/', strlen( '/' ) ); + $paths = array(); + foreach ( $path_segments as $path_segment ) { + if ( '' !== $path_segment ) { + $paths[] = $path_segment; + if ( null === $input_starts_with_slash ) { + $input_starts_with_slash = 0 === strncmp( $path_segment, '/', strlen( '/' ) ); + } } } - } - $path = implode( '/', $paths ); + $path = implode( '/', $paths ); - $result = preg_replace( '#/+#', '/', $path ); - if ( $input_starts_with_slash && 0 !== strncmp( $result, '/', strlen( '/' ) ) ) { - $result = '/' . $result; - } + $result = preg_replace( '#/+#', '/', $path ); + if ( $input_starts_with_slash && 0 !== strncmp( $result, '/', strlen( '/' ) ) ) { + $result = '/' . $result; + } - return $result; + return $result; + } } /** @@ -184,31 +193,33 @@ function wp_join_unix_paths( ...$path_segments ) { * @param string $path The file path that needs cleaning up * @return string The cleaned, absolute path */ -function wp_unix_path_resolve_dots( $path ) { - // Convert to absolute path. - if ( 0 !== strncmp( $path, '/', strlen( '/' ) ) ) { - $path = '/' . $path; - } - - // Resolve . and .. - $parts = explode( '/', $path ); - $normalized = array(); - foreach ( $parts as $part ) { - if ( '.' === $part || '' === $part ) { - continue; +if ( ! function_exists( __NAMESPACE__ . '\\wp_unix_path_resolve_dots' ) ) { + function wp_unix_path_resolve_dots( $path ) { + // Convert to absolute path. + if ( 0 !== strncmp( $path, '/', strlen( '/' ) ) ) { + $path = '/' . $path; } - if ( '..' === $part ) { - array_pop( $normalized ); - continue; + + // Resolve . and .. + $parts = explode( '/', $path ); + $normalized = array(); + foreach ( $parts as $part ) { + if ( '.' === $part || '' === $part ) { + continue; + } + if ( '..' === $part ) { + array_pop( $normalized ); + continue; + } + $normalized[] = $part; } - $normalized[] = $part; - } - $result = implode( '/', $normalized ); - if ( '.' === $result ) { - $result = ''; + $result = implode( '/', $normalized ); + if ( '.' === $result ) { + $result = ''; + } + return $result; } - return $result; } @@ -216,12 +227,14 @@ function wp_unix_path_resolve_dots( $path ) { * Like sys_get_temp_dir(), but returns a path using forward slashes * as separators. */ -function wp_unix_sys_get_temp_dir() { - $path = sys_get_temp_dir(); - if ( '\\' === DIRECTORY_SEPARATOR ) { - $path = str_replace( '\\', '/', $path ); +if ( ! function_exists( __NAMESPACE__ . '\\wp_unix_sys_get_temp_dir' ) ) { + function wp_unix_sys_get_temp_dir() { + $path = sys_get_temp_dir(); + if ( '\\' === DIRECTORY_SEPARATOR ) { + $path = str_replace( '\\', '/', $path ); + } + return $path; } - return $path; } /** @@ -251,39 +264,41 @@ function wp_unix_sys_get_temp_dir() { * @return string * @throws ValueError on $levels < 1 (keeps parity with PHP 8.x). */ -function wp_unix_dirname( string $path, int $levels = 1 ): string { - if ( $levels < 1 ) { - throw new ValueError( 'unix_dirname(): $levels must be >= 1' ); - } +if ( ! function_exists( __NAMESPACE__ . '\\wp_unix_dirname' ) ) { + function wp_unix_dirname( string $path, int $levels = 1 ): string { + if ( $levels < 1 ) { + throw new ValueError( 'unix_dirname(): $levels must be >= 1' ); + } - // treat empty string the same way PHP does. - if ( '' === $path ) { - return ''; - } + // treat empty string the same way PHP does. + if ( '' === $path ) { + return ''; + } - // if the path is nothing but slashes, the result is always "/". - if ( strspn( $path, '/' ) === strlen( $path ) ) { - return 1 === $levels ? '/' : wp_unix_dirname( '/', $levels - 1 ); - } + // if the path is nothing but slashes, the result is always "/". + if ( strspn( $path, '/' ) === strlen( $path ) ) { + return 1 === $levels ? '/' : wp_unix_dirname( '/', $levels - 1 ); + } - // strip trailing slashes (but never the single root slash). - $path = rtrim( $path, '/' ); - if ( '' === $path ) { // happens when the original was just "/". - return 1 === $levels ? '/' : wp_unix_dirname( '/', $levels - 1 ); - } + // strip trailing slashes (but never the single root slash). + $path = rtrim( $path, '/' ); + if ( '' === $path ) { // happens when the original was just "/". + return 1 === $levels ? '/' : wp_unix_dirname( '/', $levels - 1 ); + } - // locate the last slash. - $slash = strrpos( $path, '/' ); - if ( false === $slash ) { // no slash → current dir. - $path = '.'; - } else { - $path = substr( $path, 0, $slash ); // cut off the basename. - $path = rtrim( $path, '/' ); // collapse duplicate slashes. - if ( '' === $path ) { - $path = '/'; // “/foo” → “/”. + // locate the last slash. + $slash = strrpos( $path, '/' ); + if ( false === $slash ) { // no slash → current dir. + $path = '.'; + } else { + $path = substr( $path, 0, $slash ); // cut off the basename. + $path = rtrim( $path, '/' ); // collapse duplicate slashes. + if ( '' === $path ) { + $path = '/'; // “/foo” → “/”. + } } - } - // recurse for additional levels. - return $levels > 1 ? wp_unix_dirname( $path, $levels - 1 ) : $path; + // recurse for additional levels. + return $levels > 1 ? wp_unix_dirname( $path, $levels - 1 ) : $path; + } } diff --git a/components/Git/functions.php b/components/Git/functions.php index 3f9e25db..a4c60893 100644 --- a/components/Git/functions.php +++ b/components/Git/functions.php @@ -4,25 +4,27 @@ use WordPress\Git\Model\TreeEntry; -function get_all_descendant_oids_in_tree( GitRepository $repository, $tree_oid, $options = array() ) { - $oids = array(); - $trees = array( $tree_oid ); +if ( ! function_exists( __NAMESPACE__ . '\\get_all_descendant_oids_in_tree' ) ) { + function get_all_descendant_oids_in_tree( GitRepository $repository, $tree_oid, $options = array() ) { + $oids = array(); + $trees = array( $tree_oid ); - $object_types = $options['object_types'] ?? null; + $object_types = $options['object_types'] ?? null; - while ( ! empty( $trees ) ) { - $tree_hash = array_pop( $trees ); - $tree = $repository->read_object( $tree_hash )->as_tree(); - foreach ( $tree->entries as $entry ) { - if ( TreeEntry::FILE_MODE_DIRECTORY === $entry->get_mode_bucket() ) { - $trees[] = $entry->hash; - } + while ( ! empty( $trees ) ) { + $tree_hash = array_pop( $trees ); + $tree = $repository->read_object( $tree_hash )->as_tree(); + foreach ( $tree->entries as $entry ) { + if ( TreeEntry::FILE_MODE_DIRECTORY === $entry->get_mode_bucket() ) { + $trees[] = $entry->hash; + } - if ( null === $object_types || in_array( $entry->get_mode_bucket(), $object_types ) ) { - $oids[ $entry->hash ] = true; + if ( null === $object_types || in_array( $entry->get_mode_bucket(), $object_types, true ) ) { + $oids[ $entry->hash ] = true; + } } } - } - return array_keys( $oids ); + return array_keys( $oids ); + } } diff --git a/components/Zip/functions.php b/components/Zip/functions.php index b9452c63..77732e82 100644 --- a/components/Zip/functions.php +++ b/components/Zip/functions.php @@ -5,16 +5,18 @@ use WordPress\ByteStream\NotEnoughDataException; use WordPress\ByteStream\ReadStream\ByteReadStream; -function is_zip_file_stream( ByteReadStream $stream ) { - if ( $stream->length() < 4 ) { - return false; - } +if ( ! function_exists( __NAMESPACE__ . '\\is_zip_file_stream' ) ) { + function is_zip_file_stream( ByteReadStream $stream ) { + if ( $stream->length() < 4 ) { + return false; + } - try { - $stream->pull( 4, ByteReadStream::PULL_EXACTLY ); - } catch ( NotEnoughDataException $e ) { - return false; - } + try { + $stream->pull( 4, ByteReadStream::PULL_EXACTLY ); + } catch ( NotEnoughDataException $e ) { + return false; + } - return "PK\x03\x04" === $stream->peek( 4 ); + return "PK\x03\x04" === $stream->peek( 4 ); + } } diff --git a/plugins/push-md/Tests/BootstrapCompatibilityTest.php b/plugins/push-md/Tests/BootstrapCompatibilityTest.php new file mode 100644 index 00000000..8ec1b20c --- /dev/null +++ b/plugins/push-md/Tests/BootstrapCompatibilityTest.php @@ -0,0 +1,77 @@ +markTestSkipped( 'proc_open() is required for the Push MD bootstrap compatibility test.' ); + } + + $project_dir = dirname( __DIR__, 3 ); + $filesystem_functions = $project_dir . '/components/Filesystem/functions.php'; + $dev_bootstrap = dirname( __DIR__ ) . '/push-md-dev-bootstrap.php'; + $code = 'define( "ABSPATH", "/tmp/wp/" );' . + 'require ' . var_export( $filesystem_functions, true ) . ';' . + 'require ' . var_export( $dev_bootstrap, true ) . ';' . + 'echo "loaded\n";'; + + $result = $this->run_php( $code ); + + $this->assertSame( 0, $result['exit_code'], $result['stdout'] . $result['stderr'] ); + $this->assertSame( "loaded\n", $result['stdout'] ); + $this->assertSame( '', $result['stderr'] ); + } + + public function testDevBootstrapLoadsMissingHelpersWhenOlderToolkitFunctionExists() { + if ( ! function_exists( 'proc_open' ) ) { + $this->markTestSkipped( 'proc_open() is required for the Push MD bootstrap compatibility test.' ); + } + + $dev_bootstrap = dirname( __DIR__ ) . '/push-md-dev-bootstrap.php'; + $code = 'namespace WordPress\\Filesystem {' . + 'function ls_recursive( $filesystem, $path = "/" ) { return array(); }' . + '} namespace {' . + 'define( "ABSPATH", "/tmp/wp/" );' . + 'require ' . var_export( $dev_bootstrap, true ) . ';' . + 'echo function_exists( "WordPress\\\\Filesystem\\\\wp_join_unix_paths" ) ? "loaded\n" : "missing\n";' . + '}'; + + $result = $this->run_php( $code ); + + $this->assertSame( 0, $result['exit_code'], $result['stdout'] . $result['stderr'] ); + $this->assertSame( "loaded\n", $result['stdout'] ); + $this->assertSame( '', $result['stderr'] ); + } + + private function run_php( $code ) { + $descriptor_spec = array( + 0 => array( 'pipe', 'r' ), + 1 => array( 'pipe', 'w' ), + 2 => array( 'pipe', 'w' ), + ); + $command = escapeshellarg( PHP_BINARY ) . ' -d display_errors=1 -r ' . escapeshellarg( $code ); + $process = proc_open( $command, $descriptor_spec, $pipes ); + + if ( ! is_resource( $process ) ) { + $this->fail( sprintf( 'Failed to start command: %s', $command ) ); + } + + fclose( $pipes[0] ); + $stdout = stream_get_contents( $pipes[1] ); + $stderr = stream_get_contents( $pipes[2] ); + fclose( $pipes[1] ); + fclose( $pipes[2] ); + + return array( + 'exit_code' => proc_close( $process ), + 'stdout' => $stdout, + 'stderr' => $stderr, + ); + } +} diff --git a/plugins/push-md/push-md-dev-bootstrap.php b/plugins/push-md/push-md-dev-bootstrap.php index 93614667..4877a87e 100644 --- a/plugins/push-md/push-md-dev-bootstrap.php +++ b/plugins/push-md/push-md-dev-bootstrap.php @@ -4,6 +4,8 @@ exit; } +require_once __DIR__ . '/push-md-toolkit-loader.php'; + if ( ! class_exists( 'Composer\\Autoload\\ClassLoader' ) ) { require_once __DIR__ . '/../../vendor/composer/ClassLoader.php'; } @@ -38,5 +40,5 @@ ); foreach ( $pmd_files as $pmd_file ) { - require_once $pmd_file; + pmd_require_toolkit_file( md5( 'push-md:' . $pmd_file ), $pmd_file ); } diff --git a/plugins/push-md/push-md-phar-bootstrap.php b/plugins/push-md/push-md-phar-bootstrap.php index 5a9463f6..2c73319c 100644 --- a/plugins/push-md/push-md-phar-bootstrap.php +++ b/plugins/push-md/push-md-phar-bootstrap.php @@ -4,6 +4,8 @@ exit; } +require_once __DIR__ . '/push-md-toolkit-loader.php'; + $pmd_phar = 'phar://' . __DIR__ . '/php-toolkit.phar'; if ( ! class_exists( 'Composer\\Autoload\\ClassLoader' ) ) { @@ -40,5 +42,5 @@ ); foreach ( $pmd_files as $pmd_file ) { - require_once $pmd_file; + pmd_require_toolkit_file( md5( 'push-md:' . $pmd_file ), $pmd_file ); } diff --git a/plugins/push-md/push-md-toolkit-bootstrap.php b/plugins/push-md/push-md-toolkit-bootstrap.php index 96eef7e8..87abe583 100644 --- a/plugins/push-md/push-md-toolkit-bootstrap.php +++ b/plugins/push-md/push-md-toolkit-bootstrap.php @@ -4,6 +4,8 @@ exit; } +require_once __DIR__ . '/push-md-toolkit-loader.php'; + function pmd_load_core_html_api() { if ( class_exists( 'WP_HTML_Tag_Processor' ) && class_exists( 'WP_HTML_Processor' ) ) { return; @@ -65,8 +67,8 @@ function pmd_load_toolkit_bundle() { $pmd_loader->register( true ); $pmd_files = require $pmd_toolkit . '/vendor/composer/autoload_files.php'; - foreach ( $pmd_files as $pmd_file ) { - require_once $pmd_file; + foreach ( $pmd_files as $pmd_file_identifier => $pmd_file ) { + pmd_require_toolkit_file( $pmd_file_identifier, $pmd_file ); } } diff --git a/plugins/push-md/push-md-toolkit-loader.php b/plugins/push-md/push-md-toolkit-loader.php new file mode 100644 index 00000000..be37e339 --- /dev/null +++ b/plugins/push-md/push-md-toolkit-loader.php @@ -0,0 +1,19 @@ + Date: Fri, 15 May 2026 14:25:36 +0200 Subject: [PATCH 2/5] Handle Push MD info refs errors as Git responses --- .../push-md/Tests/GitServiceDetectionTest.php | 45 +++++++++++++++++++ plugins/push-md/class-pmd-plugin.php | 24 +++++++--- 2 files changed, 64 insertions(+), 5 deletions(-) create mode 100644 plugins/push-md/Tests/GitServiceDetectionTest.php diff --git a/plugins/push-md/Tests/GitServiceDetectionTest.php b/plugins/push-md/Tests/GitServiceDetectionTest.php new file mode 100644 index 00000000..faac437e --- /dev/null +++ b/plugins/push-md/Tests/GitServiceDetectionTest.php @@ -0,0 +1,45 @@ +assertSame( + 'git-upload-pack', + $this->git_service_from_request( '/info/refs?service=git-upload-pack' ) + ); + } + + public function testInfoRefsReceivePackPathIsRecognizedAsGitService() { + $this->assertSame( + 'git-receive-pack', + $this->git_service_from_request( '/info/refs?service=git-receive-pack' ) + ); + } + + public function testInvalidInfoRefsServiceIsRejected() { + $this->assertSame( + '', + $this->git_service_from_request( '/info/refs?service=git-archive' ) + ); + } + + private function git_service_from_request( $git_path ) { + $method = new ReflectionMethod( 'PMD_Plugin', 'git_service_from_request' ); + $method->setAccessible( true ); + + return $method->invoke( null, $git_path, new WP_REST_Request() ); + } +} diff --git a/plugins/push-md/class-pmd-plugin.php b/plugins/push-md/class-pmd-plugin.php index d34e130e..827ad356 100644 --- a/plugins/push-md/class-pmd-plugin.php +++ b/plugins/push-md/class-pmd-plugin.php @@ -307,8 +307,14 @@ private static function current_user_can_read_exported_content() { public static function handle_rest_request( WP_REST_Request $request ) { $previous_error_handler = set_error_handler( array( __CLASS__, 'throw_on_php_warning' ) ); // phpcs:ignore + $git_path = ''; try { + $git_path = self::build_git_path( $request ); + if ( is_wp_error( $git_path ) ) { + return $git_path; + } + if ( ! PMD_Seeder::is_ready() ) { PMD_Seeder::drive( 5 ); } @@ -324,11 +330,6 @@ public static function handle_rest_request( WP_REST_Request $request ) { return $response->to_rest_response(); } - $git_path = self::build_git_path( $request ); - if ( is_wp_error( $git_path ) ) { - return $git_path; - } - $request_body = file_get_contents( 'php://input' ); $repository = self::open_repository(); try { @@ -395,6 +396,14 @@ public static function handle_rest_request( WP_REST_Request $request ) { return $response->to_rest_response(); } catch ( Throwable $exception ) { + $service = self::git_service_from_request( $git_path, $request ); + if ( $service ) { + return self::build_protocol_error_response( + $service, + self::get_throwable_message( $exception ) + ); + } + return new WP_Error( 'pmd_error', self::get_throwable_message( $exception ), @@ -506,6 +515,11 @@ private static function git_service_from_request( $git_path, WP_REST_Request $re return ltrim( $git_path, '/' ); } + $info_refs_prefix = '/info/refs?service='; + if ( 0 === strpos( $git_path, $info_refs_prefix ) ) { + return self::normalize_git_service( substr( $git_path, strlen( $info_refs_prefix ) ) ); + } + return ''; } From 0eeaf57a99b8dc9b991c0a0e6ecb829f57a9fdbf Mon Sep 17 00:00:00 2001 From: artpi Date: Fri, 15 May 2026 14:38:15 +0200 Subject: [PATCH 3/5] Avoid Push MD export collisions for empty slugs --- plugins/push-md/Tests/ExportPathTest.php | 61 ++++++++++++++++++++++++ plugins/push-md/class-pmd-plugin.php | 21 ++++++-- 2 files changed, 78 insertions(+), 4 deletions(-) create mode 100644 plugins/push-md/Tests/ExportPathTest.php diff --git a/plugins/push-md/Tests/ExportPathTest.php b/plugins/push-md/Tests/ExportPathTest.php new file mode 100644 index 00000000..d38c9257 --- /dev/null +++ b/plugins/push-md/Tests/ExportPathTest.php @@ -0,0 +1,61 @@ +assertSame( + 'post/post-4937.md', + PMD_Plugin::build_markdown_path( $this->post( 4937, 'post', '' ) ) + ); + } + + public function testPageWithEmptySlugUsesStableIdFallbackPath() { + $this->assertSame( + 'page/page-3814.md', + PMD_Plugin::build_markdown_path( $this->post( 3814, 'page', '' ) ) + ); + } + + public function testExistingSlugStillDefinesExportPath() { + $this->assertSame( + 'post/amazing-potatoes.md', + PMD_Plugin::build_markdown_path( $this->post( 4163, 'post', 'amazing-potatoes' ) ) + ); + } + + private function post( $id, $post_type, $post_name ) { + $post = new WP_Post(); + $post->ID = $id; + $post->post_type = $post_type; + $post->post_name = $post_name; + $post->post_parent = 0; + + return $post; + } +} diff --git a/plugins/push-md/class-pmd-plugin.php b/plugins/push-md/class-pmd-plugin.php index 827ad356..604f5a7d 100644 --- a/plugins/push-md/class-pmd-plugin.php +++ b/plugins/push-md/class-pmd-plugin.php @@ -1008,7 +1008,7 @@ public static function build_markdown_path( $post_or_type, $slug = null ) { if ( self::is_raw_block_post_type( $post_or_type->post_type ) ) { return self::build_raw_block_path( $post_or_type->post_type, - $post_or_type->post_name, + self::get_export_post_slug( $post_or_type ), self::get_raw_block_post_theme_slug( $post_or_type ) ); } @@ -1021,7 +1021,7 @@ public static function build_markdown_path( $post_or_type, $slug = null ) { return self::build_page_markdown_path( $post_or_type ); } - return ltrim( $post_or_type->post_type . '/' . $post_or_type->post_name . '.md', '/' ); + return ltrim( $post_or_type->post_type . '/' . self::get_export_post_slug( $post_or_type ) . '.md', '/' ); } if ( self::is_raw_block_post_type( $post_or_type ) ) { @@ -1034,8 +1034,21 @@ public static function build_markdown_path( $post_or_type, $slug = null ) { return ltrim( $post_or_type . '/' . $slug . '.md', '/' ); } + private static function get_export_post_slug( WP_Post $post ) { + if ( '' !== $post->post_name ) { + return $post->post_name; + } + + $post_type = sanitize_title( $post->post_type ); + if ( '' === $post_type ) { + $post_type = 'post'; + } + + return $post_type . '-' . intval( $post->ID ); + } + private static function build_page_markdown_path( WP_Post $post ) { - $segments = array( $post->post_name ); + $segments = array( self::get_export_post_slug( $post ) ); $seen = array( intval( $post->ID ) => true ); $parent_id = intval( $post->post_parent ); @@ -1052,7 +1065,7 @@ private static function build_page_markdown_path( WP_Post $post ) { throw new Exception( 'Git export rejected because a WordPress page has a non-exported parent page. Restore, publish, or reparent the child page before cloning.' ); } - array_unshift( $segments, $parent->post_name ); + array_unshift( $segments, self::get_export_post_slug( $parent ) ); $seen[ $parent_id ] = true; $parent_id = intval( $parent->post_parent ); } From 8039c42d0caea16038399944dd33773fac5c7874 Mon Sep 17 00:00:00 2001 From: artpi Date: Fri, 15 May 2026 14:51:45 +0200 Subject: [PATCH 4/5] Keep Push MD slugless fallback paths stable --- plugins/push-md/Tests/ExportPathTest.php | 66 ++++++++++++++ plugins/push-md/class-pmd-plugin.php | 108 +++++++++++++++++++++-- 2 files changed, 167 insertions(+), 7 deletions(-) diff --git a/plugins/push-md/Tests/ExportPathTest.php b/plugins/push-md/Tests/ExportPathTest.php index d38c9257..3d07727b 100644 --- a/plugins/push-md/Tests/ExportPathTest.php +++ b/plugins/push-md/Tests/ExportPathTest.php @@ -24,6 +24,20 @@ function sanitize_title( $title ) { } } +if ( ! function_exists( 'post_type_exists' ) ) { + function post_type_exists( $post_type ) { + return in_array( $post_type, array( 'post', 'page' ), true ); + } +} + +if ( ! function_exists( 'taxonomy_exists' ) ) { + function taxonomy_exists( $taxonomy ) { + unset( $taxonomy ); + + return false; + } +} + require_once dirname( __DIR__ ) . '/class-pmd-plugin.php'; class PMD_Export_Path_Test extends TestCase { @@ -49,6 +63,43 @@ public function testExistingSlugStillDefinesExportPath() { ); } + public function testCurrentSluglessFallbackPathIsAccepted() { + $this->assertTrue( + $this->assert_id_fallback_path_is_current( + 'post/post-4937.md', + $this->post( 4937, 'post', '' ) + ) + ); + } + + public function testCurrentSluglessFallbackPathDoesNotWriteFallbackAsPostSlug() { + $this->assertTrue( + $this->is_current_slugless_fallback_path( + 'post/post-4937.md', + $this->post( 4937, 'post', '' ) + ) + ); + } + + public function testStaleFallbackPathIsRejectedAfterPostReceivesSlug() { + $this->expectException( Exception::class ); + $this->expectExceptionMessage( 'fallback filename is stale' ); + + $this->assert_id_fallback_path_is_current( + 'post/post-4937.md', + $this->post( 4937, 'post', 'test-post-from-cli' ) + ); + } + + public function testFallbackShapedRealSlugIsAcceptedWhenItIsCurrent() { + $this->assertTrue( + $this->assert_id_fallback_path_is_current( + 'post/post-4937.md', + $this->post( 4937, 'post', 'post-4937' ) + ) + ); + } + private function post( $id, $post_type, $post_name ) { $post = new WP_Post(); $post->ID = $id; @@ -58,4 +109,19 @@ private function post( $id, $post_type, $post_name ) { return $post; } + + private function assert_id_fallback_path_is_current( $path, WP_Post $post ) { + $method = new ReflectionMethod( 'PMD_Plugin', 'assert_id_fallback_path_is_current' ); + $method->setAccessible( true ); + $method->invoke( null, $path, $post ); + + return true; + } + + private function is_current_slugless_fallback_path( $path, WP_Post $post ) { + $method = new ReflectionMethod( 'PMD_Plugin', 'is_current_slugless_fallback_path' ); + $method->setAccessible( true ); + + return $method->invoke( null, $path, $post ); + } } diff --git a/plugins/push-md/class-pmd-plugin.php b/plugins/push-md/class-pmd-plugin.php index 604f5a7d..1ca8c233 100644 --- a/plugins/push-md/class-pmd-plugin.php +++ b/plugins/push-md/class-pmd-plugin.php @@ -1039,12 +1039,65 @@ private static function get_export_post_slug( WP_Post $post ) { return $post->post_name; } - $post_type = sanitize_title( $post->post_type ); + return self::get_id_fallback_slug( $post->post_type, $post->ID ); + } + + private static function get_id_fallback_slug( $post_type, $post_id ) { + $post_type = sanitize_title( $post_type ); if ( '' === $post_type ) { $post_type = 'post'; } - return $post_type . '-' . intval( $post->ID ); + return $post_type . '-' . intval( $post_id ); + } + + private static function get_id_from_fallback_slug( $post_type, $slug ) { + $prefix = sanitize_title( $post_type ); + if ( '' === $prefix ) { + $prefix = 'post'; + } + $prefix .= '-'; + if ( 0 !== strpos( $slug, $prefix ) ) { + return 0; + } + + $id = substr( $slug, strlen( $prefix ) ); + if ( ! preg_match( '/^[1-9][0-9]*$/', $id ) ) { + return 0; + } + + return intval( $id ); + } + + private static function path_uses_id_fallback_slug( $path ) { + $post_type = self::path_to_post_type( $path ); + if ( self::is_raw_block_post_type( $post_type ) || 'wp_global_styles' === $post_type || 'wp_guideline' === $post_type ) { + return false; + } + + if ( 'page' === $post_type ) { + foreach ( self::path_to_page_slugs( $path ) as $slug ) { + if ( self::get_id_from_fallback_slug( 'page', $slug ) ) { + return true; + } + } + + return false; + } + + return (bool) self::get_id_from_fallback_slug( $post_type, self::path_to_slug( $path ) ); + } + + private static function is_current_slugless_fallback_path( $path, WP_Post $post ) { + return '' === $post->post_name && self::path_uses_id_fallback_slug( $path ) && self::build_markdown_path( $post ) === $path; + } + + private static function assert_id_fallback_path_is_current( $path, WP_Post $post ) { + if ( ! self::path_uses_id_fallback_slug( $path ) || self::build_markdown_path( $post ) === $path ) { + return; + } + + throw new Exception( 'Push rejected because this fallback filename is stale after WordPress assigned the post a slug. Pull the latest changes and edit the slug-based file path.' ); } private static function build_page_markdown_path( WP_Post $post ) { @@ -1694,12 +1747,15 @@ private static function upsert_post_from_markdown( $path, $markdown, $options = } self::reject_path_identity_frontmatter( $metadata ); - $metadata = self::normalize_supported_frontmatter( + $metadata = self::normalize_supported_frontmatter( $metadata, array( 'id', 'title', 'date', 'status', 'description' ) ); - $post_id = self::find_post_id_by_path_metadata( $path, $metadata ); - $existing_post = $post_id ? get_post( $post_id ) : null; + $post_id = self::find_post_id_by_path_metadata( $path, $metadata ); + $existing_post = $post_id ? get_post( $post_id ) : null; + if ( $existing_post ) { + self::assert_id_fallback_path_is_current( $path, $existing_post ); + } $default_status = $existing_post && 'trash' !== $existing_post->post_status ? $existing_post->post_status : 'draft'; $post_status = self::normalize_frontmatter_status( isset( $metadata['status'] ) ? $metadata['status'] : $default_status @@ -1725,11 +1781,13 @@ private static function upsert_post_from_markdown( $path, $markdown, $options = $postarr = array( 'post_type' => $post_type, - 'post_name' => $slug, 'post_title' => isset( $metadata['title'] ) ? $metadata['title'] : ucwords( str_replace( '-', ' ', $slug ) ), 'post_status' => $post_status, 'post_content' => $result->get_block_markup(), ); + if ( ! $existing_post || ! self::is_current_slugless_fallback_path( $path, $existing_post ) ) { + $postarr['post_name'] = $slug; + } if ( 'page' === $post_type ) { $postarr['post_parent'] = $post_parent; } @@ -2511,6 +2569,11 @@ private static function find_page_id_by_path( $path, $include_trash = true ) { ); if ( empty( $posts ) ) { + $page_id = self::find_slugless_page_id_by_fallback_slug( $slug, $parent_id, $statuses ); + if ( $page_id ) { + return $page_id; + } + if ( ! $include_trash ) { self::reject_unsupported_status_slug_collision( 'page', $slug, self::$supported_post_statuses, $parent_id ); return 0; @@ -2527,6 +2590,11 @@ private static function find_page_id_by_path( $path, $include_trash = true ) { ) ); if ( empty( $posts ) ) { + $page_id = self::find_slugless_page_id_by_fallback_slug( $slug, $parent_id, array( 'trash' ) ); + if ( $page_id ) { + return $page_id; + } + self::reject_unsupported_status_slug_collision( 'page', $slug, @@ -2558,7 +2626,13 @@ private static function resolve_page_parent_id( $parent_slugs, $include_trash = ) ); if ( empty( $parents ) ) { - throw new Exception( 'Push rejected because nested page paths must reference existing WordPress parent pages.' ); + $page_id = self::find_slugless_page_id_by_fallback_slug( $slug, $parent_id, $statuses ); + if ( ! $page_id ) { + throw new Exception( 'Push rejected because nested page paths must reference existing WordPress parent pages.' ); + } + + $parent_id = $page_id; + continue; } $parent_id = intval( $parents[0] ); @@ -2567,6 +2641,26 @@ private static function resolve_page_parent_id( $parent_slugs, $include_trash = return $parent_id; } + private static function find_slugless_page_id_by_fallback_slug( $slug, $parent_id, $statuses ) { + $id = self::get_id_from_fallback_slug( 'page', $slug ); + if ( ! $id ) { + return 0; + } + + $post = get_post( $id ); + if ( + ! $post || + 'page' !== $post->post_type || + '' !== $post->post_name || + intval( $post->post_parent ) !== intval( $parent_id ) || + ! in_array( $post->post_status, $statuses, true ) + ) { + return 0; + } + + return intval( $post->ID ); + } + private static function path_to_page_parent_id( $path, $include_trash = true ) { $slugs = self::path_to_page_slugs( $path ); array_pop( $slugs ); From 53286cc127f499abd7974f3354cf65bb2fa3659b Mon Sep 17 00:00:00 2001 From: artpi Date: Fri, 15 May 2026 15:12:10 +0200 Subject: [PATCH 5/5] Fix Push MD bootstrap tests on Windows --- components/Filesystem/Tests/FunctionsTest.php | 1 - .../Tests/BootstrapCompatibilityTest.php | 42 +++++++++++++++++-- 2 files changed, 38 insertions(+), 5 deletions(-) diff --git a/components/Filesystem/Tests/FunctionsTest.php b/components/Filesystem/Tests/FunctionsTest.php index 05431e97..1b0f9a81 100644 --- a/components/Filesystem/Tests/FunctionsTest.php +++ b/components/Filesystem/Tests/FunctionsTest.php @@ -4,7 +4,6 @@ use function WordPress\Filesystem\wp_join_unix_paths; use function WordPress\Filesystem\wp_unix_dirname; -use ValueError; class FunctionsTest extends TestCase { public function testBasicPathJoining() { diff --git a/plugins/push-md/Tests/BootstrapCompatibilityTest.php b/plugins/push-md/Tests/BootstrapCompatibilityTest.php index 8ec1b20c..bb9106cf 100644 --- a/plugins/push-md/Tests/BootstrapCompatibilityTest.php +++ b/plugins/push-md/Tests/BootstrapCompatibilityTest.php @@ -55,11 +55,21 @@ private function run_php( $code ) { 1 => array( 'pipe', 'w' ), 2 => array( 'pipe', 'w' ), ); - $command = escapeshellarg( PHP_BINARY ) . ' -d display_errors=1 -r ' . escapeshellarg( $code ); - $process = proc_open( $command, $descriptor_spec, $pipes ); + $script = tempnam( sys_get_temp_dir(), 'pmd-bootstrap-' ); + if ( false === $script || false === file_put_contents( $script, "fail( 'Failed to write temporary PHP script for bootstrap compatibility test.' ); + } + + $command = $this->build_php_command( $script ); + $options = array(); + if ( '\\' === DIRECTORY_SEPARATOR ) { + $options['bypass_shell'] = true; + } + $process = proc_open( $command, $descriptor_spec, $pipes, null, null, $options ); if ( ! is_resource( $process ) ) { - $this->fail( sprintf( 'Failed to start command: %s', $command ) ); + unlink( $script ); + $this->fail( sprintf( 'Failed to start command: %s', $this->format_command_for_message( $command ) ) ); } fclose( $pipes[0] ); @@ -67,11 +77,35 @@ private function run_php( $code ) { $stderr = stream_get_contents( $pipes[2] ); fclose( $pipes[1] ); fclose( $pipes[2] ); + $exit_code = proc_close( $process ); + unlink( $script ); return array( - 'exit_code' => proc_close( $process ), + 'exit_code' => $exit_code, 'stdout' => $stdout, 'stderr' => $stderr, ); } + + private function build_php_command( $script ) { + $args = array( PHP_BINARY, '-d', 'display_errors=1', $script ); + if ( PHP_VERSION_ID >= 70400 ) { + return $args; + } + + $command = ''; + foreach ( $args as $arg ) { + $command .= escapeshellarg( $arg ) . ' '; + } + + return rtrim( $command ); + } + + private function format_command_for_message( $command ) { + if ( is_array( $command ) ) { + return implode( ' ', $command ); + } + + return $command; + } }