From ccdb11186cbb10f1d253c58770cb9b2fbf4b0c97 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 11 Jun 2026 23:40:58 +0200 Subject: [PATCH 1/5] HTML API: Preserve XMP raw text serialization --- .../html-api/class-wp-html-processor.php | 1 + .../html-api/wpHtmlProcessor-serialize.php | 25 +++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 35d91fad3129c..b05da8a80e99d 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -1498,6 +1498,7 @@ public function serialize_token(): string { case 'SCRIPT': case 'STYLE': + case 'XMP': break; default: diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php b/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php index e516addb6c314..1aa367fe7283a 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php @@ -134,6 +134,30 @@ public function test_style_contents_are_not_escaped() { ); } + /** + * Ensures that XMP contents are not escaped, as they are not parsed like text nodes are. + * + * XMP contents are parsed as raw text: character references are never decoded. + * Escaping the contents would change the document, e.g. a "<" would be replaced + * by the literal text "<" after serializing and re-parsing. + * + * @ticket 65372 + */ + public function test_xmp_contents_are_not_escaped() { + $normalized = WP_HTML_Processor::normalize( "1 < 2 &amp; apples > or\x00anges" ); + + $this->assertSame( + "1 < 2 &amp; apples > or\u{FFFD}anges", + $normalized, + 'Should have preserved text inside an XMP element, except for replacing NULL bytes.' + ); + $this->assertSame( + $normalized, + WP_HTML_Processor::normalize( $normalized ), + 'Normalizing already-normalized XMP should not escape the raw text again.' 
+ ); + } + public function test_unexpected_closing_tags_are_removed() { $this->assertSame( WP_HTML_Processor::normalize( 'onetwothree' ), @@ -281,6 +305,7 @@ public static function data_tokens_with_null_bytes() { 'Foreign content text' => array( "one\x00two", "one\u{FFFD}two" ), 'SCRIPT content' => array( "", "" ), 'STYLE content' => array( "", "" ), + 'XMP content' => array( "a\x00b", "a\u{FFFD}b" ), 'Comment text' => array( "", "" ), ); } From 58f02a0278b49f35436ecd14a73a9701f1a115ef Mon Sep 17 00:00:00 2001 From: Jonathan Desrosiers Date: Tue, 16 Jun 2026 16:03:20 +0000 Subject: [PATCH 2/5] Build/Test Tools: Output a list of discovered routes. The list of routes to be copied by the `copy:routes` Grunt task is configured dynamically by the `routes:setup` task, which parses the `gutenberg/build/routes/registry.php` file included in the built asset from the `gutenberg` repository. The task currently produces output only when an error is encountered, such as a missing `registry.php` file or invalid route name. This adjusts the task to produce output so that the list of routes being processed is clear. See #65471. git-svn-id: https://develop.svn.wordpress.org/trunk@62508 602fd350-edb4-49c9-b593-d223f7449a82 --- Gruntfile.js | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Gruntfile.js b/Gruntfile.js index 815ccce3af535..2ce79d03bddc6 100644 --- a/Gruntfile.js +++ b/Gruntfile.js @@ -2154,6 +2154,14 @@ module.exports = function(grunt) { ]; } ) ) ); + + grunt.log.writeln( + 'Found ' + routeNames.length + ' route' + ( routeNames.length === 1 ? '' : 's' ) + + ' registered in ' + registryPath + ':' + ); + routeNames.forEach( function( name ) { + grunt.log.writeln( ' - ' + name ); + } ); } ); grunt.registerTask( 'build:gutenberg', [ From f0480f5a3f10afb358cdb4d980be641c299f7207 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 16 Jun 2026 17:13:54 +0000 Subject: [PATCH 3/5] HTML API: Ensure tag processor recognizes SCRIPT tag closers. 
Address edge cases where SCRIPT tag closers were not detected and the processor remained paused on an incomplete token. Developed in https://github.com/WordPress/wordpress-develop/pull/12184. Props jonsurrell, dmsnell. See #65372. git-svn-id: https://develop.svn.wordpress.org/trunk@62509 602fd350-edb4-49c9-b593-d223f7449a82 --- src/wp-includes/html-api/class-wp-html-tag-processor.php | 1 - tests/phpunit/tests/html-api/wpHtmlTagProcessor.php | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index fe8a20c1ea4ee..e41e1120550b5 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -1619,7 +1619,6 @@ private function skip_script_data(): bool { ( 'p' === $html[ $at + 4 ] || 'P' === $html[ $at + 4 ] ) && ( 't' === $html[ $at + 5 ] || 'T' === $html[ $at + 5 ] ) ) ) { - ++$at; continue; } diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php index a066543d8e11f..fd73ddc43a4ba 100644 --- a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php @@ -2116,6 +2116,8 @@ public static function data_script_tag(): Generator { yield 'Script tag with close' => array( "', true ); + yield 'Script text less-than' => array( '', true ); + yield 'Script text less-than solidus' => array( '', true ); yield 'Script data escaped' => array( '', true ); yield 'Script data double-escaped exit (comment)' => array( '', true ); yield 'Script data double-escaped exit (closed ">")' => array( '', true ); From cdc6f6e4867e231579e39278c98a0341e4c307ed Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 16 Jun 2026 19:52:17 +0200 Subject: [PATCH 4/5] Improve tests --- .../tests/html-api/wpHtmlProcessor-serialize.php | 10 +++------- 1 file changed, 3 insertions(+), 7 
deletions(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php b/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php index c22c7612aead3..e6c5cd7da1497 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php @@ -267,18 +267,13 @@ public function test_style_contents_are_not_escaped() { * @ticket 65372 */ public function test_xmp_contents_are_not_escaped() { - $normalized = WP_HTML_Processor::normalize( "1 < 2 &amp; apples > or\x00anges" ); + $normalized = WP_HTML_Processor::normalize( " < > & \" ' \x00 " ); $this->assertSame( - "1 < 2 &amp; apples > or\u{FFFD}anges", + " < > & \" ' \u{FFFD} ", $normalized, 'Should have preserved text inside an XMP element, except for replacing NULL bytes.' ); - $this->assertSame( - $normalized, - WP_HTML_Processor::normalize( $normalized ), - 'Normalizing already-normalized XMP should not escape the raw text again.' - ); } public function test_unexpected_closing_tags_are_removed() { @@ -654,6 +649,7 @@ public static function data_provider_normalized_fuzzer_cases_that_should_be_idem 'Duplicate ALT boundary' => array( '' ), 'NULL byte in SVG child tag' => array( "" ), 'NULL byte before slash in SVG child tag' => array( "" ), + 'XMP generic raw text' => array( " < > & \" ' \x00 " ), ); } From fa7a94a117610bedd1d477a30544e5fb4a2099fe Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 16 Jun 2026 19:58:29 +0200 Subject: [PATCH 5/5] Adjust comment --- tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php b/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php index efc60d10d28e0..d9d7d7c13394a 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php @@ -258,11 +258,8 @@ public function 
test_style_contents_are_not_escaped() { } /** - * Ensures that XMP contents are not escaped, as they are not parsed like text nodes are. - * - * XMP contents are parsed as raw text: character references are never decoded. - * Escaping the contents would change the document, e.g. a "<" would be replaced - * by the literal text "<" after serializing and re-parsing. + * XMP contents are parsed using the generic raw text element parsing algorithm. + * Their contents should not be escaped with HTML character references on normalization. * * @ticket 65372 */