From 0d34362849c903020cc7a2dfd8243e344dfafc5f Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Mon, 19 Jan 2026 16:47:41 -0700 Subject: [PATCH] Build: Restore block parser in Core. (#10761) Trac ticket: Core-64521 The work in #10638 [[61438]](https://core.trac.wordpress.org/changeset/61438) for Core-64393 removed the block parser classes from Core, which caused numerous scripts to fail because they were missing. Conditional checks were added in #10718 [[61492]](https://core.trac.wordpress.org/changeset/61492) which left WordPress in an inoperable state. This patch restores the block parser in Core, in preparation for work to remove it from Gutenberg (in a separate patch). Ironically, the files were removed because the new build was copying them over from Gutenberg and the intent was to avoid having two sources of truth, but this was previously the existing mechanism, so having done nothing to the parser files would have left the status quo. This patch removes the problems originally created by removing the files. They will not be copied from Gutenberg any more and the only source of truth will be Core. Until removed from Gutenberg, because of the build changes, any changes made on the Gutenberg side will be lost unless manually copied over. Developed in: https://github.com/WordPress/wordpress-develop/pull/10761 Discussed in: https://core.trac.wordpress.org/ticket/64521 Follow-up to [61438], [61492]. Props dmsnell, mcsf, mukesh27, youknowriad. Co-authored-by: Miguel Fonseca Co-authored-by: Mukesh Panchal Co-authored-by: Riad Benguella Github-PR: 10761 Github-PR-URL: https://github.com/WordPress/wordpress-develop/pull/10761 Trac-Ticket: 64521 Trac-Ticket-URL: https://core.trac.wordpress.org/ticket/64521 Branch-Name: build/restore-block-parser --- .gitignore | 3 - src/wp-includes/blocks.php | 4 - .../class-wp-block-parser-block.php | 90 ++++ .../class-wp-block-parser-frame.php | 79 ++++ src/wp-includes/class-wp-block-parser.php | 404 ++++++++++++++++++ src/wp-includes/formatting.php | 5 - src/wp-settings.php | 12 +- tools/gutenberg/copy-gutenberg-build.js | 57 +-- 8 files changed, 581 insertions(+), 73 deletions(-) create mode 100644 src/wp-includes/class-wp-block-parser-block.php create mode 100644 src/wp-includes/class-wp-block-parser-frame.php create mode 100644 src/wp-includes/class-wp-block-parser.php diff --git a/.gitignore b/.gitignore index 330a92ca02c7b..3997df4c9d603 100644 --- a/.gitignore +++ b/.gitignore @@ -38,9 +38,6 @@ wp-tests-config.php /src/wp-includes/blocks/* !/src/wp-includes/blocks/index.php /src/wp-includes/build -/src/wp-includes/class-wp-block-parser.php -/src/wp-includes/class-wp-block-parser-block.php -/src/wp-includes/class-wp-block-parser-frame.php /src/wp-includes/theme.json /packagehash.txt /.gutenberg-hash diff --git a/src/wp-includes/blocks.php b/src/wp-includes/blocks.php index e2c594d7ecfc6..2a9968608106a 100644 --- a/src/wp-includes/blocks.php +++ b/src/wp-includes/blocks.php @@ -2421,10 +2421,6 @@ function parse_blocks( $content ) { */ $parser_class = apply_filters( 'block_parser_class', 'WP_Block_Parser' ); - if ( ! class_exists( $parser_class ) ) { - return array(); - } - $parser = new $parser_class(); return $parser->parse( $content ); } diff --git a/src/wp-includes/class-wp-block-parser-block.php b/src/wp-includes/class-wp-block-parser-block.php new file mode 100644 index 0000000000000..97dd687c1ffe1 --- /dev/null +++ b/src/wp-includes/class-wp-block-parser-block.php @@ -0,0 +1,90 @@ + 3 ) + * + * @since 5.0.0 + * @var array|null + */ + public $attrs; + + /** + * List of inner blocks (of this same class) + * + * @since 5.0.0 + * @var WP_Block_Parser_Block[] + */ + public $innerBlocks; // phpcs:ignore WordPress.NamingConventions.ValidVariableName + + /** + * Resultant HTML from inside block comment delimiters + * after removing inner blocks + * + * @example "...Just testing..." -> "Just testing..." + * + * @since 5.0.0 + * @var string + */ + public $innerHTML; // phpcs:ignore WordPress.NamingConventions.ValidVariableName + + /** + * List of string fragments and null markers where inner blocks were found + * + * @example array( + * 'innerHTML' => 'BeforeInnerAfter', + * 'innerBlocks' => array( block, block ), + * 'innerContent' => array( 'Before', null, 'Inner', null, 'After' ), + * ) + * + * @since 5.0.0 + * @var array + */ + public $innerContent; // phpcs:ignore WordPress.NamingConventions.ValidVariableName + + /** + * Constructor. + * + * Will populate object properties from the provided arguments. + * + * @since 5.0.0 + * + * @param string $name Name of block. + * @param array $attrs Optional set of attributes from block comment delimiters. + * @param array $inner_blocks List of inner blocks (of this same class). + * @param string $inner_html Resultant HTML from inside block comment delimiters after removing inner blocks. + * @param array $inner_content List of string fragments and null markers where inner blocks were found. + */ + public function __construct( $name, $attrs, $inner_blocks, $inner_html, $inner_content ) { + $this->blockName = $name; // phpcs:ignore WordPress.NamingConventions.ValidVariableName + $this->attrs = $attrs; + $this->innerBlocks = $inner_blocks; // phpcs:ignore WordPress.NamingConventions.ValidVariableName + $this->innerHTML = $inner_html; // phpcs:ignore WordPress.NamingConventions.ValidVariableName + $this->innerContent = $inner_content; // phpcs:ignore WordPress.NamingConventions.ValidVariableName + } +} diff --git a/src/wp-includes/class-wp-block-parser-frame.php b/src/wp-includes/class-wp-block-parser-frame.php new file mode 100644 index 0000000000000..6ab5dd3087dfb --- /dev/null +++ b/src/wp-includes/class-wp-block-parser-frame.php @@ -0,0 +1,79 @@ +block = $block; + $this->token_start = $token_start; + $this->token_length = $token_length; + $this->prev_offset = isset( $prev_offset ) ? $prev_offset : $token_start + $token_length; + $this->leading_html_start = $leading_html_start; + } +} diff --git a/src/wp-includes/class-wp-block-parser.php b/src/wp-includes/class-wp-block-parser.php new file mode 100644 index 0000000000000..bf8a59249d99d --- /dev/null +++ b/src/wp-includes/class-wp-block-parser.php @@ -0,0 +1,404 @@ +This is inside a block!" + * + * @since 5.0.0 + * @var string + */ + public $document; + + /** + * Tracks parsing progress through document + * + * @since 5.0.0 + * @var int + */ + public $offset; + + /** + * List of parsed blocks + * + * @since 5.0.0 + * @var array[] + */ + public $output; + + /** + * Stack of partially-parsed structures in memory during parse + * + * @since 5.0.0 + * @var WP_Block_Parser_Frame[] + */ + public $stack; + + /** + * Parses a document and returns a list of block structures + * + * When encountering an invalid parse will return a best-effort + * parse. In contrast to the specification parser this does not + * return an error on invalid inputs. + * + * @since 5.0.0 + * + * @param string $document Input document being parsed. + * @return array[] + */ + public function parse( $document ) { + $this->document = $document; + $this->offset = 0; + $this->output = array(); + $this->stack = array(); + + while ( $this->proceed() ) { + continue; + } + + return $this->output; + } + + /** + * Processes the next token from the input document + * and returns whether to proceed eating more tokens + * + * This is the "next step" function that essentially + * takes a token as its input and decides what to do + * with that token before descending deeper into a + * nested block tree or continuing along the document + * or breaking out of a level of nesting. + * + * @internal + * @since 5.0.0 + * @return bool + */ + public function proceed() { + $next_token = $this->next_token(); + list( $token_type, $block_name, $attrs, $start_offset, $token_length ) = $next_token; + $stack_depth = count( $this->stack ); + + // we may have some HTML soup before the next block. + $leading_html_start = $start_offset > $this->offset ? $this->offset : null; + + switch ( $token_type ) { + case 'no-more-tokens': + // if not in a block then flush output. + if ( 0 === $stack_depth ) { + $this->add_freeform(); + return false; + } + + /* + * Otherwise we have a problem + * This is an error + * + * we have options + * - treat it all as freeform text + * - assume an implicit closer (easiest when not nesting) + */ + + // for the easy case we'll assume an implicit closer. + if ( 1 === $stack_depth ) { + $this->add_block_from_stack(); + return false; + } + + /* + * for the nested case where it's more difficult we'll + * have to assume that multiple closers are missing + * and so we'll collapse the whole stack piecewise + */ + while ( 0 < count( $this->stack ) ) { + $this->add_block_from_stack(); + } + return false; + + case 'void-block': + /* + * easy case is if we stumbled upon a void block + * in the top-level of the document + */ + if ( 0 === $stack_depth ) { + if ( isset( $leading_html_start ) ) { + $this->output[] = (array) $this->freeform( + substr( + $this->document, + $leading_html_start, + $start_offset - $leading_html_start + ) + ); + } + + $this->output[] = (array) new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() ); + $this->offset = $start_offset + $token_length; + return true; + } + + // otherwise we found an inner block. + $this->add_inner_block( + new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() ), + $start_offset, + $token_length + ); + $this->offset = $start_offset + $token_length; + return true; + + case 'block-opener': + // track all newly-opened blocks on the stack. + array_push( + $this->stack, + new WP_Block_Parser_Frame( + new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() ), + $start_offset, + $token_length, + $start_offset + $token_length, + $leading_html_start + ) + ); + $this->offset = $start_offset + $token_length; + return true; + + case 'block-closer': + /* + * if we're missing an opener we're in trouble + * This is an error + */ + if ( 0 === $stack_depth ) { + /* + * we have options + * - assume an implicit opener + * - assume _this_ is the opener + * - give up and close out the document + */ + $this->add_freeform(); + return false; + } + + // if we're not nesting then this is easy - close the block. + if ( 1 === $stack_depth ) { + $this->add_block_from_stack( $start_offset ); + $this->offset = $start_offset + $token_length; + return true; + } + + /* + * otherwise we're nested and we have to close out the current + * block and add it as a new innerBlock to the parent + */ + $stack_top = array_pop( $this->stack ); + $html = substr( $this->document, $stack_top->prev_offset, $start_offset - $stack_top->prev_offset ); + $stack_top->block->innerHTML .= $html; + $stack_top->block->innerContent[] = $html; + $stack_top->prev_offset = $start_offset + $token_length; + + $this->add_inner_block( + $stack_top->block, + $stack_top->token_start, + $stack_top->token_length, + $start_offset + $token_length + ); + $this->offset = $start_offset + $token_length; + return true; + + default: + // This is an error. + $this->add_freeform(); + return false; + } + } + + /** + * Scans the document from where we last left off + * and finds the next valid token to parse if it exists + * + * Returns the type of the find: kind of find, block information, attributes + * + * @internal + * @since 5.0.0 + * @since 4.6.1 fixed a bug in attribute parsing which caused catastrophic backtracking on invalid block comments + * @return array + */ + public function next_token() { + $matches = null; + + /* + * aye the magic + * we're using a single RegExp to tokenize the block comment delimiters + * we're also using a trick here because the only difference between a + * block opener and a block closer is the leading `/` before `wp:` (and + * a closer has no attributes). we can trap them both and process the + * match back in PHP to see which one it was. + */ + $has_match = preg_match( + '/).)*+)?}\s+)?(?P\/)?-->/s', + $this->document, + $matches, + PREG_OFFSET_CAPTURE, + $this->offset + ); + + // if we get here we probably have catastrophic backtracking or out-of-memory in the PCRE. + if ( false === $has_match ) { + return array( 'no-more-tokens', null, null, null, null ); + } + + // we have no more tokens. + if ( 0 === $has_match ) { + return array( 'no-more-tokens', null, null, null, null ); + } + + list( $match, $started_at ) = $matches[0]; + + $length = strlen( $match ); + $is_closer = isset( $matches['closer'] ) && -1 !== $matches['closer'][1]; + $is_void = isset( $matches['void'] ) && -1 !== $matches['void'][1]; + $namespace = $matches['namespace']; + $namespace = ( isset( $namespace ) && -1 !== $namespace[1] ) ? $namespace[0] : 'core/'; + $name = $namespace . $matches['name'][0]; + $has_attrs = isset( $matches['attrs'] ) && -1 !== $matches['attrs'][1]; + + /* + * Fun fact! It's not trivial in PHP to create "an empty associative array" since all arrays + * are associative arrays. If we use `array()` we get a JSON `[]` + */ + $attrs = $has_attrs + ? json_decode( $matches['attrs'][0], /* as-associative */ true ) + : array(); + + /* + * This state isn't allowed + * This is an error + */ + if ( $is_closer && ( $is_void || $has_attrs ) ) { + // we can ignore them since they don't hurt anything. + } + + if ( $is_void ) { + return array( 'void-block', $name, $attrs, $started_at, $length ); + } + + if ( $is_closer ) { + return array( 'block-closer', $name, null, $started_at, $length ); + } + + return array( 'block-opener', $name, $attrs, $started_at, $length ); + } + + /** + * Returns a new block object for freeform HTML + * + * @internal + * @since 5.0.0 + * + * @param string $inner_html HTML content of block. + * @return WP_Block_Parser_Block freeform block object. + */ + public function freeform( $inner_html ) { + return new WP_Block_Parser_Block( null, array(), array(), $inner_html, array( $inner_html ) ); + } + + /** + * Pushes a length of text from the input document + * to the output list as a freeform block. + * + * @internal + * @since 5.0.0 + * @param null $length how many bytes of document text to output. + */ + public function add_freeform( $length = null ) { + $length = $length ? $length : strlen( $this->document ) - $this->offset; + + if ( 0 === $length ) { + return; + } + + $this->output[] = (array) $this->freeform( substr( $this->document, $this->offset, $length ) ); + } + + /** + * Given a block structure from memory pushes + * a new block to the output list. + * + * @internal + * @since 5.0.0 + * @param WP_Block_Parser_Block $block The block to add to the output. + * @param int $token_start Byte offset into the document where the first token for the block starts. + * @param int $token_length Byte length of entire block from start of opening token to end of closing token. + * @param int|null $last_offset Last byte offset into document if continuing form earlier output. + */ + public function add_inner_block( WP_Block_Parser_Block $block, $token_start, $token_length, $last_offset = null ) { + $parent = $this->stack[ count( $this->stack ) - 1 ]; + $parent->block->innerBlocks[] = (array) $block; + $html = substr( $this->document, $parent->prev_offset, $token_start - $parent->prev_offset ); + + if ( ! empty( $html ) ) { + $parent->block->innerHTML .= $html; + $parent->block->innerContent[] = $html; + } + + $parent->block->innerContent[] = null; + $parent->prev_offset = $last_offset ? $last_offset : $token_start + $token_length; + } + + /** + * Pushes the top block from the parsing stack to the output list. + * + * @internal + * @since 5.0.0 + * @param int|null $end_offset byte offset into document for where we should stop sending text output as HTML. + */ + public function add_block_from_stack( $end_offset = null ) { + $stack_top = array_pop( $this->stack ); + $prev_offset = $stack_top->prev_offset; + + $html = isset( $end_offset ) + ? substr( $this->document, $prev_offset, $end_offset - $prev_offset ) + : substr( $this->document, $prev_offset ); + + if ( ! empty( $html ) ) { + $stack_top->block->innerHTML .= $html; + $stack_top->block->innerContent[] = $html; + } + + if ( isset( $stack_top->leading_html_start ) ) { + $this->output[] = (array) $this->freeform( + substr( + $this->document, + $stack_top->leading_html_start, + $stack_top->token_start - $stack_top->leading_html_start + ) + ); + } + + $this->output[] = (array) $stack_top->block; + } +} + +/** + * WP_Block_Parser_Block class. + * + * Required for backward compatibility in WordPress Core. + */ +require_once __DIR__ . '/class-wp-block-parser-block.php'; + +/** + * WP_Block_Parser_Frame class. + * + * Required for backward compatibility in WordPress Core. + */ +require_once __DIR__ . '/class-wp-block-parser-frame.php'; diff --git a/src/wp-includes/formatting.php b/src/wp-includes/formatting.php index b3a5a1ca135b4..f59f877775b77 100644 --- a/src/wp-includes/formatting.php +++ b/src/wp-includes/formatting.php @@ -5227,11 +5227,6 @@ function wp_pre_kses_less_than_callback( $matches ) { * @return string Filtered text to run through KSES. */ function wp_pre_kses_block_attributes( $content, $allowed_html, $allowed_protocols ) { - // If the block parser isn't available, skip block attribute filtering. - if ( ! class_exists( 'WP_Block_Parser' ) ) { - return $content; - } - /* * `filter_block_content` is expected to call `wp_kses`. Temporarily remove * the filter to avoid recursion. diff --git a/src/wp-settings.php b/src/wp-settings.php index 207a69e258247..60c220100f539 100644 --- a/src/wp-settings.php +++ b/src/wp-settings.php @@ -377,15 +377,9 @@ require ABSPATH . WPINC . '/class-wp-block.php'; require ABSPATH . WPINC . '/class-wp-block-list.php'; require ABSPATH . WPINC . '/class-wp-block-metadata-registry.php'; -if ( file_exists( ABSPATH . WPINC . '/class-wp-block-parser-block.php' ) ) { - require ABSPATH . WPINC . '/class-wp-block-parser-block.php'; -} -if ( file_exists( ABSPATH . WPINC . '/class-wp-block-parser-frame.php' ) ) { - require ABSPATH . WPINC . '/class-wp-block-parser-frame.php'; -} -if ( file_exists( ABSPATH . WPINC . '/class-wp-block-parser.php' ) ) { - require ABSPATH . WPINC . '/class-wp-block-parser.php'; -} +require ABSPATH . WPINC . '/class-wp-block-parser-block.php'; +require ABSPATH . WPINC . '/class-wp-block-parser-frame.php'; +require ABSPATH . WPINC . '/class-wp-block-parser.php'; require ABSPATH . WPINC . '/class-wp-classic-to-block-menu-converter.php'; require ABSPATH . WPINC . '/class-wp-navigation-fallback.php'; require ABSPATH . WPINC . '/block-bindings.php'; diff --git a/tools/gutenberg/copy-gutenberg-build.js b/tools/gutenberg/copy-gutenberg-build.js index a66ca113e0cc2..aa30d92264bf9 100644 --- a/tools/gutenberg/copy-gutenberg-build.js +++ b/tools/gutenberg/copy-gutenberg-build.js @@ -91,22 +91,6 @@ const COPY_CONFIG = { ], }, - // PHP source files (non-block files, copied from packages) - phpSource: { - files: [ - { - // Block parser classes - package: 'block-serialization-default-parser', - files: [ - 'class-wp-block-parser.php', - 'class-wp-block-parser-block.php', - 'class-wp-block-parser-frame.php', - ], - destination: '', // Root of wp-includes - }, - ], - }, - // Theme JSON files (from Gutenberg lib directory) themeJson: { files: [ @@ -1028,38 +1012,7 @@ async function main() { ); copyBlockAssets( COPY_CONFIG.blocks ); - // 6. Copy non-block PHP source files (from packages) - console.log( '\n📦 Copying non-block PHP files...' ); - const phpSourceConfig = COPY_CONFIG.phpSource; - - for ( const fileGroup of phpSourceConfig.files ) { - const packageSrc = path.join( gutenbergPackagesDir, fileGroup.package ); - - if ( ! fs.existsSync( packageSrc ) ) { - console.log( ` ⚠️ Package not found: ${ fileGroup.package }` ); - continue; - } - - for ( const file of fileGroup.files ) { - const src = path.join( packageSrc, file ); - const dest = path.join( - wpIncludesDir, - fileGroup.destination, - file - ); - - if ( fs.existsSync( src ) ) { - fs.mkdirSync( path.dirname( dest ), { recursive: true } ); - let content = fs.readFileSync( src, 'utf8' ); - fs.writeFileSync( dest, content ); - } - } - console.log( - ` ✅ ${ fileGroup.package } (${ fileGroup.files.length } files)` - ); - } - - // 7. Copy theme JSON files (from Gutenberg lib directory) + // 6. Copy theme JSON files (from Gutenberg lib directory) console.log( '\n📦 Copying theme JSON files...' ); const themeJsonConfig = COPY_CONFIG.themeJson; const gutenbergLibDir = path.join( gutenbergDir, 'lib' ); @@ -1086,19 +1039,19 @@ async function main() { } } - // 9. Generate script-modules-packages.min.php from individual asset files + // 7. Generate script-modules-packages.min.php from individual asset files console.log( '\n📦 Generating script-modules-packages.min.php...' ); generateScriptModulesPackages(); - // 10. Generate script-loader-packages.min.php + // 8. Generate script-loader-packages.min.php console.log( '\n📦 Generating script-loader-packages.min.php...' ); generateScriptLoaderPackages(); - // 11. Generate require-dynamic-blocks.php and require-static-blocks.php + // 9. Generate require-dynamic-blocks.php and require-static-blocks.php console.log( '\n📦 Generating block registration files...' ); generateBlockRegistrationFiles(); - // 12. Generate blocks-json.php from block.json files + // 10. Generate blocks-json.php from block.json files console.log( '\n📦 Generating blocks-json.php...' ); generateBlocksJson();