diff --git a/packages/playground/data-liberation/src/block-markup/WP_Block_Markup_Processor.php b/packages/playground/data-liberation/src/block-markup/WP_Block_Markup_Processor.php index 04785fe138..bc6eb7a619 100644 --- a/packages/playground/data-liberation/src/block-markup/WP_Block_Markup_Processor.php +++ b/packages/playground/data-liberation/src/block-markup/WP_Block_Markup_Processor.php @@ -1,28 +1,84 @@ |null + */ protected $block_attributes; + + /** + * Whether the current block's attributes have been modified and need to be serialized + * + * @var bool + */ private $block_attributes_updated; + + /** + * Whether the current block token is a closing tag (e.g. ) + * + * @var bool + */ private $block_closer; /** + * Whether the current block is self-closing (e.g. ) + * + * @var bool + */ + private $self_closing_flag; + + /** + * Stack tracking the names of currently open blocks for validation + * + * @var array + */ + private $stack_of_open_blocks = array(); + + /** + * The most recent error encountered while parsing blocks + * + * @var string|null + */ + private $last_block_error; + + /** + * Iterator for traversing nested block attributes * @var \RecursiveIteratorIterator */ private $block_attributes_iterator; + /** + * Gets the type of the current token, adding a special '#block-comment' type + * for WordPress block delimiters. + * + * @return string|null The token type or null if no token + */ public function get_token_type(): ?string { switch ( $this->parser_state ) { case self::STATE_COMMENT: @@ -37,10 +93,96 @@ public function get_token_type(): ?string { } } + /** + * Gets the most recent error encountered while parsing blocks + * + * @return string|null The error message or null if no error + */ + public function get_last_error(): ?string { + return $this->last_block_error; + } + + /** + * Advances past the block closer of the currently matched block and returns + * the HTML content found between the block's opener and closer. + * + * @return string|false The inner HTML content of the block or false if not a block opener. + */ + public function skip_and_get_block_inner_html() { + if ( '#block-comment' !== $this->get_token_type() ) { + return false; + } + + if ( $this->is_block_closer() ) { + return false; + } + + if ( false === WP_HTML_Tag_Processor::set_bookmark( 'block-start' ) ) { + return false; + } + + $starting_block_depth = $this->get_block_depth(); + while ( $this->next_token() ) { + if ( + $this->get_token_type() === '#block-comment' && + $this->is_block_closer() && + $this->get_block_depth() === $starting_block_depth - 1 + ) { + break; + } + } + + if ( false === WP_HTML_Tag_Processor::set_bookmark( 'block-end' ) ) { + WP_HTML_Tag_Processor::release_bookmark( 'block-start' ); + return false; + } + + $inner_html_start = $this->bookmarks['block-start']->start + $this->bookmarks['block-start']->length; + $inner_html_end = $this->bookmarks['block-end']->start - $inner_html_start; + + WP_HTML_Tag_Processor::release_bookmark( 'block-start' ); + WP_HTML_Tag_Processor::release_bookmark( 'block-end' ); + + return substr( + $this->html, + $inner_html_start, + $inner_html_end + ); + } + + /** + * Gets the depth of the currently matched block on the block stack. It only + * considers the parent blocks and not HTML elements. + * + * For example, the paragraph block in the following markup has a depth of 1: + * + * + *
+ * + *

Hello, there

+ * + *
+ * + * + * @return int The number of ancestor blocks + */ + public function get_block_depth() { + return count( $this->stack_of_open_blocks ); + } + + /** + * Gets the names of all currently open blocks from outermost to innermost + * + * @return array List of block names in nesting order + */ + public function get_block_breadcrumbs() { + return $this->stack_of_open_blocks; + } + /** * Returns the name of the block if the current token is a block comment. * - * @return string|false + * @return string|false The block name (e.g. 'core/paragraph') or false if not at a block */ public function get_block_name() { if ( null === $this->block_name ) { @@ -50,6 +192,11 @@ public function get_block_name() { return $this->block_name; } + /** + * Gets all attributes of the current block + * + * @return array|false The block attributes or false if not at a block + */ public function get_block_attributes() { if ( null === $this->block_attributes ) { return false; @@ -58,12 +205,26 @@ public function get_block_attributes() { return $this->block_attributes; } + /** + * Gets a specific attribute value from the current block + * + * @param string $attribute_name The name of the attribute to get + * @return mixed|false The attribute value or false if not found + */ + public function get_block_attribute( $attribute_name ) { + if ( null === $this->block_attributes ) { + return false; + } + + return $this->block_attributes[ $attribute_name ] ?? false; + } + /** * Overwrites all the block attributes of the currently matched block * opener. * - * @param array $attributes The attributes to set. - * @return bool Whether the attributes were set. + * @param array $attributes The new attributes to set + * @return bool Whether the attributes were successfully set */ public function set_block_attributes( $attributes ) { if ( '#block-comment' !== $this->get_token_type() ) { @@ -77,20 +238,61 @@ public function set_block_attributes( $attributes ) { return true; } + /** + * Checks if the currently matched token is a block closer, + * e.g. . + * + * @return bool True if at a block closer. + */ public function is_block_closer() { return $this->block_name !== null && $this->block_closer === true; } + /** + * Checks if the currently matched token is a self-closing block, + * e.g. . + * + * @return bool True if at a self-closing block. + */ + public function is_self_closing_block() { + return $this->block_name !== null && $this->self_closing_flag === true; + } + + /** + * Advances to the next token in the HTML stream. Matches: + * - The regular HTML tokens + * - WordPress block openers + * - WordPress block closers + * - WordPress self-closing blocks + * + * @return bool Whether a token was parsed. + */ public function next_token(): bool { $this->get_updated_html(); - $this->block_name = null; - $this->block_attributes = null; - $this->block_closer = false; - $this->block_attributes_updated = false; + $this->block_name = null; + $this->block_attributes = null; + $this->block_attributes_iterator = null; + $this->block_closer = false; + $this->self_closing_flag = false; + $this->block_attributes_updated = false; - if ( parent::next_token() === false ) { - return false; + while ( true ) { + if ( parent::next_token() === false ) { + return false; + } + + if ( + $this->get_token_type() === '#tag' && ( + $this->get_tag() === 'HTML' || + $this->get_tag() === 'HEAD' || + $this->get_tag() === 'BODY' + ) + ) { + continue; + } + + break; } if ( parent::get_token_type() !== '#comment' ) { @@ -140,29 +342,41 @@ public function next_token(): bool { $name = substr( $text, $name_starts_at, $name_length + 3 ); $at += $name_length; + // Assume no attributes by default. + $attributes = array(); + // Skip the whitespace that follows the block name. $at += strspn( $text, ' \t\f\r\n', $at ); - if ( $at >= strlen( $text ) ) { - // It's a block without attributes. - $this->block_name = $name; + if ( $at < strlen( $text ) ) { + // It may be a self-closing block or a block with attributes. - return true; - } - - // It seems we may have block attributes here. + // However, block closers can be neither – let's short-circuit. + if ( $this->block_closer ) { + return true; + } - // Block closers cannot have attributes. - if ( $this->block_closer ) { - return true; - } + // The rest of the comment can only consist of block attributes + // and an optional solidus character. + $rest = ltrim( substr( $text, $at ) ); + $at = strlen( $text ); + + // Inspect our potential JSON for the self-closing solidus (`/`) character. + $json_maybe = $rest; + if ( substr( $json_maybe, -1 ) === '/' ) { + // Self-closing block () + $this->self_closing_flag = true; + $json_maybe = substr( $json_maybe, 0, -1 ); + } - // Let's try to parse them as JSON. - $json_maybe = substr( $text, $at ); - $attributes = json_decode( $json_maybe, true ); - if ( null === $attributes || ! is_array( $attributes ) ) { - // This comment looked like a block comment, but the attributes didn't - // parse as a JSON array. This means it wasn't a block after all. - return true; + // Let's try to parse attributes as JSON. + if ( strlen( $json_maybe ) > 0 ) { + $attributes = json_decode( $json_maybe, true ); + if ( null === $attributes || ! is_array( $attributes ) ) { + // This comment looked like a block comment, but the attributes didn't + // parse as a JSON array. This means it wasn't a block after all. + return true; + } + } } // We have a block name and a valid attributes array. We may not find a block @@ -171,14 +385,32 @@ public function next_token(): bool { $this->block_name = $name; $this->block_attributes = $attributes; + if ( $this->block_closer ) { + $popped = array_pop( $this->stack_of_open_blocks ); + if ( $popped !== $name ) { + $this->last_block_error = sprintf( 'Block closer %s does not match the last opened block %s.', $name, $popped ); + return false; + } + } elseif ( ! $this->self_closing_flag ) { + array_push( $this->stack_of_open_blocks, $name ); + } + return true; } + /** + * @inheritDoc + */ public function get_updated_html(): string { $this->block_attribute_updates_to_modifiable_text_updates(); return parent::get_updated_html(); } + /** + * Converts block attribute updates into lexical updates. + * + * @return bool Whether any lexical updates were created + */ private function block_attribute_updates_to_modifiable_text_updates() { // Apply block attribute updates, if any. if ( ! $this->block_attributes_updated ) { @@ -206,6 +438,11 @@ private function block_attribute_updates_to_modifiable_text_updates() { return true; } + /** + * Advances to the next block attribute when a block is matched. + * + * @return bool Whether we successfully advanced to the next attribute. + */ public function next_block_attribute() { if ( '#block-comment' !== $this->get_token_type() ) { return false; @@ -234,6 +471,11 @@ public function next_block_attribute() { return false; } + /** + * Gets the key of the currently matched block attribute. + * + * @return string|false The attribute key or false if no attribute was matched + */ public function get_block_attribute_key() { if ( null === $this->block_attributes_iterator || false === $this->block_attributes_iterator->valid() ) { return false; @@ -242,6 +484,11 @@ public function get_block_attribute_key() { return $this->block_attributes_iterator->key(); } + /** + * Gets the value of the currently matched block attribute. + * + * @return mixed|false The attribute value or false if no attribute was matched + */ public function get_block_attribute_value() { if ( null === $this->block_attributes_iterator || false === $this->block_attributes_iterator->valid() ) { return false; @@ -250,17 +497,23 @@ public function get_block_attribute_value() { return $this->block_attributes_iterator->current(); } + /** + * Sets the value of the currently matched block attribute. + * + * @param mixed $new_value The new value to set + * @return bool Whether the value was successfully set + */ public function set_block_attribute_value( $new_value ) { if ( null === $this->block_attributes_iterator || false === $this->block_attributes_iterator->valid() ) { return false; } - $this->block_attributes_iterator->getSubIterator( - $this->block_attributes_iterator->getDepth() - )->offsetSet( - $this->get_block_attribute_key(), - $new_value - ); + $this->block_attributes_iterator + ->getSubIterator( $this->block_attributes_iterator->getDepth() ) + ->offsetSet( + $this->get_block_attribute_key(), + $new_value + ); $this->block_attributes_updated = true; return true; diff --git a/packages/playground/data-liberation/tests/WPBlockMarkupProcessorTests.php b/packages/playground/data-liberation/tests/WPBlockMarkupProcessorTests.php index 3817d63b4d..c4e559500d 100644 --- a/packages/playground/data-liberation/tests/WPBlockMarkupProcessorTests.php +++ b/packages/playground/data-liberation/tests/WPBlockMarkupProcessorTests.php @@ -18,9 +18,9 @@ public function test_finds_block_openers( $markup, $block_name, $block_attribute static public function provider_test_finds_block_openers() { return [ - 'Opener without attributes' => [ '', 'wp:paragraph', null ], - 'Opener without the trailing whitespace' => [ '', 'wp:paragraph', null ], - 'Opener with a lot of trailing whitespace' => [ '', 'wp:paragraph', null ], + 'Opener without attributes' => [ '', 'wp:paragraph', [] ], + 'Opener without the trailing whitespace' => [ '', 'wp:paragraph', [] ], + 'Opener with a lot of trailing whitespace' => [ '', 'wp:paragraph', [] ], 'Opener with attributes' => [ '', 'wp:paragraph', @@ -40,6 +40,34 @@ static public function provider_test_finds_block_openers() { ]; } + /** + * + * @dataProvider provider_test_finds_self_closing_blocks + */ + public function test_finds_self_closing_blocks( $markup, $block_name, $block_attributes ) { + $p = new WP_Block_Markup_Processor( $markup ); + $p->next_token(); + $this->assertEquals( '#block-comment', $p->get_token_type(), 'Failed to identify the block comment' ); + $this->assertEquals( $block_name, $p->get_block_name(), 'Failed to identify the block name' ); + $this->assertEquals( $block_attributes, $p->get_block_attributes(), 'Failed to identify the block attributes' ); + $this->assertTrue( $p->is_self_closing_block(), 'Failed to identify the self-closing block status' ); + } + + static public function provider_test_finds_self_closing_blocks() { + return [ + 'Self-closing block without attributes' => [ + '', + 'wp:spacer', + [] + ], + 'Self-closing block with attributes' => [ + '', + 'wp:spacer', + [ 'height' => '20px' ] + ], + ]; + } + /** * * @dataProvider provider_test_finds_block_closers @@ -100,7 +128,6 @@ static public function provider_test_treat_invalid_block_closers_as_comments() { 'Closer with a line break before whitespace' => [ "", ], 'Closer with attributes' => [ '', ], 'Closer with solidus at the end (before whitespace)' => [ '', ], - 'Closer with solidus at the end (after whitespace)' => [ '', ], ]; }