Make WordPress Core

Changeset 58713


Ignore:
Timestamp:
07/12/2024 10:18:16 PM (2 months ago)
Author:
dmsnell
Message:

HTML API: Simplify breadcrumb accounting.

Since the HTML Processor started visiting all nodes in a document, both
real and virtual, the breadcrumb accounting became a bit complicated
and it's not entirely clear that it is fully reliable.

In this patch the breadcrumbs are rebuilt separately from the stack of
open elements in order to eliminate the problem of the stateful stack
interactions and the post-hoc event queue.

Breadcrumbs are greatly simplified as a result, and more verifiably
correct, in this construction.

Developed in https://github.com/WordPress/wordpress-develop/pull/6981
Discussed in https://core.trac.wordpress.org/ticket/61576

Follow-up to [58590].

Props bernhard-reiter, dmsnell.
See #61576.

Location:
trunk
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/wp-includes/html-api/class-wp-html-processor.php

    r58702 r58713  
    213213
    214214    /**
     215     * Stores the current breadcrumbs.
     216     *
     217     * @since 6.7.0
     218     *
     219     * @var string[]
     220     */
     221    private $breadcrumbs = array();
     222
     223    /**
    215224     * Current stack event, if set, representing a matched token.
    216225     *
     
    311320        );
    312321
    313         $processor->state->stack_of_open_elements->push( $context_node );
    314322        $processor->context_node = $context_node;
     323        $processor->breadcrumbs  = array( 'HTML', $context_node->node_name );
    315324
    316325        return $processor;
     
    524533        }
    525534
    526         if ( 'done' !== $this->has_seen_context_node && 0 === count( $this->element_queue ) && ! $this->step() ) {
    527             while ( 'context-node' !== $this->state->stack_of_open_elements->current_node()->bookmark_name && $this->state->stack_of_open_elements->pop() ) {
    528                 continue;
    529             }
    530             $this->has_seen_context_node = 'done';
    531             return $this->next_token();
    532         }
    533 
     535        /*
     536         * Prime the events if there are none.
     537         *
     538         * @todo In some cases, probably related to the adoption agency
     539         *       algorithm, this call to step() doesn't create any new
     540         *       events. Calling it again creates them. Figure out why
     541         *       this is and if it's inherent or if it's a bug. Looping
     542         *       until there are events or until there are no more
     543         *       tokens works in the meantime and isn't obviously wrong.
     544         */
     545        while ( empty( $this->element_queue ) && $this->step() ) {
     546            continue;
     547        }
     548
     549        // Process the next event on the queue.
    534550        $this->current_element = array_shift( $this->element_queue );
    535         while ( isset( $this->context_node ) && ! $this->has_seen_context_node ) {
    536             if ( isset( $this->current_element ) ) {
    537                 if ( $this->context_node === $this->current_element->token && WP_HTML_Stack_Event::PUSH === $this->current_element->operation ) {
    538                     $this->has_seen_context_node = true;
    539                     return $this->next_token();
    540                 }
    541             }
    542             $this->current_element = array_shift( $this->element_queue );
    543         }
    544 
    545551        if ( ! isset( $this->current_element ) ) {
    546             if ( 'done' === $this->has_seen_context_node ) {
    547                 return false;
    548             } else {
    549                 return $this->next_token();
    550             }
    551         }
    552 
    553         if ( isset( $this->context_node ) && WP_HTML_Stack_Event::POP === $this->current_element->operation && $this->context_node === $this->current_element->token ) {
    554             $this->element_queue   = array();
    555             $this->current_element = null;
    556552            return false;
    557553        }
    558554
     555        $is_pop = WP_HTML_Stack_Event::POP === $this->current_element->operation;
     556
     557        /*
     558         * The root node only exists in the fragment parser, and closing it
     559         * indicates that the parse is complete. Stop before popping if from
     560         * the breadcrumbs.
     561         */
     562        if ( 'root-node' === $this->current_element->token->bookmark_name ) {
     563            return ! $is_pop && $this->next_token();
     564        }
     565
     566        // Adjust the breadcrumbs for this event.
     567        if ( $is_pop ) {
     568            array_pop( $this->breadcrumbs );
     569        } else {
     570            $this->breadcrumbs[] = $this->current_element->token->node_name;
     571        }
     572
    559573        // Avoid sending close events for elements which don't expect a closing.
    560         if (
    561             WP_HTML_Stack_Event::POP === $this->current_element->operation &&
    562             ! static::expects_closer( $this->current_element->token )
    563         ) {
     574        if ( $is_pop && ! static::expects_closer( $this->current_element->token ) ) {
    564575            return $this->next_token();
    565576        }
     
    644655        }
    645656
    646         foreach ( $this->state->stack_of_open_elements->walk_up() as $node ) {
     657        for ( $i = count( $this->breadcrumbs ) - 1; $i >= 0; $i-- ) {
     658            $node  = $this->breadcrumbs[ $i ];
    647659            $crumb = strtoupper( current( $breadcrumbs ) );
    648660
    649             if ( '*' !== $crumb && $node->node_name !== $crumb ) {
     661            if ( '*' !== $crumb && $node !== $crumb ) {
    650662                return false;
    651663            }
     
    863875     */
    864876    public function get_breadcrumbs() {
    865         $breadcrumbs = array();
    866 
    867         foreach ( $this->state->stack_of_open_elements->walk_down() as $stack_item ) {
    868             $breadcrumbs[] = $stack_item->node_name;
    869         }
    870 
    871         if ( ! $this->is_virtual() ) {
    872             return $breadcrumbs;
    873         }
    874 
    875         foreach ( $this->element_queue as $queue_item ) {
    876             if ( $this->current_element->token->bookmark_name === $queue_item->token->bookmark_name ) {
    877                 break;
    878             }
    879 
    880             if ( 'context-node' === $queue_item->token->bookmark_name ) {
    881                 break;
    882             }
    883 
    884             if ( 'real' === $queue_item->provenance ) {
    885                 break;
    886             }
    887 
    888             if ( WP_HTML_Stack_Event::PUSH === $queue_item->operation ) {
    889                 $breadcrumbs[] = $queue_item->token->node_name;
    890             } else {
    891                 array_pop( $breadcrumbs );
    892             }
    893         }
    894 
    895         if ( null !== parent::get_token_name() && ! parent::is_tag_closer() ) {
    896             array_pop( $breadcrumbs );
    897         }
    898 
    899         // Add the virtual node we're at.
    900         if ( WP_HTML_Stack_Event::PUSH === $this->current_element->operation ) {
    901             $breadcrumbs[] = $this->current_element->token->node_name;
    902         }
    903 
    904         return $breadcrumbs;
     877        return $this->breadcrumbs;
    905878    }
    906879
     
    931904     */
    932905    public function get_current_depth() {
    933         return $this->is_virtual()
    934             ? count( $this->get_breadcrumbs() )
    935             : $this->state->stack_of_open_elements->count();
     906        return count( $this->breadcrumbs );
    936907    }
    937908
     
    25532524            : 0;
    25542525        $bookmark_starts_at   = $this->bookmarks[ $actual_bookmark_name ]->start;
    2555         $bookmark_length      = $this->bookmarks[ $actual_bookmark_name ]->length;
    25562526        $direction            = $bookmark_starts_at > $processor_started_at ? 'forward' : 'backward';
    25572527
     
    26112581            $this->element_queue         = array();
    26122582            $this->current_element       = null;
     2583
     2584            if ( isset( $this->context_node ) ) {
     2585                $this->breadcrumbs = array_slice( $this->breadcrumbs, 0, 2 );
     2586            } else {
     2587                $this->breadcrumbs = array();
     2588            }
    26132589        }
    26142590
  • trunk/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php

    r58304 r58713  
    388388        $this->assertSame( array( 'HTML', 'BODY', 'DIV', 'SPAN', 'CODE' ), $processor->get_breadcrumbs(), 'Failed to produce expected DOM nesting.' );
    389389
    390         $this->assertTrue( $processor->next_token(), 'Failed to advance past CODE tag to expected SPAN closer.' );
     390        $this->assertTrue(
     391            $processor->next_tag(
     392                array(
     393                    'tag_name'    => 'SPAN',
     394                    'tag_closers' => 'visit',
     395                )
     396            ),
     397            'Failed to advance past CODE tag to expected SPAN closer.'
     398        );
     399        $this->assertSame( 'SPAN', $processor->get_tag() );
    391400        $this->assertTrue( $processor->is_tag_closer(), 'Expected to find closing SPAN, but found opener instead.' );
    392401        $this->assertSame( array( 'HTML', 'BODY', 'DIV' ), $processor->get_breadcrumbs(), 'Failed to advance past CODE tag to expected DIV opener.' );
Note: See TracChangeset for help on using the changeset viewer.