Make WordPress Core

Changeset 57768


Ignore:
Timestamp:
03/05/2024 05:32:19 AM (11 months ago)
Author:
dmsnell
Message:

HTML API: Ensure that breadcrumbs are properly retained after seeking.

In some cases, it's possible to seek back into a location found inside
an element which has been closed before the point in the document where
the seek() was made. In these cases the breadcrumb stack is lost, and
calling get_breadcrumbs() after the seek will return the wrong information.

In this patch, the HTML Processor takes a conservative approach and
moves to the front of the document, then reparses the document until
it reaches the sought-after location. This ensures consistency on
the stack of open elements and active formats, and preserves
breadcrumbs.

Developed in https://github.com/WordPress/wordpress-develop/pull/6185
Discussed in https://core.trac.wordpress.org/ticket/60687

Props jonsurrell.
Follow-up to [60687].
See #58517.
Fixes #60687.

Location:
trunk
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/wp-includes/html-api/class-wp-html-processor.php

    r57582 r57768  
    242242        }
    243243
    244         $p                        = new self( $html, self::CONSTRUCTOR_UNLOCK_CODE );
    245         $p->state->context_node   = array( 'BODY', array() );
    246         $p->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY;
     244        $processor                        = new self( $html, self::CONSTRUCTOR_UNLOCK_CODE );
     245        $processor->state->context_node   = array( 'BODY', array() );
     246        $processor->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY;
    247247
    248248        // @todo Create "fake" bookmarks for non-existent but implied nodes.
    249         $p->bookmarks['root-node']    = new WP_HTML_Span( 0, 0 );
    250         $p->bookmarks['context-node'] = new WP_HTML_Span( 0, 0 );
    251 
    252         $p->state->stack_of_open_elements->push(
     249        $processor->bookmarks['root-node']    = new WP_HTML_Span( 0, 0 );
     250        $processor->bookmarks['context-node'] = new WP_HTML_Span( 0, 0 );
     251
     252        $processor->state->stack_of_open_elements->push(
    253253            new WP_HTML_Token(
    254254                'root-node',
     
    258258        );
    259259
    260         $p->state->stack_of_open_elements->push(
     260        $processor->state->stack_of_open_elements->push(
    261261            new WP_HTML_Token(
    262262                'context-node',
    263                 $p->state->context_node[0],
     263                $processor->state->context_node[0],
    264264                false
    265265            )
    266266        );
    267267
    268         return $p;
     268        return $processor;
    269269    }
    270270
     
    12271227     * Moves the internal cursor in the HTML Processor to a given bookmark's location.
    12281228     *
     1229     * Be careful! Seeking backwards to a previous location resets the parser to the
     1230     * start of the document and reparses the entire contents up until it finds the
     1231     * sought-after bookmarked location.
     1232     *
    12291233     * In order to prevent accidental infinite loops, there's a
    12301234     * maximum limit on the number of times seek() can be called.
     
    12481252        $direction            = $bookmark_starts_at > $processor_started_at ? 'forward' : 'backward';
    12491253
    1250         switch ( $direction ) {
    1251             case 'forward':
    1252                 // When moving forwards, reparse the document until reaching the same location as the original bookmark.
    1253                 while ( $this->step() ) {
    1254                     if ( $bookmark_starts_at === $this->bookmarks[ $this->state->current_token->bookmark_name ]->start ) {
    1255                         return true;
    1256                     }
    1257                 }
    1258 
    1259                 return false;
    1260 
    1261             case 'backward':
    1262                 /*
    1263                  * When moving backwards, clear out all existing stack entries which appear after the destination
    1264                  * bookmark. These could be stored for later retrieval, but doing so would require additional
    1265                  * memory overhead and also demand that references and bookmarks are updated as the document
    1266                  * changes. In time this could be a valuable optimization, but it's okay to give up that
    1267                  * optimization in exchange for more CPU time to recompute the stack, to re-parse the
    1268                  * document that may have already been parsed once.
    1269                  */
    1270                 foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) {
    1271                     if ( $bookmark_starts_at >= $this->bookmarks[ $item->bookmark_name ]->start ) {
    1272                         break;
    1273                     }
    1274 
    1275                     $this->state->stack_of_open_elements->remove_node( $item );
    1276                 }
    1277 
    1278                 foreach ( $this->state->active_formatting_elements->walk_up() as $item ) {
    1279                     if ( $bookmark_starts_at >= $this->bookmarks[ $item->bookmark_name ]->start ) {
    1280                         break;
    1281                     }
    1282 
    1283                     $this->state->active_formatting_elements->remove_node( $item );
    1284                 }
    1285 
    1286                 return parent::seek( $actual_bookmark_name );
    1287         }
     1254        /*
     1255         * If seeking backwards, it's possible that the sought-after bookmark exists within an element
     1256         * which has been closed before the current cursor; in other words, it has already been removed
     1257         * from the stack of open elements. This means that it's insufficient to simply pop off elements
     1258         * from the stack of open elements which appear after the bookmarked location and then jump to
     1259         * that location, as the elements which were open before won't be re-opened.
     1260         *
     1261         * In order to maintain consistency, the HTML Processor rewinds to the start of the document
     1262         * and reparses everything until it finds the sought-after bookmark.
     1263         *
     1264         * There are potentially better ways to do this: cache the parser state for each bookmark and
     1265         * restore it when seeking; store an immutable and idempotent register of where elements open
     1266         * and close.
     1267         *
     1268         * If caching the parser state it will be essential to properly maintain the cached stack of
     1269         * open elements and active formatting elements when modifying the document. This could be a
     1270         * tedious and time-consuming process as well, and so for now will not be performed.
     1271         *
     1272         * It may be possible to track bookmarks for where elements open and close, and in doing so
     1273         * be able to quickly recalculate breadcrumbs for any element in the document. It may even
     1274         * be possible to remove the stack of open elements and compute it on the fly this way.
     1275         * If doing this, the parser would need to track the opening and closing locations for all
     1276         * tokens in the breadcrumb path for any and all bookmarks. By utilizing bookmarks themselves
     1277         * this list could be automatically maintained while modifying the document. Finding the
     1278         * breadcrumbs would then amount to traversing that list from the start until the token
     1279         * being inspected. Once an element closes, if there are no bookmarks pointing to locations
     1280         * within that element, then all of these locations may be forgotten to save on memory use
     1281         * and computation time.
     1282         */
     1283        if ( 'backward' === $direction ) {
     1284            /*
     1285             * Instead of clearing the parser state and starting fresh, calling the stack methods
     1286             * maintains the proper flags in the parser.
     1287             */
     1288            foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) {
     1289                if ( 'context-node' === $item->bookmark_name ) {
     1290                    break;
     1291                }
     1292
     1293                $this->state->stack_of_open_elements->remove_node( $item );
     1294            }
     1295
     1296            foreach ( $this->state->active_formatting_elements->walk_up() as $item ) {
     1297                if ( 'context-node' === $item->bookmark_name ) {
     1298                    break;
     1299                }
     1300
     1301                $this->state->active_formatting_elements->remove_node( $item );
     1302            }
     1303
     1304            parent::seek( 'context-node' );
     1305            $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY;
     1306            $this->state->frameset_ok    = true;
     1307        }
     1308
     1309        // When moving forwards, reparse the document until reaching the same location as the original bookmark.
     1310        if ( $bookmark_starts_at === $this->bookmarks[ $this->state->current_token->bookmark_name ]->start ) {
     1311            return true;
     1312        }
     1313
     1314        while ( $this->step() ) {
     1315            if ( $bookmark_starts_at === $this->bookmarks[ $this->state->current_token->bookmark_name ]->start ) {
     1316                return true;
     1317            }
     1318        }
     1319
     1320        return false;
    12881321    }
    12891322
  • trunk/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php

    r57508 r57768  
    546546        $this->assertTrue( $processor->get_attribute( 'two' ) );
    547547    }
     548
     549    /**
     550     * Ensures that breadcrumbs are properly reported after seeking backward to a location
     551     * inside an element which has been fully closed before the seek.
     552     *
     553     * @ticket 60687
     554     */
     555    public function test_retains_proper_bookmarks_after_seeking_back_to_closed_element() {
     556        $processor = WP_HTML_Processor::create_fragment( '<div><img></div><div><hr></div>' );
     557
     558        $processor->next_tag( 'IMG' );
     559        $processor->set_bookmark( 'first' );
     560
     561        $processor->next_tag( 'HR' );
     562
     563        $processor->seek( 'first' );
     564        $this->assertSame(
     565            array( 'HTML', 'BODY', 'DIV', 'IMG' ),
     566            $processor->get_breadcrumbs(),
     567            'Should have retained breadcrumbs from bookmarked location after seeking backwards to it.'
     568        );
     569    }
    548570}
Note: See TracChangeset for help on using the changeset viewer.