Make WordPress Core

Changeset 55706


Ignore:
Timestamp:
05/03/2023 11:29:42 AM (17 months ago)
Author:
Bernhard Reiter
Message:

HTML API: Accumulate shift for internal parsing pointer.

A bug was discovered where the parser wasn't returning to the
start of the affected tag after making some updates.

In short, the Tag Processor has not been treating its own internal
pointer bytes_already_parsed the same way it treats its bookmarks.
That is, when updates are applied to the input document and then
get_updated_html() is called, the internal pointer transfers to
the newly-updated content as if no updates had been applied since
the previous call to get_updated_html().

In this patch we're creating a new "shift accumulator" to account for
all of the updates that accrue before calling get_updated_html().
This accumulated shift will be applied when swapping the input document
with the output buffer, which should result in the pointer pointing to
the same logical spot in the document it did before the update.

In effect this patch adds a single workaround for treating the
internal pointer like a bookmark, plus a temporary pointer which points
to the beginning of the current tag when calling get_updated_html().
This will preserve the assumption that updating a document doesn't
move that pointer, or shift which tag is currently matched.

Props dmsnell, zieladam.
Fixes #58179.

Location:
trunk
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/wp-includes/html-api/class-wp-html-tag-processor.php

    r55675 r55706  
    319319
    320320    /**
    321      * Holds updated HTML as updates are applied.
    322      *
    323      * Updates and unmodified portions of the input document are
    324      * appended to this value as they are applied. It will hold
    325      * a copy of the updated document up until the point of the
    326      * latest applied update. The fully-updated HTML document
    327      * will comprise this value plus the part of the input document
    328      * which follows that latest update.
    329      *
    330      * @see $bytes_already_copied
    331      *
    332      * @since 6.2.0
    333      * @var string
    334      */
    335     private $output_buffer = '';
    336 
    337     /**
    338321     * How many bytes from the original HTML document have been read and parsed.
    339322     *
     
    346329     */
    347330    private $bytes_already_parsed = 0;
    348 
    349     /**
    350      * How many bytes from the input HTML document have already been
    351      * copied into the output buffer.
    352      *
    353      * Lexical updates are enqueued and processed in batches. Prior
    354      * to any given update in the input document, there might exist
    355      * a span of HTML unaffected by any changes. This span ought to
    356      * be copied verbatim into the output buffer before applying the
    357      * following update. This value will point to the starting byte
    358      * offset in the input document where that unaffected span of
    359      * HTML starts.
    360      *
    361      * @since 6.2.0
    362      * @var int
    363      */
    364     private $bytes_already_copied = 0;
    365331
    366332    /**
     
    13041270     */
    13051271    private function after_tag() {
    1306         $this->class_name_updates_to_attributes_updates();
    1307         $this->apply_attributes_updates();
     1272        $this->get_updated_html();
    13081273        $this->tag_name_starts_at = null;
    13091274        $this->tag_name_length    = null;
     
    14611426     *
    14621427     * @since 6.2.0
     1428     * @since 6.2.1 Accumulates shift for internal cursor and passed pointer.
    14631429     * @since 6.3.0 Invalidate any bookmarks whose targets are overwritten.
    14641430     *
    1465      * @return void
    1466      */
    1467     private function apply_attributes_updates() {
     1431     * @param int $shift_this_point Accumulate and return shift for this position.
     1432     * @return int How many bytes the given pointer moved in response to the updates.
     1433     */
     1434    private function apply_attributes_updates( $shift_this_point = 0 ) {
    14681435        if ( ! count( $this->lexical_updates ) ) {
    1469             return;
    1470         }
     1436            return 0;
     1437        }
     1438
     1439        $accumulated_shift_for_given_point = 0;
    14711440
    14721441        /*
     
    14821451        usort( $this->lexical_updates, array( self::class, 'sort_start_ascending' ) );
    14831452
     1453        $bytes_already_copied = 0;
     1454        $output_buffer        = '';
    14841455        foreach ( $this->lexical_updates as $diff ) {
    1485             $this->output_buffer       .= substr( $this->html, $this->bytes_already_copied, $diff->start - $this->bytes_already_copied );
    1486             $this->output_buffer       .= $diff->text;
    1487             $this->bytes_already_copied = $diff->end;
    1488         }
     1456            $shift = strlen( $diff->text ) - ( $diff->end - $diff->start );
     1457
     1458            // Adjust the cursor position by however much an update affects it.
     1459            if ( $diff->start <= $this->bytes_already_parsed ) {
     1460                $this->bytes_already_parsed += $shift;
     1461            }
     1462
     1463            // Accumulate shift of the given pointer within this function call.
     1464            if ( $diff->start <= $shift_this_point ) {
     1465                $accumulated_shift_for_given_point += $shift;
     1466            }
     1467
     1468            $output_buffer        .= substr( $this->html, $bytes_already_copied, $diff->start - $bytes_already_copied );
     1469            $output_buffer        .= $diff->text;
     1470            $bytes_already_copied  = $diff->end;
     1471        }
     1472
     1473        $this->html = $output_buffer . substr( $this->html, $bytes_already_copied );
    14891474
    14901475        /*
     
    15281513
    15291514        $this->lexical_updates = array();
     1515
     1516        return $accumulated_shift_for_given_point;
    15301517    }
    15311518
     
    15771564        // Point this tag processor before the sought tag opener and consume it.
    15781565        $this->bytes_already_parsed = $this->bookmarks[ $bookmark_name ]->start;
    1579         $this->bytes_already_copied = $this->bytes_already_parsed;
    1580         $this->output_buffer        = substr( $this->html, 0, $this->bytes_already_copied );
    15811566        return $this->next_tag( array( 'tag_closers' => 'visit' ) );
    15821567    }
     
    21232108     *
    21242109     * @since 6.2.0
     2110     * @since 6.2.1 Shifts the internal cursor corresponding to the applied updates.
    21252111     *
    21262112     * @return string The processed HTML.
     
    21332119         * updated, return the original document and avoid a string copy.
    21342120         */
    2135         if ( $requires_no_updating && 0 === $this->bytes_already_copied ) {
     2121        if ( $requires_no_updating ) {
    21362122            return $this->html;
    21372123        }
    21382124
    21392125        /*
    2140          * If there are no updates left to apply, but some have already
    2141          * been applied, then finish by copying the rest of the input
    2142          * to the end of the updated document and return.
    2143          */
    2144         if ( $requires_no_updating && $this->bytes_already_copied > 0 ) {
    2145             $this->html                 = $this->output_buffer . substr( $this->html, $this->bytes_already_copied );
    2146             $this->bytes_already_copied = strlen( $this->output_buffer );
    2147             return $this->output_buffer . substr( $this->html, $this->bytes_already_copied );
    2148         }
    2149 
    2150         // Apply the updates, rewind to before the current tag, and reparse the attributes.
    2151         $content_up_to_opened_tag_name = $this->output_buffer . substr(
    2152             $this->html,
    2153             $this->bytes_already_copied,
    2154             $this->tag_name_starts_at + $this->tag_name_length - $this->bytes_already_copied
    2155         );
     2126         * Keep track of the position right before the current tag. This will
     2127         * be necessary for reparsing the current tag after updating the HTML.
     2128         */
     2129        $before_current_tag = $this->tag_name_starts_at - 1;
    21562130
    21572131        /*
    2158          * 1. Apply the edits by flushing them to the output buffer and updating the copied byte count.
    2159          *
    2160          * Note: `apply_attributes_updates()` modifies `$this->output_buffer`.
     2132         * 1. Apply the enqueued edits and update all the pointers to reflect those changes.
    21612133         */
    21622134        $this->class_name_updates_to_attributes_updates();
    2163         $this->apply_attributes_updates();
     2135        $before_current_tag += $this->apply_attributes_updates( $before_current_tag );
    21642136
    21652137        /*
    2166          * 2. Replace the original HTML with the now-updated HTML so that it's possible to
    2167          *    seek to a previous location and have a consistent view of the updated document.
    2168          */
    2169         $this->html                 = $this->output_buffer . substr( $this->html, $this->bytes_already_copied );
    2170         $this->output_buffer        = $content_up_to_opened_tag_name;
    2171         $this->bytes_already_copied = strlen( $this->output_buffer );
    2172 
    2173         /*
    2174          * 3. Point this tag processor at the original tag opener and consume it
     2138         * 2. Rewind to before the current tag and reparse to get updated attributes.
    21752139         *
    21762140         * At this point the internal cursor points to the end of the tag name.
     
    21842148         *                 \<-/ back up by strlen("em") + 1 ==> 3
    21852149         */
    2186         $this->bytes_already_parsed = strlen( $content_up_to_opened_tag_name ) - $this->tag_name_length - 1;
     2150
     2151        // Store existing state so it can be restored after reparsing.
     2152        $previous_parsed_byte_count = $this->bytes_already_parsed;
     2153        $previous_query             = $this->last_query;
     2154
     2155        // Reparse attributes.
     2156        $this->bytes_already_parsed = $before_current_tag;
    21872157        $this->next_tag();
     2158
     2159        // Restore previous state.
     2160        $this->bytes_already_parsed = $previous_parsed_byte_count;
     2161        $this->parse_query( $previous_query );
    21882162
    21892163        return $this->html;
  • trunk/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php

    r55675 r55706  
    573573
    574574    /**
     575     * Verifies that updates to a document before calls to `get_updated_html()` don't
     576     * lead to the Tag Processor jumping to the wrong tag after the updates.
     577     *
     578     * @ticket 58179
     579     *
     580     * @covers WP_HTML_Tag_Processor::get_updated_html
     581     */
     582    public function test_internal_pointer_returns_to_original_spot_after_inserting_content_before_cursor() {
     583        $tags = new WP_HTML_Tag_Processor( '<div>outside</div><section><div><img>inside</div></section>' );
     584
     585        $tags->next_tag();
     586        $tags->add_class( 'foo' );
     587        $tags->next_tag( 'section' );
     588
     589        // Return to this spot after moving ahead.
     590        $tags->set_bookmark( 'here' );
     591
     592        // Move ahead.
     593        $tags->next_tag( 'img' );
     594        $tags->seek( 'here' );
     595        $this->assertSame( '<div class="foo">outside</div><section><div><img>inside</div></section>', $tags->get_updated_html() );
     596        $this->assertSame( 'SECTION', $tags->get_tag() );
     597        $this->assertFalse( $tags->is_tag_closer() );
     598    }
     599
     600    /**
    575601     * @ticket 56299
    576602     *
     
    15231549
    15241550        $p = new WP_HTML_Tag_Processor( $input );
    1525         $this->assertTrue( $p->next_tag( 'div' ), 'Querying an existing tag did not return true' );
     1551        $this->assertTrue( $p->next_tag( 'div' ), 'Did not find first DIV tag in input.' );
    15261552        $p->set_attribute( 'data-details', '{ "key": "value" }' );
    15271553        $p->add_class( 'is-processed' );
     
    15331559                )
    15341560            ),
    1535             'Querying an existing tag did not return true'
     1561            'Did not find the first BtnGroup DIV tag'
    15361562        );
    15371563        $p->remove_class( 'BtnGroup' );
     
    15451571                )
    15461572            ),
    1547             'Querying an existing tag did not return true'
     1573            'Did not find the second BtnGroup DIV tag'
    15481574        );
    15491575        $p->remove_class( 'BtnGroup' );
     
    15581584                )
    15591585            ),
    1560             'Querying an existing tag did not return true'
     1586            'Did not find third BUTTON tag with "btn" CSS class'
    15611587        );
    15621588        $p->remove_attribute( 'class' );
    1563         $this->assertFalse( $p->next_tag( 'non-existent' ), 'Querying a non-existing tag did not return false' );
     1589        $this->assertFalse( $p->next_tag( 'non-existent' ), "Found a {$p->get_tag()} tag when none should have been found." );
    15641590        $p->set_attribute( 'class', 'test' );
    15651591        $this->assertSame( $expected_output, $p->get_updated_html(), 'Calling get_updated_html after updating the attributes did not return the expected HTML' );
Note: See TracChangeset for help on using the changeset viewer.