Make WordPress Core

Changeset 55708


Ignore:
Timestamp:
05/03/2023 12:09:22 PM (16 months ago)
Author:
Bernhard Reiter
Message:

HTML API: Accumulate shift for internal parsing pointer.

A bug was discovered where the parser wasn't returning to the
start of the affected tag after making some updates.

In a few words, the Tag Processor has not been treating its own internal
pointer bytes_already_parsed the same way it treats its bookmarks.
That is, when updates are applied to the input document and then
get_updated_html() is called, the internal pointer transfers to
the newly-updated content as if no updates had been applied since
the previous call to get_updated_html().

In this patch we're creating a new "shift accumulator" to account for
all of the updates that accrue before calling get_updated_html().
This accumulated shift will be applied when swapping the input document
with the output buffer, which should result in the pointer pointing to
the same logical spot in the document it did before the update.

In effect this patch adds a single workaround for treating the
internal pointer like a bookmark, plus a temporary pointer which points
to the beginning of the current tag when calling get_updated_html().
This will preserve the assumption that updating a document doesn't
move that pointer, or shift which tag is currently matched.

Props dmsnell, zieladam.
Merges [55706] to the 6.2 branch.
Fixes #58179.

Location:
branches/6.2
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • branches/6.2/src/wp-includes/html-api/class-wp-html-tag-processor.php

    r55707 r55708  
    319319
    320320    /**
    321      * Holds updated HTML as updates are applied.
    322      *
    323      * Updates and unmodified portions of the input document are
    324      * appended to this value as they are applied. It will hold
    325      * a copy of the updated document up until the point of the
    326      * latest applied update. The fully-updated HTML document
    327      * will comprise this value plus the part of the input document
    328      * which follows that latest update.
    329      *
    330      * @see $bytes_already_copied
    331      *
    332      * @since 6.2.0
    333      * @var string
    334      */
    335     private $output_buffer = '';
    336 
    337     /**
    338321     * How many bytes from the original HTML document have been read and parsed.
    339322     *
     
    346329     */
    347330    private $bytes_already_parsed = 0;
    348 
    349     /**
    350      * How many bytes from the input HTML document have already been
    351      * copied into the output buffer.
    352      *
    353      * Lexical updates are enqueued and processed in batches. Prior
    354      * to any given update in the input document, there might exist
    355      * a span of HTML unaffected by any changes. This span ought to
    356      * be copied verbatim into the output buffer before applying the
    357      * following update. This value will point to the starting byte
    358      * offset in the input document where that unaffected span of
    359      * HTML starts.
    360      *
    361      * @since 6.2.0
    362      * @var int
    363      */
    364     private $bytes_already_copied = 0;
    365331
    366332    /**
     
    13041270     */
    13051271    private function after_tag() {
    1306         $this->class_name_updates_to_attributes_updates();
    1307         $this->apply_attributes_updates();
     1272        $this->get_updated_html();
    13081273        $this->tag_name_starts_at = null;
    13091274        $this->tag_name_length    = null;
     
    14611426     *
    14621427     * @since 6.2.0
    1463      *
    1464      * @return void
    1465      */
    1466     private function apply_attributes_updates() {
     1428     * @since 6.2.1 Accumulates shift for internal cursor and passed pointer.
     1429     *
     1430     * @param int $shift_this_point Accumulate and return shift for this position.
     1431     * @return int How many bytes the given pointer moved in response to the updates.
     1432     */
     1433    private function apply_attributes_updates( $shift_this_point = 0 ) {
    14671434        if ( ! count( $this->lexical_updates ) ) {
    1468             return;
    1469         }
     1435            return 0;
     1436        }
     1437
     1438        $accumulated_shift_for_given_point = 0;
    14701439
    14711440        /*
     
    14811450        usort( $this->lexical_updates, array( self::class, 'sort_start_ascending' ) );
    14821451
     1452        $bytes_already_copied = 0;
     1453        $output_buffer        = '';
    14831454        foreach ( $this->lexical_updates as $diff ) {
    1484             $this->output_buffer       .= substr( $this->html, $this->bytes_already_copied, $diff->start - $this->bytes_already_copied );
    1485             $this->output_buffer       .= $diff->text;
    1486             $this->bytes_already_copied = $diff->end;
    1487         }
     1455            $shift = strlen( $diff->text ) - ( $diff->end - $diff->start );
     1456
     1457            // Adjust the cursor position by however much an update affects it.
     1458            if ( $diff->start <= $this->bytes_already_parsed ) {
     1459                $this->bytes_already_parsed += $shift;
     1460            }
     1461
     1462            // Accumulate shift of the given pointer within this function call.
     1463            if ( $diff->start <= $shift_this_point ) {
     1464                $accumulated_shift_for_given_point += $shift;
     1465            }
     1466
     1467            $output_buffer        .= substr( $this->html, $bytes_already_copied, $diff->start - $bytes_already_copied );
     1468            $output_buffer        .= $diff->text;
     1469            $bytes_already_copied  = $diff->end;
     1470        }
     1471
     1472        $this->html = $output_buffer . substr( $this->html, $bytes_already_copied );
    14881473
    14891474        /*
     
    15251510
    15261511        $this->lexical_updates = array();
     1512
     1513        return $accumulated_shift_for_given_point;
    15271514    }
    15281515
     
    15621549        // Point this tag processor before the sought tag opener and consume it.
    15631550        $this->bytes_already_parsed = $this->bookmarks[ $bookmark_name ]->start;
    1564         $this->bytes_already_copied = $this->bytes_already_parsed;
    1565         $this->output_buffer        = substr( $this->html, 0, $this->bytes_already_copied );
    15661551        return $this->next_tag( array( 'tag_closers' => 'visit' ) );
    15671552    }
     
    20832068     *
    20842069     * @since 6.2.0
     2070     * @since 6.2.1 Shifts the internal cursor corresponding to the applied updates.
    20852071     *
    20862072     * @return string The processed HTML.
     
    20932079         * updated, return the original document and avoid a string copy.
    20942080         */
    2095         if ( $requires_no_updating && 0 === $this->bytes_already_copied ) {
     2081        if ( $requires_no_updating ) {
    20962082            return $this->html;
    20972083        }
    20982084
    20992085        /*
    2100          * If there are no updates left to apply, but some have already
    2101          * been applied, then finish by copying the rest of the input
    2102          * to the end of the updated document and return.
    2103          */
    2104         if ( $requires_no_updating && $this->bytes_already_copied > 0 ) {
    2105             $this->html                 = $this->output_buffer . substr( $this->html, $this->bytes_already_copied );
    2106             $this->bytes_already_copied = strlen( $this->output_buffer );
    2107             return $this->output_buffer . substr( $this->html, $this->bytes_already_copied );
    2108         }
    2109 
    2110         // Apply the updates, rewind to before the current tag, and reparse the attributes.
    2111         $content_up_to_opened_tag_name = $this->output_buffer . substr(
    2112             $this->html,
    2113             $this->bytes_already_copied,
    2114             $this->tag_name_starts_at + $this->tag_name_length - $this->bytes_already_copied
    2115         );
     2086         * Keep track of the position right before the current tag. This will
     2087         * be necessary for reparsing the current tag after updating the HTML.
     2088         */
     2089        $before_current_tag = $this->tag_name_starts_at - 1;
    21162090
    21172091        /*
    2118          * 1. Apply the edits by flushing them to the output buffer and updating the copied byte count.
    2119          *
    2120          * Note: `apply_attributes_updates()` modifies `$this->output_buffer`.
     2092         * 1. Apply the enqueued edits and update all the pointers to reflect those changes.
    21212093         */
    21222094        $this->class_name_updates_to_attributes_updates();
    2123         $this->apply_attributes_updates();
     2095        $before_current_tag += $this->apply_attributes_updates( $before_current_tag );
    21242096
    21252097        /*
    2126          * 2. Replace the original HTML with the now-updated HTML so that it's possible to
    2127          *    seek to a previous location and have a consistent view of the updated document.
    2128          */
    2129         $this->html                 = $this->output_buffer . substr( $this->html, $this->bytes_already_copied );
    2130         $this->output_buffer        = $content_up_to_opened_tag_name;
    2131         $this->bytes_already_copied = strlen( $this->output_buffer );
    2132 
    2133         /*
    2134          * 3. Point this tag processor at the original tag opener and consume it
     2098         * 2. Rewind to before the current tag and reparse to get updated attributes.
    21352099         *
    21362100         * At this point the internal cursor points to the end of the tag name.
     
    21442108         *                 \<-/ back up by strlen("em") + 1 ==> 3
    21452109         */
    2146         $this->bytes_already_parsed = strlen( $content_up_to_opened_tag_name ) - $this->tag_name_length - 1;
     2110
     2111        // Store existing state so it can be restored after reparsing.
     2112        $previous_parsed_byte_count = $this->bytes_already_parsed;
     2113        $previous_query             = $this->last_query;
     2114
     2115        // Reparse attributes.
     2116        $this->bytes_already_parsed = $before_current_tag;
    21472117        $this->next_tag();
     2118
     2119        // Restore previous state.
     2120        $this->bytes_already_parsed = $previous_parsed_byte_count;
     2121        $this->parse_query( $previous_query );
    21482122
    21492123        return $this->html;
  • branches/6.2/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php

    r55707 r55708  
    520520        $p = new WP_HTML_Tag_Processor( 'abc</title>' );
    521521        $this->assertTrue( $p->next_tag( array( 'tag_closers' => 'visit' ) ), 'Did not find the </title> tag closer when there was no tag opener' );
     522    }
     523
     524    /**
     525     * Verifies that updates to a document before calls to `get_updated_html()` don't
     526     * lead to the Tag Processor jumping to the wrong tag after the updates.
     527     *
     528     * @ticket 58179
     529     *
     530     * @covers WP_HTML_Tag_Processor::get_updated_html
     531     */
     532    public function test_internal_pointer_returns_to_original_spot_after_inserting_content_before_cursor() {
     533        $tags = new WP_HTML_Tag_Processor( '<div>outside</div><section><div><img>inside</div></section>' );
     534
     535        $tags->next_tag();
     536        $tags->add_class( 'foo' );
     537        $tags->next_tag( 'section' );
     538
     539        // Return to this spot after moving ahead.
     540        $tags->set_bookmark( 'here' );
     541
     542        // Move ahead.
     543        $tags->next_tag( 'img' );
     544        $tags->seek( 'here' );
     545        $this->assertSame( '<div class="foo">outside</div><section><div><img>inside</div></section>', $tags->get_updated_html() );
     546        $this->assertSame( 'SECTION', $tags->get_tag() );
     547        $this->assertFalse( $tags->is_tag_closer() );
    522548    }
    523549
     
    14731499
    14741500        $p = new WP_HTML_Tag_Processor( $input );
    1475         $this->assertTrue( $p->next_tag( 'div' ), 'Querying an existing tag did not return true' );
     1501        $this->assertTrue( $p->next_tag( 'div' ), 'Did not find first DIV tag in input.' );
    14761502        $p->set_attribute( 'data-details', '{ "key": "value" }' );
    14771503        $p->add_class( 'is-processed' );
     
    14831509                )
    14841510            ),
    1485             'Querying an existing tag did not return true'
     1511            'Did not find the first BtnGroup DIV tag'
    14861512        );
    14871513        $p->remove_class( 'BtnGroup' );
     
    14951521                )
    14961522            ),
    1497             'Querying an existing tag did not return true'
     1523            'Did not find the second BtnGroup DIV tag'
    14981524        );
    14991525        $p->remove_class( 'BtnGroup' );
     
    15081534                )
    15091535            ),
    1510             'Querying an existing tag did not return true'
     1536            'Did not find third BUTTON tag with "btn" CSS class'
    15111537        );
    15121538        $p->remove_attribute( 'class' );
    1513         $this->assertFalse( $p->next_tag( 'non-existent' ), 'Querying a non-existing tag did not return false' );
     1539        $this->assertFalse( $p->next_tag( 'non-existent' ), "Found a {$p->get_tag()} tag when none should have been found." );
    15141540        $p->set_attribute( 'class', 'test' );
    15151541        $this->assertSame( $expected_output, $p->get_updated_html(), 'Calling get_updated_html after updating the attributes did not return the expected HTML' );
Note: See TracChangeset for help on using the changeset viewer.