Make WordPress Core

Changeset 57815


Ignore:
Timestamp:
03/12/2024 02:25:30 PM (7 weeks ago)
Author:
audrasjb
Message:

HTML API: Defer applying attribute updates until necessary.

When making repeated updates to a document, the Tag Processor will end
up copying the entire document once for every update. This can lead to
catastrophic behavior in the worst case.

However, when batch-applying updates it's able to copy chunks of the
document in one thread and only end up copying the entire document once
for the entire batch.

Previously the Tag Processor has been eagerly applying updates, but in
this patch it defers applying those updates as long as possible.
Developed in https://github.com/WordPress/wordpress-develop/pull/6120
Discussed in https://core.trac.wordpress.org/ticket/60697

Follow-up to [55706], [56941], [57348].

Reviewed by swissspidy.
Merges [57805] to the 6.5 branch.

Props dmsnell, bernhard-reiter, jonsurrell, westonruter.
Fixes #60697.

Location:
branches/6.5
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • branches/6.5

  • branches/6.5/src/wp-includes/html-api/class-wp-html-tag-processor.php

    r57738 r57815  
    838838     */
    839839    public function next_token() {
     840        return $this->base_class_next_token();
     841    }
     842
     843    /**
     844     * Internal method which finds the next token in the HTML document.
     845     *
     846     * This method is a protected internal function which implements the logic for
     847     * finding the next token in a document. It exists so that the parser can update
     848     * its state without affecting the location of the cursor in the document and
     849     * without triggering subclass methods for things like `next_token()`, e.g. when
     850     * applying patches before searching for the next token.
     851     *
     852     * @since 6.5.0
     853     *
     854     * @access private
     855     *
     856     * @return bool Whether a token was parsed.
     857     */
     858    private function base_class_next_token() {
    840859        $was_at = $this->bytes_already_parsed;
    841         $this->get_updated_html();
     860        $this->after_tag();
    842861
    843862        // Don't proceed if there's nothing more to scan.
     
    20422061     */
    20432062    private function after_tag() {
     2063        /*
     2064         * There could be lexical updates enqueued for an attribute that
     2065         * also exists on the next tag. In order to avoid conflating the
     2066         * attributes across the two tags, lexical updates with names
     2067         * need to be flushed to raw lexical updates.
     2068         */
     2069        $this->class_name_updates_to_attributes_updates();
     2070
     2071        /*
     2072         * Purge updates if there are too many. The actual count isn't
     2073         * scientific, but a few values from 100 to a few thousand were
     2074         * tested to find a practically useful limit.
     2075         *
     2076         * If the update queue grows too big, then the Tag Processor
     2077         * will spend more time iterating through them and lose the
     2078         * efficiency gains of deferring applying them.
     2079         */
     2080        if ( 1000 < count( $this->lexical_updates ) ) {
     2081            $this->get_updated_html();
     2082        }
     2083
     2084        foreach ( $this->lexical_updates as $name => $update ) {
     2085            /*
     2086             * Any updates appearing after the cursor should be applied
     2087             * before proceeding, otherwise they may be overlooked.
     2088             */
     2089            if ( $update->start >= $this->bytes_already_parsed ) {
     2090                $this->get_updated_html();
     2091                break;
     2092            }
     2093
     2094            if ( is_int( $name ) ) {
     2095                continue;
     2096            }
     2097
     2098            $this->lexical_updates[] = $update;
     2099            unset( $this->lexical_updates[ $name ] );
     2100        }
     2101
    20442102        $this->token_starts_at      = null;
    20452103        $this->token_length         = null;
     
    22312289
    22322290            // Adjust the cursor position by however much an update affects it.
    2233             if ( $diff->start <= $this->bytes_already_parsed ) {
     2291            if ( $diff->start < $this->bytes_already_parsed ) {
    22342292                $this->bytes_already_parsed += $shift;
    22352293            }
     
    31653223         */
    31663224        $this->bytes_already_parsed = $before_current_tag;
    3167         $this->parse_next_tag();
    3168         // Reparse the attributes.
    3169         while ( $this->parse_next_attribute() ) {
    3170             continue;
    3171         }
    3172 
    3173         $tag_ends_at                = strpos( $this->html, '>', $this->bytes_already_parsed );
    3174         $this->token_length         = $tag_ends_at - $this->token_starts_at;
    3175         $this->bytes_already_parsed = $tag_ends_at;
     3225        $this->base_class_next_token();
    31763226
    31773227        return $this->html;
  • branches/6.5/tests/phpunit/tests/html-api/wpHtmlTagProcessor-bookmark.php

    r57527 r57815  
    294294    /**
    295295     * @ticket 56299
     296     * @ticket 60697
    296297     *
    297298     * @covers WP_HTML_Tag_Processor::seek
     
    300301        $processor = new WP_HTML_Tag_Processor( '<div>First</div><div>Second</div>' );
    301302        $processor->next_tag();
    302         $processor->set_bookmark( 'first' );
    303         $processor->next_tag();
     303        $processor->set_attribute( 'id', 'one' );
     304        $processor->set_bookmark( 'first' );
     305        $processor->next_tag();
     306        $processor->set_attribute( 'id', 'two' );
    304307        $processor->add_class( 'second' );
    305308
     
    308311
    309312        $this->assertSame(
    310             '<div class="first">First</div><div class="second">Second</div>',
     313            'one',
     314            $processor->get_attribute( 'id' ),
     315            'Should have remembered attribute change from before the seek.'
     316        );
     317
     318        $this->assertSame(
     319            '<div class="first" id="one">First</div><div class="second" id="two">Second</div>',
    311320            $processor->get_updated_html(),
    312321            'The bookmark was updated incorrectly in response to HTML markup updates'
  • branches/6.5/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php

    r57508 r57815  
    27282728        $this->assertSame( 'test< /A>', $processor->get_modifiable_text(), 'Did not find complete text node.' );
    27292729    }
     2730
     2731    /**
     2732     * Ensures that updates which are enqueued in front of the cursor
     2733     * are applied before moving forward in the document.
     2734     *
     2735     * @ticket 60697
     2736     */
     2737    public function test_applies_updates_before_proceeding() {
     2738        $html = '<div><img></div><div><img></div>';
     2739
     2740        $subclass = new class( $html ) extends WP_HTML_Tag_Processor {
     2741            /**
     2742             * Inserts raw text after the current token.
     2743             *
     2744             * @param string $new_html Raw text to insert.
     2745             */
     2746            public function insert_after( $new_html ) {
     2747                $this->set_bookmark( 'here' );
     2748                $this->lexical_updates[] = new WP_HTML_Text_Replacement(
     2749                    $this->bookmarks['here']->start + $this->bookmarks['here']->length + 1,
     2750                    0,
     2751                    $new_html
     2752                );
     2753            }
     2754        };
     2755
     2756        $subclass->next_tag( 'img' );
     2757        $subclass->insert_after( '<p>snow-capped</p>' );
     2758
     2759        $subclass->next_tag();
     2760        $this->assertSame(
     2761            'P',
     2762            $subclass->get_tag(),
     2763            'Should have matched inserted HTML as next tag.'
     2764        );
     2765
     2766        $subclass->next_tag( 'img' );
     2767        $subclass->set_attribute( 'alt', 'mountain' );
     2768
     2769        $this->assertSame(
     2770            '<div><img><p>snow-capped</p></div><div><img alt="mountain"></div>',
     2771            $subclass->get_updated_html(),
     2772            'Should have properly applied the update from in front of the cursor.'
     2773        );
     2774    }
    27302775}
Note: See TracChangeset for help on using the changeset viewer.