Make WordPress Core

Changeset 58040


Ignore:
Timestamp:
04/24/2024 07:43:02 AM (7 months ago)
Author:
dmsnell
Message:

HTML API: Fix detection of single-character funky comments.

Since [60428] the Tag Processor has been misidentifying single-character
funky comments. It has been asserting that the full token length for a
funky comment must be at least three characters after the opening (e.g.
</1>), but it has also been starting its search for the closing > after
those same three characters. As a result, it has been skipping the
actual close of these funky comments and swallowing the syntax that
follows until it finds a >, often consuming the next tag in the process.

This patch fixes the detector so that the token following a funky comment is found again.

Developed in https://github.com/WordPress/wordpress-develop/pull/6412
Discussed in https://core.trac.wordpress.org/ticket/60170

Follow-up to [60428].
Fixes #60170.
Props dmsnell, gziolo, jonsurrell.

Location:
trunk
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/wp-includes/html-api/class-wp-html-tag-processor.php

    r57987 r58040  
    16301630             * https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state
    16311631             */
    1632             if ( '!' === $html[ $at + 1 ] ) {
     1632            if ( ! $this->is_closing_tag && '!' === $html[ $at + 1 ] ) {
    16331633                /*
    16341634                 * `<!--` transitions to a comment state – apply further comment rules.
     
    18101810             */
    18111811            if ( '>' === $html[ $at + 1 ] ) {
     1812                // `<>` is interpreted as plaintext.
     1813                if ( ! $this->is_closing_tag ) {
     1814                    ++$at;
     1815                    continue;
     1816                }
     1817
    18121818                $this->parser_state         = self::STATE_PRESUMPTUOUS_TAG;
    18131819                $this->token_length         = $at + 2 - $this->token_starts_at;
     
    18201826             * See https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
    18211827             */
    1822             if ( '?' === $html[ $at + 1 ] ) {
     1828            if ( ! $this->is_closing_tag && '?' === $html[ $at + 1 ] ) {
    18231829                $closer_at = strpos( $html, '>', $at + 2 );
    18241830                if ( false === $closer_at ) {
     
    18921898                }
    18931899
    1894                 $closer_at = strpos( $html, '>', $at + 3 );
     1900                $closer_at = strpos( $html, '>', $at + 2 );
    18951901                if ( false === $closer_at ) {
    18961902                    $this->parser_state = self::STATE_INCOMPLETE_INPUT;
  • trunk/tests/phpunit/tests/html-api/wpHtmlTagProcessor-token-scanning.php

    r57508 r58040  
    762762
    763763    /**
     764     * Ensures that various funky comments are properly parsed.
     765     *
     766     * @ticket 60170
     767     *
     768     * @since 6.6.0
     769     *
     770     * @covers WP_HTML_Tag_Processor::next_token
     771     *
     772     * @dataProvider data_various_funky_comments
     773     *
     774     * @param string $funky_comment_html HTML containing a funky comment.
     775     * @param string $modifiable_text    Expected modifiable text of first funky comment in HTML.
     776     */
     777    public function test_various_funky_comments( $funky_comment_html, $modifiable_text ) {
     778        $processor = new WP_HTML_Tag_Processor( $funky_comment_html );
     779        while ( '#funky-comment' !== $processor->get_token_type() && $processor->next_token() ) {
     780            continue;
     781        }
     782
     783        $this->assertSame(
     784            '#funky-comment',
     785            $processor->get_token_type(),
     786            'Failed to find the expected funky comment.'
     787        );
     788
     789        $this->assertSame(
     790            $modifiable_text,
     791            $processor->get_modifiable_text(),
     792            'Found the wrong modifiable text span inside a funky comment.'
     793        );
     794    }
     795
     796    /**
     797     * Data provider.
     798     *
     799     * @return array[].
     800     */
     801    public static function data_various_funky_comments() {
     802        return array(
     803            'Space'          => array( '</ >', ' ' ),
     804            'Short-bang'     => array( '</!>', '!' ),
     805            'Question mark'  => array( '</?>', '?' ),
     806            'Short-slash'    => array( '<//>', '/' ),
     807            'Bit (no attrs)' => array( '<//wp:post-meta>', '/wp:post-meta' ),
     808            'Bit (attrs)'    => array( '<//wp:post-meta key=isbn>', '/wp:post-meta key=isbn' ),
     809            'Curly-wrapped'  => array( '</{json}>', '{json}' ),
     810            'Before P'       => array( '</1><p>', '1' ),
     811            'After P'        => array( '<p></__("Read more")></p>', '__("Read more")' ),
     812            'Reference'      => array( '</&gt;>', '&gt;' ),
     813        );
     814    }
     815
     816    /**
    764817     * Test helper that wraps a string in double quotes.
    765818     *
  • trunk/tests/phpunit/tests/interactivity-api/wpInteractivityAPI.php

    r57987 r58040  
    589589     *
    590590     * @covers ::process_directives
    591      */
    592     public function test_process_directives_doesnt_change_html_if_contains_unbalanced_tags() {
     591     *
     592     * @dataProvider data_html_with_unbalanced_tags
     593     *
     594     * @param string $html HTML containing unbalanced tags and also a directive.
     595     */
     596    public function test_process_directives_doesnt_change_html_if_contains_unbalanced_tags( $html ) {
    593597        $this->interactivity->state( 'myPlugin', array( 'id' => 'some-id' ) );
    594598
    595         $html_samples = array(
    596             '<div data-wp-bind--id="myPlugin::state.id">Inner content</div></div>',
    597             '<div data-wp-bind--id="myPlugin::state.id">Inner content</div><div>',
    598             '<div><div data-wp-bind--id="myPlugin::state.id">Inner content</div>',
    599             '</div><div data-wp-bind--id="myPlugin::state.id">Inner content</div>',
    600             '<div data-wp-bind--id="myPlugin::state.id">Inner<div>content</div>',
    601             '<div data-wp-bind--id="myPlugin::state.id">Inner</div>content</div>',
    602             '<div data-wp-bind--id="myPlugin::state.id"><span>Inner content</div>',
    603             '<div data-wp-bind--id="myPlugin::state.id">Inner content</div></span>',
    604             '<div data-wp-bind--id="myPlugin::state.id"><span>Inner content</div></span>',
    605             '<div data-wp-bind--id="myPlugin::state.id">Inner conntent</ ></div>',
    606         );
    607 
    608         foreach ( $html_samples as $html ) {
    609             $processed_html = $this->interactivity->process_directives( $html );
    610             $p              = new WP_HTML_Tag_Processor( $processed_html );
    611             $p->next_tag();
    612             $this->assertNull( $p->get_attribute( 'id' ) );
    613         }
     599        $processed_html = $this->interactivity->process_directives( $html );
     600        $p              = new WP_HTML_Tag_Processor( $processed_html );
     601        $p->next_tag();
     602        $this->assertNull( $p->get_attribute( 'id' ) );
     603    }
     604
     605    /**
     606     * Data provider.
     607     *
     608     * @return array[].
     609     */
     610    public static function data_html_with_unbalanced_tags() {
     611        return array(
     612            'DIV closer after'   => array( '<div data-wp-bind--id="myPlugin::state.id">Inner content</div></div>' ),
     613            'DIV opener after'   => array( '<div data-wp-bind--id="myPlugin::state.id">Inner content</div><div>' ),
     614            'DIV opener before'  => array( '<div><div data-wp-bind--id="myPlugin::state.id">Inner content</div>' ),
     615            'DIV closer before'  => array( '</div><div data-wp-bind--id="myPlugin::state.id">Inner content</div>' ),
     616            'DIV opener inside'  => array( '<div data-wp-bind--id="myPlugin::state.id">Inner<div>content</div>' ),
     617            'DIV closer inside'  => array( '<div data-wp-bind--id="myPlugin::state.id">Inner</div>content</div>' ),
     618            'SPAN opener inside' => array( '<div data-wp-bind--id="myPlugin::state.id"><span>Inner content</div>' ),
     619            'SPAN closer after'  => array( '<div data-wp-bind--id="myPlugin::state.id">Inner content</div></span>' ),
     620            'SPAN overlapping'   => array( '<div data-wp-bind--id="myPlugin::state.id"><span>Inner content</div></span>' ),
     621        );
    614622    }
    615623
Note: See TracChangeset for help on using the changeset viewer.