Make WordPress Core

Changeset 58424


Ignore:
Timestamp:
06/17/2024 12:02:50 PM (8 months ago)
Author:
dmsnell
Message:

KSES: Fix tests and detection of HTML Bogus Comment spans.

In [58418] a test was added without the test_ prefix in its function
name, and because of that, it wasn't run in the test suite.
The prefix has been added to ensure that it runs.

In the original patch, due to a logic bug, the loop that repeatedly
sanitizes the inner contents of a bogus comment never ran
more than once. This has been fixed.

This patch also handles one more case where kses wasn't
properly detecting the bogus comment state, and adds a test case
to cover it. It preserves some, but not all, constructions of
invalid markup declarations (only those where `<!` is followed by
a lowercase ASCII letter) so that it doesn't conflict with
existing behaviors around those and other kinds of invalid comments.

Props ellatrix, dmsnell.
See #61009.
Follow-up to [58418].

Location:
trunk
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/wp-includes/kses.php

    r58418 r58424  
    989989        |
    990990        </[^a-zA-Z][^>]*>  #  - Closing tags with invalid tag names.
     991        |
     992        <![^>]*>           #  - Invalid markup declaration nodes. Not all invalid nodes
     993                           #    are matched so as to avoid breaking legacy behaviors.
    991994    )
    992995    |
     
    11151118
    11161119    /*
    1117      * When a closing tag appears with a name that isn't a valid tag name,
    1118      * it must be interpreted as an HTML comment. It extends until the
    1119      * first `>` character after the initial opening `</`.
     1120     * When certain invalid syntax constructs appear, the HTML parser
     1121     * shifts into what's called the "bogus comment state." This is a
     1122     * plaintext state that consumes everything until the nearest `>`
     1123     * and then transforms the entire span into an HTML comment.
    11201124     *
    11211125     * Preserve these comments and do not treat them like tags.
     1126     *
     1127     * @see https://html.spec.whatwg.org/#bogus-comment-state
    11221128     */
    1123     if ( 1 === preg_match( '~^</[^a-zA-Z][^>]*>$~', $content ) ) {
    1124         $content     = substr( $content, 2, -1 );
    1125         $transformed = null;
    1126 
    1127         while ( $transformed !== $content ) {
    1128             $transformed = wp_kses( $content, $allowed_html, $allowed_protocols );
    1129             $content     = $transformed;
    1130         }
    1131 
    1132         return "</{$transformed}>";
     1129    if ( 1 === preg_match( '~^(?:</[^a-zA-Z][^>]*>|<![a-z][^>]*>)$~', $content ) ) {
     1130        /**
     1131         * Since the pattern matches `</…>` and also `<!…>`, this will
     1132         * preserve the type of the cleaned-up token in the output.
     1133         */
     1134        $opener  = $content[1];
     1135        $content = substr( $content, 2, -1 );
     1136
     1137        do {
     1138            $prev    = $content;
     1139            $content = wp_kses( $content, $allowed_html, $allowed_protocols );
     1140        } while ( $prev !== $content );
     1141
     1142        // Recombine the modified inner content with the original token structure.
     1143        return "<{$opener}{$content}>";
    11331144    }
    11341145
  • trunk/tests/phpunit/tests/kses.php

    r58418 r58424  
    19371937     * @ticket 61009
    19381938     *
     1939     * @dataProvider data_html_containing_various_kinds_of_html_comments
     1940     *
    19391941     * @param string $html_comment    HTML containing a comment; must not be a valid comment
    19401942     *                                but must be syntax which a browser interprets as a comment.
    19411943     * @param string $expected_output How `wp_kses()` ought to transform the comment.
    19421944     */
    1943     public function wp_kses_preserves_html_comments( $html_comment, $expected_output ) {
     1945    public function test_wp_kses_preserves_html_comments( $html_comment, $expected_output ) {
    19441946        $this->assertSame(
    19451947            $expected_output,
     
    19581960            'Normative HTML comment'            => array( 'before<!-- this is a comment -->after', 'before<!-- this is a comment -->after' ),
    19591961            'Closing tag with invalid tag name' => array( 'before<//not a tag>after', 'before<//not a tag>after' ),
     1962            'Incorrectly opened comment (Markup declaration)' => array( 'before<!also not a tag>after', 'before<!also not a tag>after' ),
    19601963        );
    19611964    }
Note: See TracChangeset for help on using the changeset viewer.