Make WordPress Core

Changeset 61747


Ignore:
Timestamp:
02/26/2026 10:36:59 AM (7 weeks ago)
Author:
jonsurrell
Message:

HTML API: Preserve newlines when normalizing special elements.

Ensures normalization preserves content in PRE, LISTING, and TEXTAREA elements. These elements ignore a single leading newline during parsing. Normalization now injects a newline after the tag opener to trigger this behavior, preventing significant newlines from being incorrectly stripped.

Developed in https://github.com/WordPress/wordpress-develop/pull/10871.

Props jonsurrell, dmsnell, mukesh27.
Fixes #64607.

Location:
trunk
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/wp-includes/html-api/class-wp-html-processor.php

    r61699 r61747  
    1415 1415        $html .= '>';
    1416 1416
     1417        /*
     1418         * The HTML parser strips a leading newline immediately after the start
     1419         * tag of TEXTAREA, PRE, and LISTING elements. When serializing, prepend
     1420         * a leading newline to ensure the semantic HTML content is preserved.
     1421         *
     1422         * For example, `<pre>\n\nX</pre>` must not become `<pre>\nX</pre>` because its content
     1423         * has changed. However, `<pre>X</pre>` and `<pre>\nX</pre>` are _equivalent_.
     1424         *
     1425         * > A start tag whose tag name is "textarea"
     1426         * >   …
     1427         * >   If the next token is a U+000A LINE FEED (LF) character token, then ignore
     1428         * >   that token and move on to the next one. (Newlines at the start of textarea
     1429         * >   elements are ignored as an authoring convenience.)
     1430         *
     1431         * > A start tag whose tag name is one of: "pre", "listing"
     1432         * >   …
     1433         * >   If the next token is a U+000A LINE FEED (LF) character token, then ignore
     1434         * >   that token and move on to the next one. (Newlines at the start of pre blocks
     1435         * >   are ignored as an authoring convenience.)
     1436         *
     1437         * @see https://html.spec.whatwg.org/multipage/parsing.html
     1438         */
     1439        if ( 'TEXTAREA' === $tag_name || 'PRE' === $tag_name || 'LISTING' === $tag_name ) {
     1440            $html .= "\n";
     1441        }
     1442
    1417 1443        // Flush out self-contained elements.
    1418 1444        if ( $in_html && in_array( $tag_name, array( 'IFRAME', 'NOEMBED', 'NOFRAMES', 'SCRIPT', 'STYLE', 'TEXTAREA', 'TITLE', 'XMP' ), true ) ) {
  • trunk/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php

    r59410 r61747  
    322 322        );
    323 323    }
     324
     325    /**
     326     * Ensures that leading newlines in PRE, LISTING, and TEXTAREA elements are preserved upon normalization,
     327     * and that normalization is idempotent in these cases.
     328     *
     329     * @ticket 64607
     330     *
     331     * @dataProvider data_provider_normalize_special_leading_newline_cases
     332     *
     333     * @param string $input    HTML input containing leading newlines in PRE, LISTING, or TEXTAREA elements.
     334     * @param string $expected Expected output after normalization, which should preserve leading newlines.
     335     */
     336    public function test_normalize_special_leading_newline_handling( string $input, string $expected ) {
     337        $normalized = WP_HTML_Processor::normalize( $input );
     338        $this->assertEqualHTML( $expected, $normalized );
     339        $normalized_twice = WP_HTML_Processor::normalize( $normalized );
     340        $this->assertEqualHTML( $expected, $normalized_twice );
     341    }
     342
     343    /**
     344     * Data provider.
     345     *
     346     * @return array[]
     347     */
     348    public static function data_provider_normalize_special_leading_newline_cases() {
     349        return array(
     350            'Leading newline in PRE'             => array(
     351                "<pre>\nline 1\nline 2</pre>",
     352                "<pre>line 1\nline 2</pre>",
     353            ),
     354            'Double leading newline in PRE'      => array(
     355                "<pre>\n\nline 2\nline 3</pre>",
     356                "<pre>\n\nline 2\nline 3</pre>",
     357            ),
     358            'Multiple text nodes inside PRE'     => array(
     359                "<pre>\nline 1<!--comment--> still line 1</pre>",
     360                '<pre>line 1<!--comment--> still line 1</pre>',
     361            ),
     362            'Multiple text nodes inside PRE with leading newlines' => array(
     363                "<pre>\n\nline 2<!--comment--> still line 2</pre>",
     364                "<pre>\n\nline 2<!--comment--> still line 2</pre>",
     365            ),
     366            'Leading newline in LISTING'         => array(
     367                "<listing>\nline 1\nline 2</listing>",
     368                "<listing>line 1\nline 2</listing>",
     369            ),
     370            'Double leading newline in LISTING'  => array(
     371                "<listing>\n\nline 2\nline 3</listing>",
     372                "<listing>\n\nline 2\nline 3</listing>",
     373            ),
     374            'Multiple text nodes inside LISTING' => array(
     375                "<listing>\nline 1<!--comment--> still line 1</listing>",
     376                '<listing>line 1<!--comment--> still line 1</listing>',
     377            ),
     378            'Multiple text nodes inside LISTING with leading newlines' => array(
     379                "<listing>\n\nline 2<!--comment--> still line 2</listing>",
     380                "<listing>\n\nline 2<!--comment--> still line 2</listing>",
     381            ),
     382            'Leading newline in TEXTAREA'        => array(
     383                "<textarea>\nline 1\nline 2</textarea>",
     384                "<textarea>line 1\nline 2</textarea>",
     385            ),
     386            'Double leading newline in TEXTAREA' => array(
     387                "<textarea>\n\nline 2\nline 3</textarea>",
     388                "<textarea>\n\nline 2\nline 3</textarea>",
     389            ),
     390        );
     391    }
    324 392}
Note: See TracChangeset for help on using the changeset viewer.