Make WordPress Core

Changeset 59399 for trunk


Ignore:
Timestamp:
11/13/2024 12:18:48 PM (3 months ago)
Author:
Bernhard Reiter
Message:

HTML API: Include doctype in full parser serialize.

Output DOCTYPE when calling WP_HTML_Processor::serialize on a full document that includes a DOCTYPE.

The DOCTYPE should be included in the serialized/normalized HTML output because it has an impact on how the document is handled, in particular whether the document should be handled in quirks or no-quirks mode.

This only affects the serialization of full parsers at this time because DOCTYPE tokens are currently ignored in all possible fragments. The omission of the DOCTYPE is subtle but can change the serialized document's quirks/no-quirks mode.

Props jonsurrell.
Fixes #62396.

Location:
trunk
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/wp-includes/html-api/class-wp-html-processor.php

    r59392 r59399  
    11791179
    11801180        switch ( $token_type ) {
     1181            case '#doctype':
     1182                $doctype = $this->get_doctype_info();
     1183                if ( null === $doctype ) {
     1184                    break;
     1185                }
     1186
     1187                $html .= '<!DOCTYPE';
     1188
     1189                if ( $doctype->name ) {
     1190                    $html .= " {$doctype->name}";
     1191                }
     1192
     1193                if ( null !== $doctype->public_identifier ) {
     1194                    $html .= " PUBLIC \"{$doctype->public_identifier}\"";
     1195                }
     1196                if ( null !== $doctype->system_identifier ) {
     1197                    if ( null === $doctype->public_identifier ) {
     1198                        $html .= ' SYSTEM';
     1199                    }
     1200                    $html .= " \"{$doctype->system_identifier}\"";
     1201                }
     1202                $html .= '>';
     1203                break;
     1204
    11811205            case '#text':
    11821206                $html .= htmlspecialchars( $this->get_modifiable_text(), ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML5, 'UTF-8' );
     
    11941218            case '#cdata-section':
    11951219                $html .= "<![CDATA[{$this->get_modifiable_text()}]]>";
    1196                 break;
    1197 
    1198             case 'html':
    1199                 $html .= '<!DOCTYPE html>';
    12001220                break;
    12011221        }
  • trunk/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php

    r59076 r59399  
    285285        );
    286286    }
     287
     288    /**
     289     * @ticket 62396
     290     *
     291     * @dataProvider data_provider_serialize_doctype
     292     */
     293    public function test_full_document_serialize_includes_doctype( string $doctype_input, string $doctype_output ) {
     294        $processor = WP_HTML_Processor::create_full_parser(
     295            "{$doctype_input}👌"
     296        );
     297        $this->assertSame(
     298            "{$doctype_output}<html><head></head><body>👌</body></html>",
     299            $processor->serialize()
     300        );
     301    }
     302
     303    /**
     304     * Data provider.
     305     *
     306     * @return array[]
     307     */
     308    public static function data_provider_serialize_doctype() {
     309        return array(
     310            'None'                   => array( '', '' ),
     311            'Empty'                  => array( '<!DOCTYPE>', '<!DOCTYPE>' ),
     312            'HTML5'                  => array( '<!DOCTYPE html>', '<!DOCTYPE html>' ),
     313            'Strange name'           => array( '<!DOCTYPE WordPress>', '<!DOCTYPE wordpress>' ),
     314            'With public'            => array( '<!DOCTYPE html PUBLIC "x">', '<!DOCTYPE html PUBLIC "x">' ),
     315            'With system'            => array( '<!DOCTYPE html SYSTEM "y">', '<!DOCTYPE html SYSTEM "y">' ),
     316            'With public and system' => array( '<!DOCTYPE html PUBLIC "x" "y">', '<!DOCTYPE html PUBLIC "x" "y">' ),
     317            'Weird casing'           => array( '<!docType HtmL pubLIc\'xxx\'"yyy" all this is ignored>', '<!DOCTYPE html PUBLIC "xxx" "yyy">' ),
     318        );
     319    }
    287320}
Note: See TracChangeset for help on using the changeset viewer.