Make WordPress Core

Changeset 58859


Ignore:
Timestamp:
08/06/2024 10:05:47 PM (6 weeks ago)
Author:
dmsnell
Message:

HTML API: Use full parser in html5lib tests.

Previously, the html5lib tests ran only in fragment parser mode, assuming
an IN BODY context. This limited the number of tests that could run and was
a result of the HTML Processor supporting only the IN BODY fragment parser.
In [58836], however, a full parser was added to the HTML Processor.

In this patch the full parser is utilized in order to run more of the
previously-skipped tests, asserting more behaviors in the HTML parsing.

Developed in https://github.com/wordpress/wordpress-develop/pull/7117
Discussed in https://core.trac.wordpress.org/ticket/61646

Props: dmsnell, jonsurrell.
See #61646.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php

    r58833 r58859  
    2323class Tests_HtmlApi_Html5lib extends WP_UnitTestCase {
    2424    /**
    25      * The HTML Processor only accepts HTML in document <body>.
    26      * Do not run tests that look for anything in document <head>.
    27      */
    28     const SKIP_HEAD_TESTS = true;
    29 
    30     /**
    3125     * Skip specific tests that may not be supported or have known issues.
    3226     */
    3327    const SKIP_TESTS = array(
    34         'adoption01/line0046' => 'Unimplemented: Reconstruction of active formatting elements.',
    35         'adoption01/line0159' => 'Unimplemented: Reconstruction of active formatting elements.',
    36         'adoption01/line0318' => 'Unimplemented: Reconstruction of active formatting elements.',
    37         'template/line0885'   => 'Unimplemented: no parsing of attributes on context node.',
    38         'tests1/line0720'     => 'Unimplemented: Reconstruction of active formatting elements.',
    39         'tests15/line0001'    => 'Unimplemented: Reconstruction of active formatting elements.',
    40         'tests15/line0022'    => 'Unimplemented: Reconstruction of active formatting elements.',
    41         'tests15/line0068'    => 'Unimplemented: no support outside of IN BODY yet.',
    42         'tests2/line0650'     => 'Whitespace only test never enters "in body" parsing mode.',
    43         'tests19/line0965'    => 'Unimplemented: no support outside of IN BODY yet.',
    44         'tests23/line0001'    => 'Unimplemented: Reconstruction of active formatting elements.',
    45         'tests23/line0041'    => 'Unimplemented: Reconstruction of active formatting elements.',
    46         'tests23/line0069'    => 'Unimplemented: Reconstruction of active formatting elements.',
    47         'tests23/line0101'    => 'Unimplemented: Reconstruction of active formatting elements.',
    48         'tests26/line0263'    => 'Bug: An active formatting element should be created for a trailing text node.',
    49         'webkit01/line0231'   => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
    50         'webkit02/line0013'   => "Asserting behavior with scripting flag enabled, which this parser doesn't support.",
    51         'webkit01/line0300'   => 'Unimplemented: no support outside of IN BODY yet.',
    52         'webkit01/line0310'   => 'Unimplemented: no support outside of IN BODY yet.',
    53         'webkit01/line0336'   => 'Unimplemented: no support outside of IN BODY yet.',
    54         'webkit01/line0349'   => 'Unimplemented: no support outside of IN BODY yet.',
    55         'webkit01/line0362'   => 'Unimplemented: no support outside of IN BODY yet.',
    56         'webkit01/line0375'   => 'Unimplemented: no support outside of IN BODY yet.',
     28        'comments01/line0155'    => 'Unimplemented: Need to access raw comment text on non-normative comments.',
     29        'comments01/line0169'    => 'Unimplemented: Need to access raw comment text on non-normative comments.',
     30        'html5test-com/line0129' => 'Unimplemented: Need to access raw comment text on non-normative comments.',
     31        'noscript01/line0014'    => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
     32        'tests1/line0692'        => 'Bug: Mixed whitespace, non-whitespace text in head not split correctly',
     33        'tests14/line0022'       => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
     34        'tests14/line0055'       => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
     35        'tests19/line0965'       => 'Bug: Mixed whitespace, non-whitespace text in head not split correctly.',
     36        'tests19/line1079'       => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
     37        'tests2/line0207'        => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
     38        'tests2/line0686'        => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
     39        'tests2/line0709'        => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
     40        'tests5/line0013'        => 'Bug: Mixed whitespace, non-whitespace text in head not split correctly.',
     41        'tests5/line0077'        => 'Bug: Mixed whitespace, non-whitespace text in head not split correctly.',
     42        'tests5/line0091'        => 'Bug: Mixed whitespace, non-whitespace text in head not split correctly',
     43        'webkit01/line0231'      => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
    5744    );
    5845
     
    6956     * @param string $expected_tree    Tree structure of parsed HTML.
    7057     */
    71     public function test_parse( $fragment_context, $html, $expected_tree ) {
     58    public function test_parse( ?string $fragment_context, string $html, string $expected_tree ) {
    7259        $processed_tree = self::build_tree_representation( $fragment_context, $html );
    7360
     
    7562            $this->markTestSkipped( 'Test includes unsupported markup.' );
    7663        }
    77 
    78         $this->assertSame( $expected_tree, $processed_tree, "HTML was not processed correctly:\n{$html}" );
     64        $fragment_detail = $fragment_context ? " in context <{$fragment_context}>" : '';
     65
     66        /*
     67         * The HTML Processor does not produce html, head, or body tags if it does not reach them.
     68         * HTML tree construction always produces these tags; the HTML API does not at this time.
     69         */
     70        $auto_generated_html_head_body = "<html>\n  <head>\n  <body>\n\n";
     71        $auto_generated_head_body      = "  <head>\n  <body>\n\n";
     72        $auto_generated_body           = "  <body>\n\n";
     73        if ( str_ends_with( $expected_tree, $auto_generated_html_head_body ) && ! str_ends_with( $processed_tree, $auto_generated_html_head_body ) ) {
     74            if ( str_ends_with( $processed_tree, "<html>\n  <head>\n\n" ) ) {
     75                $processed_tree = substr_replace( $processed_tree, "  <body>\n\n", -1 );
     76            } elseif ( str_ends_with( $processed_tree, "<html>\n\n" ) ) {
     77                $processed_tree = substr_replace( $processed_tree, "  <head>\n  <body>\n\n", -1 );
     78            } else {
     79                $processed_tree = substr_replace( $processed_tree, $auto_generated_html_head_body, -1 );
     80            }
     81        } elseif ( str_ends_with( $expected_tree, $auto_generated_head_body ) && ! str_ends_with( $processed_tree, $auto_generated_head_body ) ) {
     82            if ( str_ends_with( $processed_tree, "<head>\n\n" ) ) {
     83                $processed_tree = substr_replace( $processed_tree, "  <body>\n\n", -1 );
     84            } else {
     85                $processed_tree = substr_replace( $processed_tree, $auto_generated_head_body, -1 );
     86            }
     87        } elseif ( str_ends_with( $expected_tree, $auto_generated_body ) && ! str_ends_with( $processed_tree, $auto_generated_body ) ) {
     88            $processed_tree = substr_replace( $processed_tree, $auto_generated_body, -1 );
     89        }
     90
     91        $this->assertSame( $expected_tree, $processed_tree, "HTML was not processed correctly{$fragment_detail}:\n{$html}" );
    7992    }
    8093
     
    101114                $test_name  = "{$test_suite}/line{$line}";
    102115
    103                 if ( self::should_skip_test( $test_name, $test[3] ) ) {
     116                $test_context_element = $test[1];
     117
     118                if ( self::should_skip_test( $test_context_element, $test_name, $test[3] ) ) {
    104119                    continue;
    105120                }
     
    119134     * @return bool True if the test case should be skipped. False otherwise.
    120135     */
    121     private static function should_skip_test( $test_name, $expected_tree ): bool {
    122         if ( self::SKIP_HEAD_TESTS ) {
    123             $html_start = "<html>\n  <head>\n  <body>\n";
    124             if (
    125                 strlen( $expected_tree ) < strlen( $html_start ) ||
    126                 substr( $expected_tree, 0, strlen( $html_start ) ) !== $html_start
    127             ) {
    128                 return true;
    129             }
     136    private static function should_skip_test( ?string $test_context_element, string $test_name, string $expected_tree ): bool {
     137        if ( null !== $test_context_element && 'body' !== $test_context_element ) {
     138            return true;
    130139        }
    131140
     
    147156        $processor = $fragment_context
    148157            ? WP_HTML_Processor::create_fragment( $html, "<{$fragment_context}>" )
    149             : WP_HTML_Processor::create_fragment( $html );
     158            : WP_HTML_Processor::create_full_parser( $html );
    150159        if ( null === $processor ) {
    151160            return null;
    152161        }
    153162
    154         $output = "<html>\n  <head>\n  <body>\n";
    155 
    156         // Initially, assume we're 2 levels deep at: html > body > [position]
    157         $indent_level = 2;
     163        /*
     164         * The fragment parser starts 2 levels deep at: html > body > [position]
     165         * and requires adjustment to the initial parameters.
     166         * The full parser does not.
     167         */
     168        $output       = $fragment_context ? "<html>\n  <head>\n  <body>\n" : '';
     169        $indent_level = $fragment_context ? 2 : 0;
    158170        $indent       = '  ';
    159171        $was_text     = null;
     
    239251                    break;
    240252
     253                case '#funky-comment':
     254                    // Comments must be "<" then "!-- " then the data then " -->".
     255                    $output .= str_repeat( $indent, $indent_level ) . "<!-- {$processor->get_modifiable_text()} -->\n";
     256                    break;
     257
    241258                case '#comment':
    242259                    switch ( $processor->get_comment_type() ) {
     
    251268                            break;
    252269
     270                        case WP_HTML_Processor::COMMENT_AS_PI_NODE_LOOKALIKE:
     271                            $comment_text_content = "?{$processor->get_tag()}{$processor->get_modifiable_text()}?";
     272                            break;
     273
    253274                        default:
    254275                            throw new Error( "Unhandled comment type for tree construction: {$processor->get_comment_type()}" );
     
    302323        $test_dom             = '';
    303324        $test_context_element = null;
     325        $test_script_flag     = false;
    304326        $test_line_number     = 0;
    305327
     
    310332                // Finish section.
    311333                if ( "#data\n" === $line ) {
    312                     // Yield when switching from a previous state.
    313                     if ( $state ) {
     334                    /*
     335                     * Yield when switching from a previous state.
     336                     * Do not yield tests with the scripting flag enabled. The scripting flag
     337                     * is always disabled in the HTML API.
     338                     */
     339                    if ( $state && ! $test_script_flag ) {
    314340                        yield array(
    315341                            $test_line_number,
     
    326352                    $test_dom             = '';
    327353                    $test_context_element = null;
     354                    $test_script_flag     = false;
     355                }
     356                if ( "#script-on\n" === $line ) {
     357                    $test_script_flag = true;
    328358                }
    329359
     
    377407                case 'document':
    378408                    if ( '|' === $line[0] ) {
    379                         $test_dom .= substr( $line, 2 );
     409                        /*
     410                         * The next_token() method these tests rely on does not stop
     411                         * at doctype nodes. Strip doctypes from output.
     412                         * @todo Restore this line if and when the processor
     413                         * exposes doctypes.
     414                         */
     415                        if ( '| <!DOCTYPE ' !== substr( $line, 0, 12 ) ) {
     416                            $test_dom .= substr( $line, 2 );
     417                        }
    380418                    } else {
    381419                        // This is a text node that includes unescaped newlines.
Note: See TracChangeset for help on using the changeset viewer.