Make WordPress Core

Changeset 58712


Ignore:
Timestamp:
07/12/2024 09:58:20 PM (3 months ago)
Author:
dmsnell
Message:

HTML API: Join successive text nodes in html5lib test representation.

Many tests from the html5lib test suite fail because of differences in
text handling between a DOM API and the HTML API, even though the
semantics of the parse are equivalent. For example, it's possible in
the HTML API to read multiple successive text nodes when the tokens
between them are ignored.

The test suite didn't account for this, so it reported failures for
those tests. This patch improves how the representation compared against
the html5lib expectations is built, so that those tests no longer fail
spuriously.

Developed in https://github.com/WordPress/wordpress-develop/pull/6984
Discussed in https://core.trac.wordpress.org/ticket/61576

Props bernhard-reiter, dmsnell, jonsurrell.
See #61576.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php

    r58281 r58712  
    4141        'tests1/line0342'           => "Closing P tag implicitly creates opener, which we don't visit.",
    4242        'tests1/line0720'           => 'Unimplemented: Reconstruction of active formatting elements.',
    43         'tests1/line0833'           => 'Bug.',
    4443        'tests15/line0001'          => 'Unimplemented: Reconstruction of active formatting elements.',
    4544        'tests15/line0022'          => 'Unimplemented: Reconstruction of active formatting elements.',
     
    5251        'tests25/line0169'          => 'Bug.',
    5352        'tests26/line0263'          => 'Bug: An active formatting element should be created for a trailing text node.',
    54         'tests7/line0354'           => 'Bug.',
    55         'tests8/line0001'           => 'Bug.',
    56         'tests8/line0020'           => 'Bug.',
    57         'tests8/line0037'           => 'Bug.',
    58         'tests8/line0052'           => 'Bug.',
    59         'webkit01/line0174'         => 'Bug.',
    6053    );
    61 
    6254
    6355    /**
     
    161153        $indent_level = 2;
    162154        $indent       = '  ';
     155        $was_text     = null;
     156        $text_node    = '';
    163157
    164158        while ( $processor->next_token() ) {
    165159            if ( ! is_null( $processor->get_last_error() ) ) {
    166160                return null;
     161            }
     162
     163            if ( $was_text && '#text' !== $processor->get_token_name() ) {
     164                $output   .= "{$text_node}\"\n";
     165                $was_text  = false;
     166                $text_node = '';
    167167            }
    168168
     
    199199                            $output .= str_repeat( $indent, $tag_indent + 1 ) . "{$attribute_name}=\"{$val}\"\n";
    200200                        }
     201
     202                        // Self-contained tags contain their inner contents as modifiable text.
     203                        $modifiable_text = $processor->get_modifiable_text();
     204                        if ( '' !== $modifiable_text ) {
     205                            $was_text = true;
     206                            if ( '' === $text_node ) {
     207                                $text_node = str_repeat( $indent, $indent_level ) . '"';
     208                            }
     209                            $text_node .= $modifiable_text;
     210                            --$indent_level;
     211                        }
    201212                    }
    202213
     
    204215
    205216                case '#text':
    206                     $output .= str_repeat( $indent, $indent_level ) . "\"{$processor->get_modifiable_text()}\"\n";
     217                    $was_text = true;
     218                    if ( '' === $text_node ) {
     219                        $text_node .= str_repeat( $indent, $indent_level ) . '"';
     220                    }
     221                    $text_node .= $processor->get_modifiable_text();
    207222                    break;
    208223
     
    237252        if ( $processor->paused_at_incomplete_token() ) {
    238253            return null;
     254        }
     255
     256        if ( '' !== $text_node ) {
     257            $output .= "${text_node}\"\n";
    239258        }
    240259
Note: See TracChangeset for help on using the changeset viewer.