Make WordPress Core


Ignore:
Timestamp:
07/31/2024 04:54:23 PM (2 months ago)
Author:
dmsnell
Message:

HTML API: Introduce full parsing mode in HTML Processor.

Until now, the HTML Processor has supported only one parsing mode,
the _fragment parsing mode_, in which it behaves the same way that
`node.innerHTML = html` does in the DOM. This mode assumes a
context node and doesn't support parsing an entire document.

As part of work to add more spec support to the HTML API, this patch
introduces a full parsing mode, which can parse a full HTML document
from start to end, including the doctype declaration and head tags.

Developed in https://github.com/wordpress/wordpress-develop/pull/6977
Discussed in https://core.trac.wordpress.org/ticket/61576

Props: dmsnell, jonsurrell.
See #61576.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php

    r58833 r58836  
    2626        $processor = WP_HTML_Processor::create_fragment( $html );
    2727
    28         $this->assertTrue( $processor->step(), "Failed to step into supported {$tag_name} element." );
     28        $this->assertTrue( $processor->next_token(), "Failed to step into supported {$tag_name} element." );
    2929        $this->assertSame( $tag_name, $processor->get_tag(), "Misread {$tag_name} as a {$processor->get_tag()} element." );
    3030    }
     
    9191            'INS',
    9292            'LI',
     93            'LINK',
    9394            'ISINDEX', // Deprecated.
    9495            'KBD',
     
    109110            'NEXTID', // Deprecated.
    110111            'NOBR', // Neutralized.
     112            'NOEMBED', // Neutralized.
     113            'NOFRAMES', // Neutralized.
    111114            'NOSCRIPT',
    112115            'OBJECT',
     
    123126            'RUBY',
    124127            'SAMP',
     128            'SCRIPT',
    125129            'SEARCH',
    126130            'SECTION',
     
    131135            'STRIKE',
    132136            'STRONG',
     137            'STYLE',
    133138            'SUB',
    134139            'SUMMARY',
    135140            'SUP',
    136141            'TABLE',
     142            'TEXTAREA',
    137143            'TIME',
     144            'TITLE',
    138145            'TT',
    139146            'U',
     
    141148            'VAR',
    142149            'VIDEO',
     150            'XMP', // Deprecated, use PRE instead.
    143151        );
    144152
    145153        $data = array();
    146154        foreach ( $supported_elements as $tag_name ) {
    147             $data[ $tag_name ] = array( "<{$tag_name}>", $tag_name );
     155            $closer = in_array( $tag_name, array( 'NOEMBED', 'NOFRAMES', 'SCRIPT', 'STYLE', 'TEXTAREA', 'TITLE', 'XMP' ), true )
     156                ? "</{$tag_name}>"
     157                : '';
     158
     159            $data[ $tag_name ] = array( "<{$tag_name}>{$closer}", $tag_name );
    148160        }
    149161
     
    183195    public static function data_unsupported_elements() {
    184196        $unsupported_elements = array(
    185             'BODY',
    186             'FRAME',
    187             'FRAMESET',
    188             'HEAD',
    189             'HTML',
    190             'IFRAME',
    191197            'MATH',
    192             'NOEMBED', // Neutralized.
    193             'NOFRAMES', // Neutralized.
    194198            'PLAINTEXT', // Neutralized.
    195             'SCRIPT',
    196             'STYLE',
    197199            'SVG',
    198             'TEXTAREA',
    199             'TITLE',
    200             'XMP', // Deprecated, use PRE instead.
    201200        );
    202201
Note: See TracChangeset for help on using the changeset viewer.