Make WordPress Core

Changeset 59469


Ignore:
Timestamp:
11/28/2024 02:25:51 PM (8 weeks ago)
Author:
Bernhard Reiter
Message:

HTML API: Make non-body fragment creation methods private.

The current implementation of `create_fragment` (and the underlying `create_fragment_at_current_node`) allows passing in a context that might result in a tree that cannot be represented by HTML. For example, a user might use `<p>` as context and attempt to create a fragment that also consists of a paragraph element, such as `<p>like this`. This would result in a paragraph node nested inside another -- something that can never result from parsing HTML.

To prevent this, this changeset makes `create_fragment_at_current_node` private and limits `create_fragment` to only `<body>` as context, while a comprehensive solution to allow other contexts is being worked on.

Follow-up to [59444], [59467].
Props jonsurrell, dmsnell, bernhard-reiter.
Fixes #62584.

Location:
trunk
Files:
1 deleted
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/wp-includes/html-api/class-wp-html-processor.php

    r59467 r59469  
    280280     * impact the parse, such as with a SCRIPT tag and its `type` attribute.
    281281     *
    282      * Example:
    283      *
    284      *     // Usually, snippets of HTML ought to be processed in the default `<body>` context:
    285      *     $processor = WP_HTML_Processor::create_fragment( '<p>Hi</p>' );
    286      *
    287      *     // Some fragments should be processed in the correct context like this SVG:
    288      *     $processor = WP_HTML_Processor::create_fragment( '<rect width="10" height="10" />', '<svg>' );
    289      *
    290      *     // This fragment with TD tags should be processed in a TR context:
    291      *     $processor = WP_HTML_Processor::create_fragment(
    292      *         '<td>1<td>2<td>3',
    293      *         '<table><tbody><tr>'
    294      *     );
    295      *
    296      * In order to create a fragment processor at the correct location, the
    297      * provided fragment will be processed as part of a full HTML document.
    298      * The processor will search for the last opener tag in the document and
    299      * create a fragment processor at that location. The document will be
    300      * forced into "no-quirks" mode by including the HTML5 doctype.
    301      *
    302      * For advanced usage and precise control over the context element, use
    303      * `WP_HTML_Processor::create_full_processor()` and
    304      * `WP_HTML_Processor::create_fragment_at_current_node()`.
    305      *
    306      * UTF-8 is the only allowed encoding. If working with a document that
    307      * isn't UTF-8, first convert the document to UTF-8, then pass in the
    308      * converted HTML.
     282     * ## Current HTML Support
     283     *
     284     *  - The only supported context is `<body>`, which is the default value.
     285     *  - The only supported document encoding is `UTF-8`, which is the default value.
    309286     *
    310287     * @since 6.4.0
    311288     * @since 6.6.0 Returns `static` instead of `self` so it can create subclass instances.
    312      * @since 6.8.0 Can create fragments with any context element.
    313289     *
    314290     * @param string $html     Input HTML fragment to process.
    315      * @param string $context  Context element for the fragment. Defaults to `<body>`.
     291     * @param string $context  Context element for the fragment, must be default of `<body>`.
    316292     * @param string $encoding Text encoding of the document; must be default of 'UTF-8'.
    317293     * @return static|null The created processor if successful, otherwise null.
    318294     */
    319295    public static function create_fragment( $html, $context = '<body>', $encoding = 'UTF-8' ) {
     296        if ( '<body>' !== $context || 'UTF-8' !== $encoding ) {
     297            return null;
     298        }
     299
    320300        $context_processor = static::create_full_parser( "<!DOCTYPE html>{$context}", $encoding );
    321301        if ( null === $context_processor ) {
     
    476456     * @return static|null The created processor if successful, otherwise null.
    477457     */
    478     public function create_fragment_at_current_node( string $html ) {
     458    private function create_fragment_at_current_node( string $html ) {
    479459        if ( $this->get_token_type() !== '#tag' || $this->is_tag_closer() ) {
    480460            _doing_it_wrong(
  • trunk/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php

    r59467 r59469  
    139139     */
    140140    private static function should_skip_test( ?string $test_context_element, string $test_name ): bool {
     141        if ( null !== $test_context_element && 'body' !== $test_context_element ) {
     142            return true;
     143        }
     144
    141145        if ( array_key_exists( $test_name, self::SKIP_TESTS ) ) {
    142146            return true;
     
    154158     */
    155159    private static function build_tree_representation( ?string $fragment_context, string $html ) {
    156         if ( $fragment_context ) {
    157             /*
    158              * If the string of characters starts with "svg ", the context
    159              * element is in the SVG namespace and the substring after
    160              * "svg " is the local name. If the string of characters starts
    161              * with "math ", the context element is in the MathML namespace
    162              * and the substring after "math " is the local name.
    163              * Otherwise, the context element is in the HTML namespace and
    164              * the string is the local name.
    165              */
    166             if ( str_starts_with( $fragment_context, 'svg ' ) ) {
    167                 $tag_name = substr( $fragment_context, 4 );
    168                 if ( 'svg' === $tag_name ) {
    169                     $fragment_context_html = '<svg>';
    170                 } else {
    171                     $fragment_context_html = "<svg><{$tag_name}>";
    172                 }
    173             } elseif ( str_starts_with( $fragment_context, 'math ' ) ) {
    174                 $tag_name = substr( $fragment_context, 5 );
    175                 if ( 'math' === $tag_name ) {
    176                     $fragment_context_html = '<math>';
    177                 } else {
    178                     $fragment_context_html = "<math><{$tag_name}>";
    179                 }
    180             } else {
    181                 // Tags that only appear in tables need a special case.
    182                 if ( in_array(
    183                     $fragment_context,
    184                     array(
    185                         'caption',
    186                         'col',
    187                         'colgroup',
    188                         'tbody',
    189                         'td',
    190                         'tfoot',
    191                         'th',
    192                         'thead',
    193                         'tr',
    194                     ),
    195                     true
    196                 ) ) {
    197                     $fragment_context_html = "<table><{$fragment_context}>";
    198                 } else {
    199                     $fragment_context_html = "<{$fragment_context}>";
    200                 }
    201             }
    202 
    203             $processor = WP_HTML_Processor::create_fragment( $html, $fragment_context_html );
    204 
    205             if ( null === $processor ) {
    206                 throw new WP_HTML_Unsupported_Exception( "Could not create a parser with the given fragment context: {$fragment_context}.", '', 0, '', array(), array() );
    207             }
    208         } else {
    209             $processor = WP_HTML_Processor::create_full_parser( $html );
    210             if ( null === $processor ) {
    211                 throw new Exception( 'Could not create a full parser.' );
    212             }
     160        $processor = $fragment_context
     161            ? WP_HTML_Processor::create_fragment( $html, "<{$fragment_context}>" )
     162            : WP_HTML_Processor::create_full_parser( $html );
     163        if ( null === $processor ) {
     164            throw new WP_HTML_Unsupported_Exception( "Could not create a parser with the given fragment context: {$fragment_context}.", '', 0, '', array(), array() );
    213165        }
    214166
Note: See TracChangeset for help on using the changeset viewer.