Make WordPress Core


Ignore:
Timestamp:
11/21/2024 01:27:58 PM (18 months ago)
Author:
Bernhard Reiter
Message:

HTML API: Add method to create fragment at node.

HTML Fragment parsing always happens with a context node, which may impact how a fragment of HTML is parsed. HTML Fragment Processors can be instantiated with a BODY context node via WP_HTML_Processor::create_fragment( $html ).

This changeset adds an instance method called create_fragment_at_current_node( string $html_fragment ). It can only be called when the processor is paused at a #tag, with some additional constraints:

  • The opening and closing tags must appear in the HTML input (no virtual tokens).
  • No "self-contained" elements are allowed ( IFRAME, SCRIPT, TITLE, etc.).

If successful, the method will return a WP_HTML_Processor instance whose context is inherited from the node that the method was called from.

Props jonsurrell, bernhard-reiter, gziolo.
Fixes #62357.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/tests/phpunit/tests/html-api/wpHtmlProcessor.php

    r59422 r59444  
    1045 1045
    1046 1046    /**
     1047     * @ticket 62357
     1048     */
     1049    public function test_create_fragment_at_current_node_in_foreign_content() {
     1050        $processor = WP_HTML_Processor::create_full_parser( '<svg>' );
     1051        $this->assertTrue( $processor->next_tag( 'SVG' ) );
     1052
     1053        $fragment = $processor->create_fragment_at_current_node( "\0preceded-by-nul-byte<rect /><circle></circle><foreignobject><div></div></foreignobject><g>" );
     1054
     1055        $this->assertSame( 'svg', $fragment->get_namespace() );
     1056        $this->assertTrue( $fragment->next_token() );
     1057
     1058        /*
     1059         * In HTML parsing, a nul byte would be ignored.
     1060         * In SVG it should be replaced with a replacement character.
     1061         */
     1062        $this->assertSame( '#text', $fragment->get_token_type() );
     1063        $this->assertSame( "\u{FFFD}", $fragment->get_modifiable_text() );
     1064
     1065        $this->assertTrue( $fragment->next_tag( 'RECT' ) );
     1066        $this->assertSame( 'svg', $fragment->get_namespace() );
     1067
     1068        $this->assertTrue( $fragment->next_tag( 'CIRCLE' ) );
     1069        $this->assertSame( array( 'HTML', 'SVG', 'CIRCLE' ), $fragment->get_breadcrumbs() );
     1070        $this->assertTrue( $fragment->next_tag( 'foreignObject' ) );
     1071        $this->assertSame( 'svg', $fragment->get_namespace() );
     1072    }
     1073
     1074    /**
     1075     * @ticket 62357
     1076     */
     1077    public function test_create_fragment_at_current_node_in_foreign_content_integration_point() {
     1078        $processor = WP_HTML_Processor::create_full_parser( '<svg><foreignObject>' );
     1079        $this->assertTrue( $processor->next_tag( 'foreignObject' ) );
     1080
     1081        $fragment = $processor->create_fragment_at_current_node( "<image>\0not-preceded-by-nul-byte<rect />" );
     1082
     1083        // Nothing has been processed, the html namespace should be used for parsing as an integration point.
     1084        $this->assertSame( 'html', $fragment->get_namespace() );
     1085
     1086        // HTML parsing transforms IMAGE into IMG.
     1087        $this->assertTrue( $fragment->next_tag( 'IMG' ) );
     1088
     1089        $this->assertTrue( $fragment->next_token() );
     1090
     1091        // In HTML parsing, the nul byte is ignored and the text is reached.
     1092        $this->assertSame( '#text', $fragment->get_token_type() );
     1093        $this->assertSame( 'not-preceded-by-nul-byte', $fragment->get_modifiable_text() );
     1094
     1095        /*
     1096         * svg:foreignObject is an HTML integration point, so the processor should be in the HTML namespace.
     1097         * RECT is an HTML element here, meaning it may have the self-closing flag but does not self-close.
     1098         */
     1099        $this->assertTrue( $fragment->next_tag( 'RECT' ) );
     1100        $this->assertSame( array( 'HTML', 'FOREIGNOBJECT', 'RECT' ), $fragment->get_breadcrumbs() );
     1101        $this->assertSame( 'html', $fragment->get_namespace() );
     1102        $this->assertTrue( $fragment->has_self_closing_flag() );
     1103        $this->assertTrue( $fragment->expects_closer() );
     1104    }
     1105
     1106    /**
    1047 1107     * Ensure that lowercased tag_name query matches tags case-insensitively.
    1048 1108     *
Note: See TracChangeset for help on using the changeset viewer.