Make WordPress Core

Changeset 59099


Ignore:
Timestamp:
09/27/2024 12:42:47 AM (6 months ago)
Author:
dmsnell
Message:

HTML API: Switch to HTML namespace when entering Integration Points.

When encountering inline SVG and MathML content in an HTML document, there are certain "integration points" which transition back into the HTML parsing ruleset. Previously, the HTML API was incorrectly switching into the namespace of the element transitioning into that ruleset.

In this patch, the correct transition is made, where all integration points refer to HTML rules, while non-integration points refer to the rules of the namespace corresponding to the token itself.

Developed in https://github.com/wordpress/wordpress-develop/pull/7425
Discussed in https://core.trac.wordpress.org/ticket/61576

Props dmsnell, jonsurrell.
See #61576.

Location:
trunk
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/wp-includes/html-api/class-wp-html-processor.php

    r59076 r59099  
    394394                $this->element_queue[] = new WP_HTML_Stack_Event( $token, WP_HTML_Stack_Event::PUSH, $provenance );
    395395
    396                 $this->change_parsing_namespace( $token->namespace );
     396                $this->change_parsing_namespace( $token->integration_node_type ? 'html' : $token->namespace );
    397397            }
    398398        );
     
    404404                $provenance            = ( ! $same_node || $is_virtual ) ? 'virtual' : 'real';
    405405                $this->element_queue[] = new WP_HTML_Stack_Event( $token, WP_HTML_Stack_Event::POP, $provenance );
     406
    406407                $adjusted_current_node = $this->get_adjusted_current_node();
    407                 $this->change_parsing_namespace(
    408                     $adjusted_current_node
    409                         ? $adjusted_current_node->namespace
    410                         : 'html'
    411                 );
     408
     409                if ( $adjusted_current_node ) {
     410                    $this->change_parsing_namespace( $adjusted_current_node->integration_node_type ? 'html' : $adjusted_current_node->namespace );
     411                } else {
     412                    $this->change_parsing_namespace( 'html' );
     413                }
    412414            }
    413415        );
  • trunk/tests/phpunit/tests/html-api/wpHtmlProcessor.php

    r59014 r59099  
    746746        );
    747747    }
     748
     749    /**
     750     * Ensures that the processor correctly adjusts the namespace
     751     * for elements inside HTML integration points.
     752     *
     753     * @ticket 61576
     754     */
     755    public function test_adjusts_for_html_integration_points_in_svg() {
     756        $processor = WP_HTML_Processor::create_full_parser(
     757            '<svg><foreignobject><image /><svg /><image />'
     758        );
     759
     760        // At the foreignObject, the processor is in the SVG namespace.
     761        $this->assertTrue(
     762            $processor->next_tag( 'foreignObject' ),
     763            'Failed to find "foreignObject" under test: check test setup.'
     764        );
     765
     766        $this->assertSame(
     767            'svg',
     768            $processor->get_namespace(),
     769            'Found the wrong namespace for the "foreignObject" element.'
     770        );
     771
     772        /*
     773         * The IMAGE tag should be handled according to HTML processing rules
     774         * and transformed to an IMG tag because `foreignObject` is an HTML
     775         * integration point. At this point, the processor is entering the HTML
     776         * integration point.
     777         */
     778        $this->assertTrue(
     779            $processor->next_tag( 'IMG' ),
     780            'Failed to find expected "IMG" tag from "<IMAGE>" source tag.'
     781        );
     782
     783        $this->assertSame(
     784            'html',
     785            $processor->get_namespace(),
     786            'Found the wrong namespace for the transformed "IMAGE"/"IMG" element.'
     787        );
     788
     789        /*
     790         * Again, the IMAGE tag should be handled according to HTML processing
     791         * rules and transformed to an IMG tag because `foreignObject` is an
     792         * HTML integration point. At this point, the processor has entered
     793         * SVG and is returning to an HTML integration point.
     794         */
     795        $this->assertTrue(
     796            $processor->next_tag( 'IMG' ),
     797            'Failed to find expected "IMG" tag from "<IMAGE>" source tag.'
     798        );
     799
     800        $this->assertSame(
     801            'html',
     802            $processor->get_namespace(),
     803            'Found the wrong namespace for the transformed "IMAGE"/"IMG" element.'
     804        );
     805    }
     806
     807    /**
     808     * Ensures that the processor correctly adjusts the namespace
     809     * for elements inside MathML integration points.
     810     *
     811     * @ticket 61576
     812     */
     813    public function test_adjusts_for_mathml_integration_points() {
     814        $processor = WP_HTML_Processor::create_fragment(
     815            '<mo><image /></mo><math><image /><mo><image /></mo></math>'
     816        );
     817
     818        // Advance token-by-token to ensure matching the right raw "<image />" token.
     819        $processor->next_token(); // Advance past the +MO.
     820        $processor->next_token(); // Advance into the +IMG.
     821
     822        $this->assertSame(
     823            'IMG',
     824            $processor->get_tag(),
     825            'Failed to find expected "IMG" tag from "<IMAGE>" source tag.'
     826        );
     827
     828        $this->assertSame(
     829            'html',
     830            $processor->get_namespace(),
     831            'Found the wrong namespace for the transformed "IMAGE"/"IMG" element.'
     832        );
     833
     834        // Advance token-by-token to ensure matching the right raw "<image />" token.
     835        $processor->next_token(); // Advance past the -MO.
     836        $processor->next_token(); // Advance past the +MATH.
     837        $processor->next_token(); // Advance into the +IMAGE.
     838
     839        $this->assertSame(
     840            'IMAGE',
     841            $processor->get_tag(),
     842            'Failed to find the un-transformed "<image />" tag.'
     843        );
     844
     845        $this->assertSame(
     846            'math',
     847            $processor->get_namespace(),
     848            'Found the wrong namespace for the transformed "IMAGE"/"IMG" element.'
     849        );
     850
     851        $processor->next_token(); // Advance past the +MO.
     852        $processor->next_token(); // Advance into the +IMG.
     853
     854        $this->assertSame(
     855            'IMG',
     856            $processor->get_tag(),
     857            'Failed to find expected "IMG" tag from "<IMAGE>" source tag.'
     858        );
     859
     860        $this->assertSame(
     861            'html',
     862            $processor->get_namespace(),
     863            'Found the wrong namespace for the transformed "IMAGE"/"IMG" element.'
     864        );
     865    }
    748866}
Note: See TracChangeset for help on using the changeset viewer.