Make WordPress Core


Ignore:
Timestamp:
08/08/2024 07:23:53 AM (15 months ago)
Author:
dmsnell
Message:

HTML API: Add support for SVG and MathML (Foreign content)

As part of work to add more spec support to the HTML API, this patch adds
support for SVG and MathML elements, or more generally, "foreign content."

The rules in foreign content are a mix of XML and HTML parsing rules and
introduce additional complexity into the processor, but supporting them is
important in order to avoid getting lost when inside these elements.

Developed in https://github.com/wordpress/wordpress-develop/pull/6006
Discussed in https://core.trac.wordpress.org/ticket/61576

Props: dmsnell, jonsurrell, westonruter.
See #61576.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/wp-includes/html-api/class-wp-html-processor.php

    r58841 r58867  
    308308        $processor->bookmarks['context-node'] = new WP_HTML_Span( 0, 0 );
    309309
    310         $processor->state->stack_of_open_elements->push(
    311             new WP_HTML_Token(
    312                 'root-node',
    313                 'HTML',
    314                 false
    315             )
     310        $root_node = new WP_HTML_Token(
     311            'root-node',
     312            'HTML',
     313            false
    316314        );
     315
     316        $processor->state->stack_of_open_elements->push( $root_node );
    317317
    318318        $context_node = new WP_HTML_Token(
     
    393393                $provenance            = ( ! $same_node || $is_virtual ) ? 'virtual' : 'real';
    394394                $this->element_queue[] = new WP_HTML_Stack_Event( $token, WP_HTML_Stack_Event::PUSH, $provenance );
     395
     396                $this->change_parsing_namespace( $token->namespace );
    395397            }
    396398        );
     
    402404                $provenance            = ( ! $same_node || $is_virtual ) ? 'virtual' : 'real';
    403405                $this->element_queue[] = new WP_HTML_Stack_Event( $token, WP_HTML_Stack_Event::POP, $provenance );
     406                $adjusted_current_node = $this->get_adjusted_current_node();
     407                $this->change_parsing_namespace(
     408                    $adjusted_current_node
     409                        ? $adjusted_current_node->namespace
     410                        : 'html'
     411                );
    404412            }
    405413        );
     
    768776     * closing as soon as the processor advances to the next token.
    769777     *
     778     * @todo Review the self-closing logic when no node is present, ensure it
     779     *       matches the expectations in `step()`.
     780     *
    770781     * @since 6.6.0
    771      *
    772      * @todo When adding support for foreign content, ensure that
    773      *       this returns false for self-closing elements in the
    774      *       SVG and MathML namespace.
    775782     *
    776783     * @param WP_HTML_Token|null $node Optional. Node to examine, if provided.
     
    779786     *                   or `null` if not matched on any token.
    780787     */
    781     public function expects_closer( $node = null ): ?bool {
    782         $token_name = $node->node_name ?? $this->get_token_name();
     788    public function expects_closer( WP_HTML_Token $node = null ): ?bool {
     789        $token_name      = $node->node_name ?? $this->get_token_name();
     790        $token_namespace = $node->namespace ?? $this->get_namespace();
     791
    783792        if ( ! isset( $token_name ) ) {
    784793            return null;
     
    793802            self::is_void( $token_name ) ||
    794803            // Special atomic elements.
    795             in_array( $token_name, array( 'IFRAME', 'NOEMBED', 'NOFRAMES', 'SCRIPT', 'STYLE', 'TEXTAREA', 'TITLE', 'XMP' ), true )
     804            ( 'html' === $token_namespace && in_array( $token_name, array( 'IFRAME', 'NOEMBED', 'NOFRAMES', 'SCRIPT', 'STYLE', 'TEXTAREA', 'TITLE', 'XMP' ), true ) ) ||
     805            // Self-closing elements in foreign content.
     806            ( isset( $node ) && 'html' !== $node->namespace && $node->has_self_closing_flag )
    796807        );
    797808    }
     
    825836             * When moving on to the next node, therefore, if the bottom-most element
    826837             * on the stack is a void element, it must be closed.
    827              *
    828              * @todo Once self-closing foreign elements and BGSOUND are supported,
    829              *        they must also be implicitly closed here too. BGSOUND is
    830              *        special since it's only self-closing if the self-closing flag
    831              *        is provided in the opening tag, otherwise it expects a tag closer.
    832838             */
    833839            $top_node = $this->state->stack_of_open_elements->current_node();
    834             if ( isset( $top_node ) && ! static::expects_closer( $top_node ) ) {
     840            if ( isset( $top_node ) && ! $this->expects_closer( $top_node ) ) {
    835841                $this->state->stack_of_open_elements->pop();
    836842            }
     
    849855        }
    850856
    851         $this->state->current_token = new WP_HTML_Token(
    852             $this->bookmark_token(),
    853             $this->get_token_name(),
    854             $this->has_self_closing_flag(),
    855             $this->release_internal_bookmark_on_destruct
     857        $adjusted_current_node = $this->get_adjusted_current_node();
     858        $is_closer             = $this->is_tag_closer();
     859        $is_start_tag          = WP_HTML_Tag_Processor::STATE_MATCHED_TAG === $this->parser_state && ! $is_closer;
     860        $token_name            = $this->get_token_name();
     861
     862        if ( self::REPROCESS_CURRENT_NODE !== $node_to_process ) {
     863            $this->state->current_token = new WP_HTML_Token(
     864                $this->bookmark_token(),
     865                $token_name,
     866                $this->has_self_closing_flag(),
     867                $this->release_internal_bookmark_on_destruct
     868            );
     869        }
     870
     871        $parse_in_current_insertion_mode = (
     872            0 === $this->state->stack_of_open_elements->count() ||
     873            'html' === $adjusted_current_node->namespace ||
     874            (
     875                'math' === $adjusted_current_node->integration_node_type &&
     876                (
     877                    ( $is_start_tag && ! in_array( $token_name, array( 'MGLYPH', 'MALIGNMARK' ), true ) ) ||
     878                    '#text' === $token_name
     879                )
     880            ) ||
     881            (
     882                'math' === $adjusted_current_node->namespace &&
     883                'ANNOTATION-XML' === $adjusted_current_node->node_name &&
     884                $is_start_tag && 'SVG' === $token_name
     885            ) ||
     886            (
     887                'html' === $adjusted_current_node->integration_node_type &&
     888                ( $is_start_tag || '#text' === $token_name )
     889            )
    856890        );
    857891
    858892        try {
     893            if ( ! $parse_in_current_insertion_mode ) {
     894                return $this->step_in_foreign_content();
     895            }
     896
    859897            switch ( $this->state->insertion_mode ) {
    860898                case WP_HTML_Processor_State::INSERTION_MODE_INITIAL:
     
    923961                case WP_HTML_Processor_State::INSERTION_MODE_AFTER_AFTER_FRAMESET:
    924962                    return $this->step_after_after_frameset();
    925 
    926                 case WP_HTML_Processor_State::INSERTION_MODE_IN_FOREIGN_CONTENT:
    927                     return $this->step_in_foreign_content();
    928963
    929964                // This should be unreachable but PHP doesn't have total type checking on switch.
     
    18541889                if (
    18551890                    1 === $this->state->stack_of_open_elements->count() ||
    1856                     'BODY' !== $this->state->stack_of_open_elements->at( 2 ) ||
     1891                    'BODY' !== ( $this->state->stack_of_open_elements->at( 2 )->node_name ?? null ) ||
    18571892                    $this->state->stack_of_open_elements->contains( 'TEMPLATE' )
    18581893                ) {
     
    18801915                if (
    18811916                    1 === $this->state->stack_of_open_elements->count() ||
    1882                     'BODY' !== $this->state->stack_of_open_elements->at( 2 ) ||
     1917                    'BODY' !== ( $this->state->stack_of_open_elements->at( 2 )->node_name ?? null ) ||
    18831918                    false === $this->state->frameset_ok
    18841919                ) {
     
    20762111                    'DIV' !== $node->node_name &&
    20772112                    'P' !== $node->node_name &&
    2078                     $this->is_special( $node->node_name )
     2113                    self::is_special( $node )
    20792114                ) {
    20802115                    /*
     
    21372172             * > "figcaption", "figure", "footer", "header", "hgroup", "listing", "main",
    21382173             * > "menu", "nav", "ol", "pre", "search", "section", "summary", "ul"
    2139              *
    2140              * @todo This needs to check if the element in scope is an HTML element, meaning that
    2141              *       when SVG and MathML support is added, this needs to differentiate between an
    2142              *       HTML element of the given name, such as `<center>`, and a foreign element of
    2143              *       the same given name.
    21442174             */
    21452175            case '-ADDRESS':
     
    24122442            /*
    24132443             * > An end tag token whose tag name is one of: "applet", "marquee", "object"
    2414              *
    2415              * @todo This needs to check if the element in scope is an HTML element, meaning that
    2416              *       when SVG and MathML support is added, this needs to differentiate between an
    2417              *       HTML element of the given name, such as `<object>`, and a foreign element of
    2418              *       the same given name.
    24192444             */
    24202445            case '-APPLET':
     
    26802705                 * These ought to be handled in the attribute methods.
    26812706                 */
    2682 
    2683                 $this->bail( 'Cannot process MATH element, opening foreign content.' );
    2684                 break;
     2707                $this->state->current_token->namespace = 'math';
     2708                $this->insert_html_element( $this->state->current_token );
     2709                if ( $this->state->current_token->has_self_closing_flag ) {
     2710                    $this->state->stack_of_open_elements->pop();
     2711                }
     2712                return true;
    26852713
    26862714            /*
     
    26962724                 * These ought to be handled in the attribute methods.
    26972725                 */
    2698 
    2699                 $this->bail( 'Cannot process SVG element, opening foreign content.' );
    2700                 break;
     2726                $this->state->current_token->namespace = 'svg';
     2727                $this->insert_html_element( $this->state->current_token );
     2728                if ( $this->state->current_token->has_self_closing_flag ) {
     2729                    $this->state->stack_of_open_elements->pop();
     2730                }
     2731                return true;
    27012732
    27022733            /*
     
    27382769             */
    27392770            foreach ( $this->state->stack_of_open_elements->walk_up() as $node ) {
    2740                 /*
    2741                  * @todo This needs to check if the element in scope is an HTML element, meaning that
    2742                  *       when SVG and MathML support is added, this needs to differentiate between an
    2743                  *       HTML element of the given name, such as `<object>`, and a foreign element of
    2744                  *       the same given name.
    2745                  */
    2746                 if ( $token_name === $node->node_name ) {
     2771                if ( 'html' === $node->namespace && $token_name === $node->node_name ) {
    27472772                    break;
    27482773                }
    27492774
    2750                 if ( self::is_special( $node->node_name ) ) {
     2775                if ( self::is_special( $node ) ) {
    27512776                    // This is a parse error, ignore the token.
    27522777                    return $this->step();
     
    40704095     */
    40714096    private function step_in_foreign_content(): bool {
    4072         $this->bail( 'No support for parsing in the ' . WP_HTML_Processor_State::INSERTION_MODE_IN_FOREIGN_CONTENT . ' state.' );
     4097        $tag_name   = $this->get_token_name();
     4098        $token_type = $this->get_token_type();
     4099        $op_sigil   = '#tag' === $token_type ? ( $this->is_tag_closer() ? '-' : '+' ) : '';
     4100        $op         = "{$op_sigil}{$tag_name}";
     4101
     4102        /*
     4103         * > A start tag whose name is "font", if the token has any attributes named "color", "face", or "size"
     4104         *
     4105         * This section drawn out above the switch to more easily incorporate
     4106         * the additional rules based on the presence of the attributes.
     4107         */
     4108        if (
     4109            '+FONT' === $op &&
     4110            (
     4111                null !== $this->get_attribute( 'color' ) ||
     4112                null !== $this->get_attribute( 'face' ) ||
     4113                null !== $this->get_attribute( 'size' )
     4114            )
     4115        ) {
     4116            $op = '+FONT with attributes';
     4117        }
     4118
     4119        switch ( $op ) {
     4120            case '#text':
     4121                /*
     4122                 * > A character token that is U+0000 NULL
     4123                 *
     4124                 * This is handled by `get_modifiable_text()`.
     4125                 */
     4126
     4127                /*
     4128                 * Whitespace-only text does not affect the frameset-ok flag.
     4129                 * It is probably inter-element whitespace, but it may also
     4130                 * contain character references which decode only to whitespace.
     4131                 */
     4132                $text = $this->get_modifiable_text();
     4133                if ( strlen( $text ) !== strspn( $text, " \t\n\f\r" ) ) {
     4134                    $this->state->frameset_ok = false;
     4135                }
     4136
     4137                $this->insert_foreign_element( $this->state->current_token, false );
     4138                return true;
     4139
     4140            /*
     4141             * > A comment token
     4142             */
     4143            case '#cdata-section':
     4144            case '#comment':
     4145            case '#funky_comment':
     4146                $this->insert_foreign_element( $this->state->current_token, false );
     4147                return true;
     4148
     4149            /*
     4150             * > A DOCTYPE token
     4151             */
     4152            case 'html':
     4153                // Parse error: ignore the token.
     4154                return $this->step();
     4155
     4156            /*
     4157             * > A start tag whose tag name is "b", "big", "blockquote", "body", "br", "center",
     4158             * > "code", "dd", "div", "dl", "dt", "em", "embed", "h1", "h2", "h3", "h4", "h5",
     4159             * > "h6", "head", "hr", "i", "img", "li", "listing", "menu", "meta", "nobr", "ol",
     4160             * > "p", "pre", "ruby", "s", "small", "span", "strong", "strike", "sub", "sup",
     4161             * > "table", "tt", "u", "ul", "var"
     4162             *
     4163             * > A start tag whose name is "font", if the token has any attributes named "color", "face", or "size"
     4164             *
     4165             * > An end tag whose tag name is "br", "p"
     4166             *
     4167             * Closing BR tags are always reported by the Tag Processor as opening tags.
     4168             */
     4169            case '+B':
     4170            case '+BIG':
     4171            case '+BLOCKQUOTE':
     4172            case '+BODY':
     4173            case '+BR':
     4174            case '+CENTER':
     4175            case '+CODE':
     4176            case '+DD':
     4177            case '+DIV':
     4178            case '+DL':
     4179            case '+DT':
     4180            case '+EM':
     4181            case '+EMBED':
     4182            case '+H1':
     4183            case '+H2':
     4184            case '+H3':
     4185            case '+H4':
     4186            case '+H5':
     4187            case '+H6':
     4188            case '+HEAD':
     4189            case '+HR':
     4190            case '+I':
     4191            case '+IMG':
     4192            case '+LI':
     4193            case '+LISTING':
     4194            case '+MENU':
     4195            case '+META':
     4196            case '+NOBR':
     4197            case '+OL':
     4198            case '+P':
     4199            case '+PRE':
     4200            case '+RUBY':
     4201            case '+S':
     4202            case '+SMALL':
     4203            case '+SPAN':
     4204            case '+STRONG':
     4205            case '+STRIKE':
     4206            case '+SUB':
     4207            case '+SUP':
     4208            case '+TABLE':
     4209            case '+TT':
     4210            case '+U':
     4211            case '+UL':
     4212            case '+VAR':
     4213            case '+FONT with attributes':
     4214            case '-BR':
     4215            case '-P':
     4216                // @todo Indicate a parse error once it's possible.
     4217                foreach ( $this->state->stack_of_open_elements->walk_up() as $current_node ) {
     4218                    if (
     4219                        'math' === $current_node->integration_node_type ||
     4220                        'html' === $current_node->integration_node_type ||
     4221                        'html' === $current_node->namespace
     4222                    ) {
     4223                        break;
     4224                    }
     4225
     4226                    $this->state->stack_of_open_elements->pop();
     4227                }
     4228                return $this->step( self::REPROCESS_CURRENT_NODE );
     4229        }
     4230
     4231        /*
     4232         * > Any other start tag
     4233         */
     4234        if ( ! $this->is_tag_closer() ) {
     4235            $this->insert_foreign_element( $this->state->current_token, false );
     4236
     4237            /*
     4238             * > If the token has its self-closing flag set, then run
     4239             * > the appropriate steps from the following list:
     4240             */
     4241            if ( $this->state->current_token->has_self_closing_flag ) {
     4242                if ( 'SCRIPT' === $this->state->current_token->node_name && 'svg' === $this->state->current_token->namespace ) {
     4243                    /*
     4244                     * > Acknowledge the token's self-closing flag, and then act as
     4245                     * > described in the steps for a "script" end tag below.
     4246                     *
     4247                     * @todo Verify that this shouldn't be handled by the rule for
     4248                     *       "An end tag whose name is 'script', if the current node
     4249                     *       is an SVG script element."
     4250                     */
     4251                    goto in_foreign_content_any_other_end_tag;
     4252                } else {
     4253                    $this->state->stack_of_open_elements->pop();
     4254                }
     4255            }
     4256            return true;
     4257        }
     4258
     4259        /*
     4260         * > An end tag whose name is "script", if the current node is an SVG script element.
     4261         */
     4262        if ( $this->is_tag_closer() && 'SCRIPT' === $this->state->current_token->node_name && 'svg' === $this->state->current_token->namespace ) {
     4263            $this->state->stack_of_open_elements->pop();
     4264        }
     4265
     4266        /*
     4267         * > Any other end tag
     4268         */
     4269        if ( $this->is_tag_closer() ) {
     4270            in_foreign_content_any_other_end_tag:
     4271            $node = $this->state->stack_of_open_elements->current_node();
     4272            if ( $tag_name !== $node->node_name ) {
     4273                // @todo Indicate a parse error once it's possible.
     4274            }
     4275            in_foreign_content_end_tag_loop:
     4276            if ( $node === $this->state->stack_of_open_elements->at( 1 ) ) {
     4277                return true;
     4278            }
     4279
     4280            /*
     4281             * > If node's tag name, converted to ASCII lowercase, is the same as the tag name
     4282             * > of the token, pop elements from the stack of open elements until node has
     4283             * > been popped from the stack, and then return.
     4284             */
     4285            if ( 0 === strcasecmp( $node->node_name, $tag_name ) ) {
     4286                foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) {
     4287                    $this->state->stack_of_open_elements->pop();
     4288                    if ( $node === $item ) {
     4289                        return true;
     4290                    }
     4291                }
     4292            }
     4293
     4294            foreach ( $this->state->stack_of_open_elements->walk_up( $node ) as $item ) {
     4295                $node = $item;
     4296                break;
     4297            }
     4298
     4299            if ( 'html' !== $node->namespace ) {
     4300                goto in_foreign_content_end_tag_loop;
     4301            }
     4302
     4303            switch ( $this->state->insertion_mode ) {
     4304                case WP_HTML_Processor_State::INSERTION_MODE_INITIAL:
     4305                    return $this->step_initial();
     4306
     4307                case WP_HTML_Processor_State::INSERTION_MODE_BEFORE_HTML:
     4308                    return $this->step_before_html();
     4309
     4310                case WP_HTML_Processor_State::INSERTION_MODE_BEFORE_HEAD:
     4311                    return $this->step_before_head();
     4312
     4313                case WP_HTML_Processor_State::INSERTION_MODE_IN_HEAD:
     4314                    return $this->step_in_head();
     4315
     4316                case WP_HTML_Processor_State::INSERTION_MODE_IN_HEAD_NOSCRIPT:
     4317                    return $this->step_in_head_noscript();
     4318
     4319                case WP_HTML_Processor_State::INSERTION_MODE_AFTER_HEAD:
     4320                    return $this->step_after_head();
     4321
     4322                case WP_HTML_Processor_State::INSERTION_MODE_IN_BODY:
     4323                    return $this->step_in_body();
     4324
     4325                case WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE:
     4326                    return $this->step_in_table();
     4327
     4328                case WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_TEXT:
     4329                    return $this->step_in_table_text();
     4330
     4331                case WP_HTML_Processor_State::INSERTION_MODE_IN_CAPTION:
     4332                    return $this->step_in_caption();
     4333
     4334                case WP_HTML_Processor_State::INSERTION_MODE_IN_COLUMN_GROUP:
     4335                    return $this->step_in_column_group();
     4336
     4337                case WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY:
     4338                    return $this->step_in_table_body();
     4339
     4340                case WP_HTML_Processor_State::INSERTION_MODE_IN_ROW:
     4341                    return $this->step_in_row();
     4342
     4343                case WP_HTML_Processor_State::INSERTION_MODE_IN_CELL:
     4344                    return $this->step_in_cell();
     4345
     4346                case WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT:
     4347                    return $this->step_in_select();
     4348
     4349                case WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT_IN_TABLE:
     4350                    return $this->step_in_select_in_table();
     4351
     4352                case WP_HTML_Processor_State::INSERTION_MODE_IN_TEMPLATE:
     4353                    return $this->step_in_template();
     4354
     4355                case WP_HTML_Processor_State::INSERTION_MODE_AFTER_BODY:
     4356                    return $this->step_after_body();
     4357
     4358                case WP_HTML_Processor_State::INSERTION_MODE_IN_FRAMESET:
     4359                    return $this->step_in_frameset();
     4360
     4361                case WP_HTML_Processor_State::INSERTION_MODE_AFTER_FRAMESET:
     4362                    return $this->step_after_frameset();
     4363
     4364                case WP_HTML_Processor_State::INSERTION_MODE_AFTER_AFTER_BODY:
     4365                    return $this->step_after_after_body();
     4366
     4367                case WP_HTML_Processor_State::INSERTION_MODE_AFTER_AFTER_FRAMESET:
     4368                    return $this->step_after_after_frameset();
     4369
     4370                // This should be unreachable but PHP doesn't have total type checking on switch.
     4371                default:
     4372                    $this->bail( "Unaware of the requested parsing mode: '{$this->state->insertion_mode}'." );
     4373            }
     4374        }
    40734375    }
    40744376
     
    40994401     * HTML semantic overrides for Tag Processor
    41004402     */
     4403
     4404    /**
     4405     * Indicates the namespace of the current token, or "html" if there is none.
     4406     *
     4407     * @return string One of "html", "math", or "svg".
     4408     */
     4409    public function get_namespace(): string {
     4410        if ( ! isset( $this->current_element ) ) {
     4411            return 'html';
     4412        }
     4413
     4414        return $this->current_element->token->namespace;
     4415    }
    41014416
    41024417    /**
     
    47365051
    47375052    /**
     5053     * Returns the adjusted current node.
     5054     *
     5055     * > The adjusted current node is the context element if the parser was created as
     5056     * > part of the HTML fragment parsing algorithm and the stack of open elements
     5057     * > has only one element in it (fragment case); otherwise, the adjusted current
     5058     * > node is the current node.
     5059     *
     5060     * @see https://html.spec.whatwg.org/#adjusted-current-node
     5061     *
     5062     * @since 6.7.0
     5063     *
     5064     * @return WP_HTML_Token|null The adjusted current node.
     5065     */
     5066    private function get_adjusted_current_node(): ?WP_HTML_Token {
     5067        if ( isset( $this->context_node ) && 1 === $this->state->stack_of_open_elements->count() ) {
     5068            return $this->context_node;
     5069        }
     5070
     5071        return $this->state->stack_of_open_elements->current_node();
     5072    }
     5073
     5074    /**
    47385075     * Reconstructs the active formatting elements.
    47395076     *
     
    50445381                }
    50455382
    5046                 if ( self::is_special( $item->node_name ) ) {
     5383                if ( self::is_special( $item ) ) {
    50475384                    $furthest_block = $item;
    50485385                    break;
     
    51135450
    51145451    /**
     5452     * Inserts a foreign element on to the stack of open elements.
     5453     *
     5454     * @since 6.7.0
     5455     *
     5456     * @see https://html.spec.whatwg.org/#insert-a-foreign-element
     5457     *
     5458     * @param WP_HTML_Token $token                     Insert this token. The token's namespace and
     5459     *                                                 insertion point will be updated correctly.
     5460     * @param bool          $only_add_to_element_stack Whether to skip the "insert an element at the adjusted
     5461     *                                                 insertion location" algorithm when adding this element.
     5462     */
     5463    private function insert_foreign_element( WP_HTML_Token $token, bool $only_add_to_element_stack ): void {
     5464        $adjusted_current_node = $this->get_adjusted_current_node();
     5465
     5466        $token->namespace = $adjusted_current_node ? $adjusted_current_node->namespace : 'html';
     5467
     5468        if ( $this->is_mathml_integration_point() ) {
     5469            $token->integration_node_type = 'math';
     5470        } elseif ( $this->is_html_integration_point() ) {
     5471            $token->integration_node_type = 'html';
     5472        }
     5473
     5474        if ( false === $only_add_to_element_stack ) {
     5475            /*
     5476             * @todo Implement the "appropriate place for inserting a node" and the
     5477             *       "insert an element at the adjusted insertion location" algorithms.
     5478             *
     5479             * These algorithms mostly impacts DOM tree construction and not the HTML API.
     5480             * Here, there's no DOM node onto which the element will be appended, so the
     5481             * parser will skip this step.
     5482             *
     5483             * @see https://html.spec.whatwg.org/#insert-an-element-at-the-adjusted-insertion-location
     5484             */
     5485        }
     5486
     5487        $this->insert_html_element( $token );
     5488    }
     5489
     5490    /**
    51155491     * Inserts a virtual element on the stack of open elements.
    51165492     *
     
    51385514
    51395515    /**
     5516     * Indicates if the current token is a MathML integration point.
     5517     *
     5518     * @since 6.7.0
     5519     *
     5520     * @see https://html.spec.whatwg.org/#mathml-text-integration-point
     5521     *
     5522     * @return bool Whether the current token is a MathML integration point.
     5523     */
     5524    private function is_mathml_integration_point(): bool {
     5525        $current_token = $this->state->current_token;
     5526        if ( ! isset( $current_token ) ) {
     5527            return false;
     5528        }
     5529
     5530        if ( 'math' !== $current_token->namespace || 'M' !== $current_token->node_name[0] ) {
     5531            return false;
     5532        }
     5533
     5534        $tag_name = $current_token->node_name;
     5535
     5536        return (
     5537            'MI' === $tag_name ||
     5538            'MO' === $tag_name ||
     5539            'MN' === $tag_name ||
     5540            'MS' === $tag_name ||
     5541            'MTEXT' === $tag_name
     5542        );
     5543    }
     5544
     5545    /**
     5546     * Indicates if the current token is an HTML integration point.
     5547     *
     5548     * Note that this method must be an instance method with access
     5549     * to the current token, since it needs to examine the attributes
     5550     * of the currently-matched tag, if it's in the MathML namespace.
     5551     * Otherwise it would be required to scan the HTML and ensure that
     5552     * no other accounting is overlooked.
     5553     *
     5554     * @since 6.7.0
     5555     *
     5556     * @see https://html.spec.whatwg.org/#html-integration-point
     5557     *
     5558     * @return bool Whether the current token is an HTML integration point.
     5559     */
     5560    private function is_html_integration_point(): bool {
     5561        $current_token = $this->state->current_token;
     5562        if ( ! isset( $current_token ) ) {
     5563            return false;
     5564        }
     5565
     5566        if ( 'html' === $current_token->namespace ) {
     5567            return false;
     5568        }
     5569
     5570        $tag_name = $current_token->node_name;
     5571
     5572        if ( 'svg' === $current_token->namespace ) {
     5573            return (
     5574                'DESC' === $tag_name ||
     5575                'FOREIGNOBJECT' === $tag_name ||
     5576                'TITLE' === $tag_name
     5577            );
     5578        }
     5579
     5580        if ( 'math' === $current_token->namespace ) {
     5581            if ( 'ANNOTATION-XML' !== $tag_name ) {
     5582                return false;
     5583            }
     5584
     5585            $encoding = $this->get_attribute( 'encoding' );
     5586
     5587            return (
     5588                is_string( $encoding ) &&
     5589                (
     5590                    0 === strcasecmp( $encoding, 'application/xhtml+xml' ) ||
     5591                    0 === strcasecmp( $encoding, 'text/html' )
     5592                )
     5593            );
     5594        }
     5595    }
     5596
     5597    /**
    51405598     * Returns whether an element of a given name is in the HTML special category.
    51415599     *
     
    51445602     * @see https://html.spec.whatwg.org/#special
    51455603     *
    5146      * @param string $tag_name Name of element to check.
     5604     * @param WP_HTML_Token|string $tag_name Node to check, or only its name if in the HTML namespace.
    51475605     * @return bool Whether the element of the given name is in the special category.
    51485606     */
    51495607    public static function is_special( $tag_name ): bool {
    5150         $tag_name = strtoupper( $tag_name );
     5608        if ( is_string( $tag_name ) ) {
     5609            $tag_name = strtoupper( $tag_name );
     5610        } else {
     5611            $tag_name = 'html' === $tag_name->namespace
     5612                ? strtoupper( $tag_name->node_name )
     5613                : "{$tag_name->namespace} {$tag_name->node_name}";
     5614        }
    51515615
    51525616        return (
     
    52365700
    52375701            // MathML.
    5238             'MI' === $tag_name ||
    5239             'MO' === $tag_name ||
    5240             'MN' === $tag_name ||
    5241             'MS' === $tag_name ||
    5242             'MTEXT' === $tag_name ||
    5243             'ANNOTATION-XML' === $tag_name ||
     5702            'math MI' === $tag_name ||
     5703            'math MO' === $tag_name ||
     5704            'math MN' === $tag_name ||
     5705            'math MS' === $tag_name ||
     5706            'math MTEXT' === $tag_name ||
     5707            'math ANNOTATION-XML' === $tag_name ||
    52445708
    52455709            // SVG.
    5246             'FOREIGNOBJECT' === $tag_name ||
    5247             'DESC' === $tag_name ||
    5248             'TITLE' === $tag_name
     5710            'svg DESC' === $tag_name ||
     5711            'svg FOREIGNOBJECT' === $tag_name ||
     5712            'svg TITLE' === $tag_name
    52495713        );
    52505714    }
Note: See TracChangeset for help on using the changeset viewer.