Make WordPress Core

Changeset 58656


Ignore:
Timestamp:
07/03/2024 05:05:46 PM (11 months ago)
Author:
dmsnell
Message:

HTML API: Implement the _reset the insertion mode appropriately_ algorithm.

In order to add support for the SELECT and TABLE tags in the HTML Processor, it
needs to implement the HTML algorithm named "reset the insertion mode
appropriately".

This patch implements that algorithm to unblock the additional tag support. The
algorithm resets the parsing mode after specific state changes in complicated
situations where alternative rules are in effect (such as rules governing how
the parser handles tags found within a TABLE element).

Developed in https://github.com/WordPress/wordpress-develop/pull/6020
Discussed in https://core.trac.wordpress.org/ticket/61549

Props dmsnell, jonsurrell.
Fixes #61549.

Location:
trunk/src/wp-includes/html-api
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/wp-includes/html-api/class-wp-html-processor-state.php

    r58631 r58656  
    217 217
    218 218    /**
     219     * The stack of template insertion modes.
     220     *
     221     * @since 6.7.0
     222     *
     223     * @see https://html.spec.whatwg.org/#the-insertion-mode:stack-of-template-insertion-modes
     224     *
     225     * @var array<string>
     226     */
     227    public $stack_of_template_insertion_modes = array();
     228
     229    /**
    219 230     * Tracks open elements while scanning HTML.
    220 231     *
     
    272 283     */
    273 284    public $context_node = null;
     285
     286    /**
     287     * HEAD element pointer.
     288     *
     289     * @since 6.7.0
     290     *
     291     * @see https://html.spec.whatwg.org/multipage/parsing.html#head-element-pointer
     292     *
     293     * @var WP_HTML_Token|null
     294     */
     295    public $head_element = null;
    274 296
    275 297    /**
  • trunk/src/wp-includes/html-api/class-wp-html-processor.php

    r58588 r58656  
    2128 2128        $this->last_error = self::ERROR_UNSUPPORTED;
    2129 2129        throw new WP_HTML_Unsupported_Exception( 'Cannot reconstruct active formatting elements when advancing and rewinding is required.' );
     2130    }
     2131
     2132    /**
     2133     * Runs the "reset the insertion mode appropriately" algorithm.
     2134     *
     2135     * @since 6.7.0
     2136     *
     2137     * @see https://html.spec.whatwg.org/multipage/parsing.html#reset-the-insertion-mode-appropriately
     2138     */
     2139    public function reset_insertion_mode(): void {
     2140        // Set the first node.
     2141        $first_node = null;
     2142        foreach ( $this->state->stack_of_open_elements->walk_down() as $first_node ) {
     2143            break;
     2144        }
     2145
     2146        /*
     2147         * > 1. Let _last_ be false.
     2148         */
     2149        $last = false;
     2150        foreach ( $this->state->stack_of_open_elements->walk_up() as $node ) {
     2151            /*
     2152             * > 2. Let _node_ be the last node in the stack of open elements.
     2153             * > 3. _Loop_: If _node_ is the first node in the stack of open elements, then set _last_
     2154             * >            to true, and, if the parser was created as part of the HTML fragment parsing
     2155             * >            algorithm (fragment case), set node to the context element passed to
     2156             * >            that algorithm.
     2157             * > …
     2158             */
     2159            if ( $node === $first_node ) {
     2160                $last = true;
     2161                if ( isset( $this->context_node ) ) {
     2162                    $node = $this->context_node;
     2163                }
     2164            }
     2165
     2166            switch ( $node->node_name ) {
     2167                /*
     2168                 * > 4. If node is a `select` element, run these substeps:
     2169                 * >   1. If _last_ is true, jump to the step below labeled done.
     2170                 * >   2. Let _ancestor_ be _node_.
     2171                 * >   3. _Loop_: If _ancestor_ is the first node in the stack of open elements,
     2172                 * >      jump to the step below labeled done.
     2173                 * >   4. Let ancestor be the node before ancestor in the stack of open elements.
     2174                 * >   …
     2175                 * >   7. Jump back to the step labeled _loop_.
     2176                 * >   8. _Done_: Switch the insertion mode to "in select" and return.
     2177                 */
     2178                case 'SELECT':
     2179                    if ( ! $last ) {
     2180                        foreach ( $this->state->stack_of_open_elements->walk_up( $node ) as $ancestor ) {
     2181                            switch ( $ancestor->node_name ) {
     2182                                /*
     2183                                 * > 5. If _ancestor_ is a `template` node, jump to the step below
     2184                                 * >    labeled _done_.
     2185                                 */
     2186                                case 'TEMPLATE':
     2187                                    break 2;
     2188
     2189                                /*
     2190                                 * > 6. If _ancestor_ is a `table` node, switch the insertion mode to
     2191                                 * >    "in select in table" and return.
     2192                                 */
     2193                                case 'TABLE':
     2194                                    $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT_IN_TABLE;
     2195                                    return;
     2196                            }
     2197                        }
     2198                    }
     2199                    $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT;
     2200                    return;
     2201
     2202                /*
     2203                 * > 5. If _node_ is a `td` or `th` element and _last_ is false, then switch the
     2204                 * >    insertion mode to "in cell" and return.
     2205                 */
     2206                case 'TD':
     2207                case 'TH':
     2208                    if ( ! $last ) {
     2209                        $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_CELL;
     2210                        return;
     2211                    }
     2212                    break;
     2213
     2214                /*
     2215                 * > 6. If _node_ is a `tr` element, then switch the insertion mode to "in row"
     2216                 * >    and return.
     2217                 */
     2218                case 'TR':
     2219                    $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_ROW;
     2220                    return;
     2221
     2222                /*
     2223                 * > 7. If _node_ is a `tbody`, `thead`, or `tfoot` element, then switch the
     2224                 * >    insertion mode to "in table body" and return.
     2225                 */
     2226                case 'TBODY':
     2227                case 'THEAD':
     2228                case 'TFOOT':
     2229                    $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY;
     2230                    return;
     2231
     2232                /*
     2233                 * > 8. If _node_ is a `caption` element, then switch the insertion mode to
     2234                 * >    "in caption" and return.
     2235                 */
     2236                case 'CAPTION':
     2237                    $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_CAPTION;
     2238                    return;
     2239
     2240                /*
     2241                 * > 9. If _node_ is a `colgroup` element, then switch the insertion mode to
     2242                 * >    "in column group" and return.
     2243                 */
     2244                case 'COLGROUP':
     2245                    $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_COLUMN_GROUP;
     2246                    return;
     2247
     2248                /*
     2249                 * > 10. If _node_ is a `table` element, then switch the insertion mode to
     2250                 * >     "in table" and return.
     2251                 */
     2252                case 'TABLE':
     2253                    $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE;
     2254                    return;
     2255
     2256                /*
     2257                 * > 11. If _node_ is a `template` element, then switch the insertion mode to the
     2258                 * >     current template insertion mode and return.
     2259                 */
     2260                case 'TEMPLATE':
     2261                    $this->state->insertion_mode = end( $this->state->stack_of_template_insertion_modes );
     2262                    return;
     2263
     2264                /*
     2265                 * > 12. If _node_ is a `head` element and _last_ is false, then switch the
     2266                 * >     insertion mode to "in head" and return.
     2267                 */
     2268                case 'HEAD':
     2269                    if ( ! $last ) {
     2270                        $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_HEAD;
     2271                        return;
     2272                    }
     2273                    break;
     2274
     2275                /*
     2276                 * > 13. If _node_ is a `body` element, then switch the insertion mode to "in body"
     2277                 * >     and return.
     2278                 */
     2279                case 'BODY':
     2280                    $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY;
     2281                    return;
     2282
     2283                /*
     2284                 * > 14. If _node_ is a `frameset` element, then switch the insertion mode to
     2285                 * >     "in frameset" and return. (fragment case)
     2286                 */
     2287                case 'FRAMESET':
     2288                    $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_FRAMESET;
     2289                    return;
     2290
     2291                /*
     2292                 * > 15. If _node_ is an `html` element, run these substeps:
     2293                 * >     1. If the head element pointer is null, switch the insertion mode to
     2294                 * >        "before head" and return. (fragment case)
     2295                 * >     2. Otherwise, the head element pointer is not null, switch the insertion
     2296                 * >        mode to "after head" and return.
     2297                 */
     2298                case 'HTML':
     2299                    $this->state->insertion_mode = isset( $this->state->head_element )
     2300                        ? WP_HTML_Processor_State::INSERTION_MODE_AFTER_HEAD
     2301                        : WP_HTML_Processor_State::INSERTION_MODE_BEFORE_HEAD;
     2302                    return;
     2303            }
     2304        }
     2305
     2306        /*
     2307         * > 16. If _last_ is true, then switch the insertion mode to "in body"
     2308         * >     and return. (fragment case)
     2309         *
     2310         * This is only reachable if `$last` is true, as per the fragment parsing case.
     2311         */
     2312        $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY;
    2130 2313    }
    2131 2314
Note: See TracChangeset for help on using the changeset viewer.