Make WordPress Core


Ignore:
Timestamp:
09/13/2023 12:53:32 PM (20 months ago)
Author:
Bernhard Reiter
Message:

HTML API: Skip over contents of RAWTEXT elements such as STYLE.

When encountering elements that imply switching into the RAWTEXT parsing state,
the Tag Processor should skip processing until exiting the RAWTEXT state.

In this patch the Tag Processor does just that, except for the deprecated XMP
element, whose handling requires additional, more complicated rules.

This implicitly assumes that the scripting flag ("SCRIPT ENABLED") in HTML parsing
is set, so that the contents of NOSCRIPT can be skipped. Otherwise, the contents
of that tag would have to be parsed.

Props dmsnell.
Merges [56563] to the 6.3 branch.
Fixes #59292.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/6.3/src/wp-includes/html-api/class-wp-html-tag-processor.php

    r56133 r56564  
    243243 *
    244244 * @since 6.2.0
     245 * @since 6.2.1 Fix: Support for various invalid comments; attribute updates are case-insensitive.
     246 * @since 6.3.2 Fix: Skip HTML-like content inside rawtext elements such as STYLE.
    245247 */
    246248class WP_HTML_Tag_Processor {
     
    569571             */
    570572            $t = $this->html[ $this->tag_name_starts_at ];
    571             if ( ! $this->is_closing_tag && ( 's' === $t || 'S' === $t || 't' === $t || 'T' === $t ) ) {
     573            if (
     574                ! $this->is_closing_tag &&
     575                (
     576                    'i' === $t || 'I' === $t ||
     577                    'n' === $t || 'N' === $t ||
     578                    's' === $t || 'S' === $t ||
     579                    't' === $t || 'T' === $t
     580                ) ) {
    572581                $tag_name = $this->get_tag();
    573582
     
    579588                    ! $this->skip_rcdata( $tag_name )
    580589                ) {
     590                    $this->bytes_already_parsed = strlen( $this->html );
     591                    return false;
     592                } elseif (
     593                    (
     594                        'IFRAME' === $tag_name ||
     595                        'NOEMBED' === $tag_name ||
     596                        'NOFRAMES' === $tag_name ||
     597                        'NOSCRIPT' === $tag_name ||
     598                        'STYLE' === $tag_name
     599                    ) &&
     600                    ! $this->skip_rawtext( $tag_name )
     601                ) {
     602                    /*
     603                     * "XMP" should be here too but its rules are more complicated and require the
     604                     * complexity of the HTML Processor (it needs to close out any open P element,
     605                     * meaning it can't be skipped here or else the HTML Processor will lose its
     606                     * place). For now, it can be ignored as it's a rare HTML tag in practice and
     607                     * any normative HTML should be using PRE instead.
     608                     */
    581609                    $this->bytes_already_parsed = strlen( $this->html );
    582610                    return false;
     
    711739    }
    712740
    713 
    714     /**
    715      * Skips contents of title and textarea tags.
     741    /**
     742     * Skips contents of generic rawtext elements.
     743     *
     744     * @since 6.3.2
     745     *
     746     * @see https://html.spec.whatwg.org/#generic-raw-text-element-parsing-algorithm
     747     *
     748     * @param string $tag_name The uppercase tag name which will close the RAWTEXT region.
     749     * @return bool Whether an end to the RAWTEXT region was found before the end of the document.
     750     */
     751    private function skip_rawtext( $tag_name ) {
     752        /*
     753         * Skipping RAWTEXT and skipping RCDATA differ only in whether character
     754         * references are decoded, and since functionality to read the inner markup
     755         * isn't supported, it's not necessary to implement the two separately.
     756         */
     757        return $this->skip_rcdata( $tag_name );
     758    }
     759
     760    /**
     761     * Skips contents of RCDATA elements, namely title and textarea tags.
    716762     *
    717763     * @since 6.2.0
     
    719765     * @see https://html.spec.whatwg.org/multipage/parsing.html#rcdata-state
    720766     *
    721      * @param string $tag_name The lowercase tag name which will close the RCDATA region.
     767     * @param string $tag_name The uppercase tag name which will close the RCDATA region.
    722768     * @return bool Whether an end to the RCDATA region was found before the end of the document.
    723769     */
Note: See TracChangeset for help on using the changeset viewer.