Make WordPress Core

Changeset 58977


Ignore:
Timestamp:
09/03/2024 07:48:57 PM (6 weeks ago)
Author:
dmsnell
Message:

HTML API: Ensure that NULL and whitespace-only CDATA sections don't forbid FRAMESET.

When CDATA sections (which can only occur inside SVG and MathML content) consist only of NULL bytes or whitespace characters, they should not clear the "frameset ok" flag. Previously they always cleared this flag; this patch updates the logic to detect these sequences properly.

Developed in https://github.com/WordPress/wordpress-develop/pull/7230
Discussed in https://core.trac.wordpress.org/ticket/61576

Follow-up to [58867].

Props dmsnell, jonsurrell.
See #61576.

Location:
trunk/src/wp-includes/html-api
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/wp-includes/html-api/class-wp-html-processor.php

    r58970 r58977  
    844844        if ( self::PROCESS_NEXT_NODE === $node_to_process ) {
    845845            parent::next_token();
    846             if (
    847                 WP_HTML_Tag_Processor::STATE_TEXT_NODE === $this->parser_state ||
    848                 WP_HTML_Tag_Processor::STATE_CDATA_NODE === $this->parser_state
    849             ) {
     846            if ( WP_HTML_Tag_Processor::STATE_TEXT_NODE === $this->parser_state ) {
    850847                parent::subdivide_text_appropriately();
    851848            }
     
    43764373
    43774374        switch ( $op ) {
    4378             case '#cdata-section':
    43794375            case '#text':
    43804376                /*
     
    43904386                 */
    43914387                if ( parent::TEXT_IS_GENERIC === $this->text_node_classification ) {
     4388                    $this->state->frameset_ok = false;
     4389                }
     4390
     4391                $this->insert_foreign_element( $this->state->current_token, false );
     4392                return true;
     4393
     4394            /*
     4395             * CDATA sections are alternate wrappers for text content and therefore
     4396             * ought to follow the same rules as text nodes.
     4397             */
     4398            case '#cdata-section':
     4399                /*
     4400                 * NULL bytes and whitespace do not change the frameset-ok flag.
     4401                 */
     4402                $current_token        = $this->bookmarks[ $this->state->current_token->bookmark_name ];
     4403                $cdata_content_start  = $current_token->start + 9;
     4404                $cdata_content_length = $current_token->length - 12;
     4405                if ( strspn( $this->html, "\0 \t\n\f\r", $cdata_content_start, $cdata_content_length ) !== $cdata_content_length ) {
    43924406                    $this->state->frameset_ok = false;
    43934407                }
  • trunk/src/wp-includes/html-api/class-wp-html-tag-processor.php

    r58970 r58977  
    33383338
    33393339    /**
    3340      * Subdivides a matched text node or CDATA text node, splitting NULL byte sequences
    3341      * and decoded whitespace as distinct prefixes.
     3340     * Subdivides a matched text node, splitting NULL byte sequences and decoded whitespace as
     3341     * distinct prefixes.
    33423342     *
    33433343     * Note that once anything that's neither a NULL byte nor decoded whitespace is
     
    33693369     */
    33703370    public function subdivide_text_appropriately(): bool {
     3371        if ( self::STATE_TEXT_NODE !== $this->parser_state ) {
     3372            return false;
     3373        }
     3374
    33713375        $this->text_node_classification = self::TEXT_IS_GENERIC;
    33723376
    3373         if ( self::STATE_TEXT_NODE === $this->parser_state ) {
    3374             /*
    3375              * NULL bytes are treated categorically different than numeric character
    3376              * references whose number is zero. `&#x00;` is not the same as `"\x00"`.
    3377              */
    3378             $leading_nulls = strspn( $this->html, "\x00", $this->text_starts_at, $this->text_length );
    3379             if ( $leading_nulls > 0 ) {
    3380                 $this->token_length             = $leading_nulls;
    3381                 $this->text_length              = $leading_nulls;
    3382                 $this->bytes_already_parsed     = $this->token_starts_at + $leading_nulls;
    3383                 $this->text_node_classification = self::TEXT_IS_NULL_SEQUENCE;
    3384                 return true;
    3385             }
    3386 
    3387             /*
    3388              * Start a decoding loop to determine the point at which the
    3389              * text subdivides. This entails raw whitespace bytes and any
    3390              * character reference that decodes to the same.
    3391              */
    3392             $at  = $this->text_starts_at;
    3393             $end = $this->text_starts_at + $this->text_length;
    3394             while ( $at < $end ) {
    3395                 $skipped = strspn( $this->html, " \t\f\r\n", $at, $end - $at );
    3396                 $at     += $skipped;
    3397 
    3398                 if ( $at < $end && '&' === $this->html[ $at ] ) {
    3399                     $matched_byte_length = null;
    3400                     $replacement         = WP_HTML_Decoder::read_character_reference( 'data', $this->html, $at, $matched_byte_length );
    3401                     if ( isset( $replacement ) && 1 === strspn( $replacement, " \t\f\r\n" ) ) {
    3402                         $at += $matched_byte_length;
    3403                         continue;
    3404                     }
     3377        /*
     3378         * NULL bytes are treated categorically different than numeric character
     3379         * references whose number is zero. `&#x00;` is not the same as `"\x00"`.
     3380         */
     3381        $leading_nulls = strspn( $this->html, "\x00", $this->text_starts_at, $this->text_length );
     3382        if ( $leading_nulls > 0 ) {
     3383            $this->token_length             = $leading_nulls;
     3384            $this->text_length              = $leading_nulls;
     3385            $this->bytes_already_parsed     = $this->token_starts_at + $leading_nulls;
     3386            $this->text_node_classification = self::TEXT_IS_NULL_SEQUENCE;
     3387            return true;
     3388        }
     3389
     3390        /*
     3391         * Start a decoding loop to determine the point at which the
     3392         * text subdivides. This entails raw whitespace bytes and any
     3393         * character reference that decodes to the same.
     3394         */
     3395        $at  = $this->text_starts_at;
     3396        $end = $this->text_starts_at + $this->text_length;
     3397        while ( $at < $end ) {
     3398            $skipped = strspn( $this->html, " \t\f\r\n", $at, $end - $at );
     3399            $at     += $skipped;
     3400
     3401            if ( $at < $end && '&' === $this->html[ $at ] ) {
     3402                $matched_byte_length = null;
     3403                $replacement         = WP_HTML_Decoder::read_character_reference( 'data', $this->html, $at, $matched_byte_length );
     3404                if ( isset( $replacement ) && 1 === strspn( $replacement, " \t\f\r\n" ) ) {
     3405                    $at += $matched_byte_length;
     3406                    continue;
    34053407                }
    3406 
    3407                 break;
    3408             }
    3409 
    3410             if ( $at > $this->text_starts_at ) {
    3411                 $new_length                     = $at - $this->text_starts_at;
    3412                 $this->text_length              = $new_length;
    3413                 $this->token_length             = $new_length;
    3414                 $this->bytes_already_parsed     = $at;
    3415                 $this->text_node_classification = self::TEXT_IS_WHITESPACE;
    3416                 return true;
    3417             }
    3418 
    3419             return false;
    3420         }
    3421 
    3422         // Unlike text nodes, there are no character references within CDATA sections.
    3423         if ( self::STATE_CDATA_NODE === $this->parser_state ) {
    3424             $leading_nulls = strspn( $this->html, "\x00", $this->text_starts_at, $this->text_length );
    3425             if ( $leading_nulls === $this->text_length ) {
    3426                 $this->text_node_classification = self::TEXT_IS_NULL_SEQUENCE;
    3427                 return true;
    3428             }
    3429 
    3430             $leading_ws = strspn( $this->html, " \t\f\r\n", $this->text_starts_at, $this->text_length );
    3431             if ( $leading_ws === $this->text_length ) {
    3432                 $this->text_node_classification = self::TEXT_IS_WHITESPACE;
    3433                 return true;
    3434             }
     3408            }
     3409
     3410            break;
     3411        }
     3412
     3413        if ( $at > $this->text_starts_at ) {
     3414            $new_length                     = $at - $this->text_starts_at;
     3415            $this->text_length              = $new_length;
     3416            $this->token_length             = $new_length;
     3417            $this->bytes_already_parsed     = $at;
     3418            $this->text_node_classification = self::TEXT_IS_WHITESPACE;
     3419            return true;
    34353420        }
    34363421
Note: See TracChangeset for help on using the changeset viewer.