Make WordPress Core

Changeset 60617


Ignore:
Timestamp:
08/07/2025 08:31:07 AM (6 months ago)
Author:
jonsurrell
Message:

HTML API: Reduce length checks in skip_script_data.

Apply an optimization to remove several repeated string length checks in WP_HTML_Tag_Processor::skip_script_data().

Developed in https://github.com/WordPress/wordpress-develop/pull/9230.

Props jonsurrell, dmsnell.
See #63738.

Location:
trunk
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/wp-includes/html-api/class-wp-html-tag-processor.php

    r60540 r60617  
    14981498
    14991499            /*
     1500             * Optimization: Terminating a complete script element requires at least eight
     1501             * additional bytes in the document. Some checks below may cause local escaped
     1502             * state transitions when processing shorter strings, but those transitions are
     1503             * irrelevant if the script tag is incomplete and the function must return false.
     1504             *
     1505             * This may need updating if those transitions become significant or are exported from
     1506             * this function in some way, such as when building safe methods to embed JavaScript
     1507             * or data inside a SCRIPT element.
     1508             *
     1509             *     $at may be here.
     1510             *        ↓
     1511             *     ...</script>
     1512             *         ╰──┬───╯
     1513             *     $at + 8 additional bytes are required for a non-false return value.
     1514             *
     1515             * This single check eliminates the need to check lengths for the shorter spans:
     1516             *
     1517             *           $at may be here.
     1518             *                  ↓
     1519             *     <script><!-- --></script>
     1520             *                   ├╯
     1521             *             Reading up to $at + 2 additional characters does not require a length check.
     1522             *
     1523             * The transition from "escaped" to "unescaped" is not relevant if the document ends:
     1524             *
     1525             *           $at may be here.
     1526             *                  ↓
     1527             *     <script><!-- -->[[END-OF-DOCUMENT]]
     1528             *                   ╰──┬───╯
     1529             *             The requirement of $at + 8 additional bytes is not satisfied, so return false.
     1530             */
     1531            if ( $at + 8 >= $doc_length ) {
     1532                return false;
     1533            }
     1534
     1535            /*
    15001536             * For all script states a "-->" transitions
    15011537             * back into the normal unescaped script mode,
     
    15031539             */
    15041540            if (
    1505                 $at + 2 < $doc_length &&
    15061541                '-' === $html[ $at ] &&
    15071542                '-' === $html[ $at + 1 ] &&
     
    15111546                $state = 'unescaped';
    15121547                continue;
    1513             }
    1514 
    1515             if ( $at + 1 >= $doc_length ) {
    1516                 return false;
    15171548            }
    15181549
     
    15381569             */
    15391570            if (
    1540                 $at + 2 < $doc_length &&
    15411571                '!' === $html[ $at ] &&
    15421572                '-' === $html[ $at + 1 ] &&
     
    15621592             */
    15631593            if ( ! (
    1564                 $at + 6 < $doc_length &&
    15651594                ( 's' === $html[ $at ] || 'S' === $html[ $at ] ) &&
    15661595                ( 'c' === $html[ $at + 1 ] || 'C' === $html[ $at + 1 ] ) &&
     
    15801609             * "<script" is found within the text.
    15811610             */
    1582             if ( $at + 6 >= $doc_length ) {
    1583                 continue;
    1584             }
    15851611            $at += 6;
    15861612            $c   = $html[ $at ];
     
    16121638
    16131639                if ( $this->bytes_already_parsed >= $doc_length ) {
    1614                     $this->parser_state = self::STATE_INCOMPLETE_INPUT;
    1615 
    16161640                    return false;
    16171641                }
  • trunk/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php

    r59464 r60617  
    19791979                '<script class="d-md-none"><!--<script>--><scRipt><span><!--<span><Script</script>--></scripT><div></div>-->',
    19801980            ),
     1981        );
     1982    }
     1983
     1984    /**
     1985     * Test that script tags are parsed correctly.
     1986     *
     1987     * Script tag parsing is very complicated, see the following resources for more details:
     1988     *
     1989     * - https://html.spec.whatwg.org/multipage/parsing.html#script-data-state
     1990     * - https://html.spec.whatwg.org/multipage/scripting.html#restrictions-for-contents-of-script-elements
     1991     *
     1992     * @ticket 63738
     1993     *
     1994     * @dataProvider data_script_tag
     1995     */
     1996    public function test_script_tag_parsing( string $input, bool $closes ) {
     1997        $processor = new WP_HTML_Tag_Processor( $input );
     1998
     1999        if ( $closes ) {
     2000            $this->assertTrue( $processor->next_token(), 'Expected to find complete script tag.' );
     2001            $this->assertSame( 'SCRIPT', $processor->get_tag() );
     2002            return;
     2003        }
     2004
     2005        $this->assertFalse( $processor->next_token(), 'Expected to fail next_token().' );
     2006        $this->assertTrue( $processor->paused_at_incomplete_token(), 'Expected an incomplete SCRIPT tag token.' );
     2007    }
     2008
     2009    /**
     2010     * Data provider.
     2011     */
     2012    public static function data_script_tag(): array {
     2013        return array(
     2014            'Basic script tag'                          => array( '<script></script>', true ),
     2015            'Script with type attribute'                => array( '<script type="text/javascript"></script>', true ),
     2016            'Script data escaped'                       => array( '<script><!--</script>', true ),
     2017            'Script data double-escaped exit (comment)' => array( '<script><!--<script>--></script>', true ),
     2018            'Script data double-escaped exit (closed)'  => array( '<script><!--<script></script></script>', true ),
     2019            'Script data double-escaped exit (closed/truncated)' => array( '<script><!--<script></script </script>', true ),
     2020            'Script data no double-escape'              => array( '<script><!-- --><script></script>', true ),
     2021
     2022            'Script tag with self-close flag (ignored)' => array( '<script />', false ),
     2023            'Script data double-escaped'                => array( '<script><!--<script></script>', false ),
    19812024        );
    19822025    }
Note: See TracChangeset for help on using the changeset viewer.