Make WordPress Core

Changeset 58159


Ignore:
Timestamp:
05/15/2024 05:40:44 PM (5 weeks ago)
Author:
dmsnell
Message:

Improve legibility of JSON-encoded Interactivity API store data.

The Interactivity API has been rendering client data in a SCRIPT element with the
type application/json so that it's not executed as a script, but is available
to one. The data runs through wp_json_encode() and is encoded with some flags
to ensure that potentially-dangerous characters are escaped.

However, this can lead to some challenges. Eagerly escaping when not necessary
can make the data difficult to comprehend when reading the output HTML. For example,
all non-ASCII Unicode characters are escaped with their code point equivalent.
This results in \ud83c\udd70 instead of 🅰.

In this patch, the flags for JSON encoding are refined to ensure what's necessary
while relaxing other rules (leaving in those Unicode characters if the blog charset
is UTF-8). This makes Interactivity API data quicker for a human reader
to decipher and diagnose.

In summary:

  • This data is JSON encoded and printed in a <script type="application/json"> tag.
  • If we ensure that < is never printed inside the data, it should be impossible to break out of the script tag and the browser treats everything as the element's textContent.
  • All other escaping becomes unnecessary at that point, including unicode escaping if the page uses the UTF-8 charset (the same encoding as JSON).

See https://github.com/WordPress/wordpress-develop/pull/6433#pullrequestreview-2043218338

Developed in https://github.com/WordPress/wordpress-develop/pull/6520
Discussed in https://core.trac.wordpress.org/ticket/61170

Fixes: #61170
Follow-up to: [57563].
Props: bjorsch, dmsnell, jonsurrell, sabernhardt, westonruter.

Location:
trunk
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/wp-includes/interactivity-api/class-wp-interactivity-api.php

    r58127 r58159  
    168168
    169169        if ( ! empty( $interactivity_data ) ) {
     170            /*
     171             * This data will be printed as JSON inside a script tag like this:
     172             *   <script type="application/json"></script>
     173             *
     174             * A script tag must be closed by a sequence beginning with `</`. It's impossible to
     175             * close a script tag without using `<`. We ensure that `<` is escaped and `/` can
     176             * remain unescaped, so `</script>` will be printed as `\u003C/script\u003E`.
     177             *
     178             *   - JSON_HEX_TAG: All < and > are converted to \u003C and \u003E.
     179             *   - JSON_UNESCAPED_SLASHES: Don't escape /.
     180             *
     181             * If the page will use UTF-8 encoding, it's safe to print unescaped unicode:
     182             *
     183             *   - JSON_UNESCAPED_UNICODE: Encode multibyte Unicode characters literally (instead of as `\uXXXX`).
     184             *   - JSON_UNESCAPED_LINE_TERMINATORS: Leave the line terminators U+2028 and U+2029
     185             *     unescaped when JSON_UNESCAPED_UNICODE is supplied, matching the behaviour of
     186             *     PHP before 7.1, where this constant did not exist. Available as of PHP 7.1.0.
     187             *
     188             * The JSON specification requires encoding in UTF-8, so if the generated HTML page
     189             * is not encoded in UTF-8 then it's not safe to include those literals. They must
     190             * be escaped to avoid encoding issues.
     191             *
     192             * @see https://www.rfc-editor.org/rfc/rfc8259.html for details on encoding requirements.
     193             * @see https://www.php.net/manual/en/json.constants.php for details on these constants.
     194             * @see https://html.spec.whatwg.org/#script-data-state for details on script tag parsing.
     195             */
     196            $json_encode_flags = JSON_HEX_TAG | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_LINE_TERMINATORS;
     197            if ( ! is_utf8_charset() ) {
     198                $json_encode_flags = JSON_HEX_TAG | JSON_UNESCAPED_SLASHES;
     199            }
     200
    170201            wp_print_inline_script_tag(
    171202                wp_json_encode(
    172203                    $interactivity_data,
    173                     JSON_HEX_TAG | JSON_HEX_AMP
     204                    $json_encode_flags
    174205                ),
    175206                array(
  • trunk/tests/phpunit/tests/interactivity-api/wpInteractivityAPI.php

    r58040 r58159  
    2828    }
    2929
     30    public function charset_iso_8859_1() {
     31        return 'iso-8859-1';
     32    }
     33
    3034    /**
    3135     * Tests that the state and config methods return an empty array at the
     
    350354     *
    351355     * @ticket 60356
     356     * @ticket 61170
    352357     *
    353358     * @covers ::state
     
    356361     */
    357362    public function test_state_and_config_escape_special_characters() {
    358         $this->interactivity->state( 'myPlugin', array( 'amps' => 'http://site.test/?foo=1&baz=2' ) );
    359         $this->interactivity->config( 'myPlugin', array( 'tags' => 'Tags: <!-- <script>' ) );
     363        $this->interactivity->state(
     364            'myPlugin',
     365            array(
     366                'ampersand'                              => '&',
     367                'less-than sign'                         => '<',
     368                'greater-than sign'                      => '>',
     369                'solidus'                                => '/',
     370                'line separator'                         => "\u{2028}",
     371                'paragraph separator'                    => "\u{2029}",
     372                'flag of england'                        => "\u{1F3F4}\u{E0067}\u{E0062}\u{E0065}\u{E006E}\u{E0067}\u{E007F}",
     373                'malicious script closer'                => '</script>',
     374                'entity-encoded malicious script closer' => '&lt;/script&gt;',
     375            )
     376        );
     377        $this->interactivity->config( 'myPlugin', array( 'chars' => '&<>/' ) );
    360378
    361379        $interactivity_data_markup = get_echo( array( $this->interactivity, 'print_client_interactivity_data' ) );
    362         preg_match( '/<script type="application\/json" id="wp-interactivity-data">.*?(\{.*\}).*?<\/script>/s', $interactivity_data_markup, $interactivity_data_string );
    363 
    364         $this->assertEquals(
    365             '{"config":{"myPlugin":{"tags":"Tags: \u003C!-- \u003Cscript\u003E"}},"state":{"myPlugin":{"amps":"http:\/\/site.test\/?foo=1\u0026baz=2"}}}',
    366             $interactivity_data_string[1]
    367         );
     380        preg_match( '~<script type="application/json" id="wp-interactivity-data">\s*(\{.*\})\s*</script>~s', $interactivity_data_markup, $interactivity_data_string );
     381
     382        $expected = <<<"JSON"
     383{"config":{"myPlugin":{"chars":"&\\u003C\\u003E/"}},"state":{"myPlugin":{"ampersand":"&","less-than sign":"\\u003C","greater-than sign":"\\u003E","solidus":"/","line separator":"\u{2028}","paragraph separator":"\u{2029}","flag of england":"\u{1F3F4}\u{E0067}\u{E0062}\u{E0065}\u{E006E}\u{E0067}\u{E007F}","malicious script closer":"\\u003C/script\\u003E","entity-encoded malicious script closer":"&lt;/script&gt;"}}}
     384JSON;
     385        $this->assertEquals( $expected, $interactivity_data_string[1] );
     386    }
     387
     388    /**
     389     * Tests that special characters in the initial state and configuration are
     390     * properly escaped when the blog_charset is not UTF-8 (unicode compatible).
     391     *
     392     * In this test, unicode and line terminators should be escaped to their
     393     * JSON unicode sequences.
     394     *
     395     * @ticket 61170
     396     *
     397     * @covers ::state
     398     * @covers ::config
     399     * @covers ::print_client_interactivity_data
     400     */
     401    public function test_state_and_config_escape_special_characters_non_utf8() {
     402        add_filter( 'pre_option_blog_charset', array( $this, 'charset_iso_8859_1' ) );
     403        $this->interactivity->state(
     404            'myPlugin',
     405            array(
     406                'ampersand'                              => '&',
     407                'less-than sign'                         => '<',
     408                'greater-than sign'                      => '>',
     409                'solidus'                                => '/',
     410                'line separator'                         => "\u{2028}",
     411                'paragraph separator'                    => "\u{2029}",
     412                'flag of england'                        => "\u{1F3F4}\u{E0067}\u{E0062}\u{E0065}\u{E006E}\u{E0067}\u{E007F}",
     413                'malicious script closer'                => '</script>',
     414                'entity-encoded malicious script closer' => '&lt;/script&gt;',
     415            )
     416        );
     417        $this->interactivity->config( 'myPlugin', array( 'chars' => '&<>/' ) );
     418
     419        $interactivity_data_markup = get_echo( array( $this->interactivity, 'print_client_interactivity_data' ) );
     420        preg_match( '~<script type="application/json" id="wp-interactivity-data">\s*(\{.*\})\s*</script>~s', $interactivity_data_markup, $interactivity_data_string );
     421
     422        $expected = <<<"JSON"
     423{"config":{"myPlugin":{"chars":"&\\u003C\\u003E/"}},"state":{"myPlugin":{"ampersand":"&","less-than sign":"\\u003C","greater-than sign":"\\u003E","solidus":"/","line separator":"\\u2028","paragraph separator":"\\u2029","flag of england":"\\ud83c\\udff4\\udb40\\udc67\\udb40\\udc62\\udb40\\udc65\\udb40\\udc6e\\udb40\\udc67\\udb40\\udc7f","malicious script closer":"\\u003C/script\\u003E","entity-encoded malicious script closer":"&lt;/script&gt;"}}}
     424JSON;
     425        $this->assertEquals( $expected, $interactivity_data_string[1] );
    368426    }
    369427
Note: See TracChangeset for help on using the changeset viewer.