Make WordPress Core

Changeset 53918


Ignore:
Timestamp:
08/22/2022 03:37:59 PM (22 months ago)
Author:
SergeyBiryukov
Message:

Database: Account for utf8 being renamed to utf8mb3 in newer MariaDB and MySQL versions.

From MariaDB 10.6.1 release notes:

"The utf8 character set (and related collations) is now by default an alias for utf8mb3 rather than the other way around. It can be set to imply utf8mb4 by changing the value of the old_mode system variable (MDEV-8334)."

From MySQL 8.0.30 release notes:

"Important Change: A previous change renamed character sets having deprecated names prefixed with utf8_ to use utf8mb3_ instead. In this release, we rename the utf8_ collations as well, using the utf8mb3_ prefix; this is to make the collation names consistent with those of the character sets, not to rely any longer on the deprecated collation names, and to clarify the distinction between utf8mb3 and utf8mb4. The names using the utf8mb3_ prefix are now used exclusively for these collations in the output of SHOW statements such as SHOW CREATE TABLE, as well as in the values displayed in the columns of Information Schema tables including the COLLATIONS and COLUMNS tables."

This commit adds utf8mb3_bin and utf8mb3_general_ci to the list of safe collations recognized by wpdb::check_safe_collation(). The full list is now as follows:

  • utf8_bin
  • utf8_general_ci
  • utf8mb3_bin
  • utf8mb3_general_ci
  • utf8mb4_bin
  • utf8mb4_general_ci

The change is covered by existing database charset unit tests: six tests which previously failed on MariaDB 10.6.1+ or MySQL 8.0.30+ now pass.

Includes:

  • Adjusting the expected test results based on MariaDB and MySQL version.
  • Using named data providers for the affected tests to make test output more descriptive.
  • Adding a failure message to each assertion when multiple assertions are used in the test.

References:

Follow-up to [30345], [32162], [37320].

Props skithund, ayeshrajans, JavierCasares, SergeyBiryukov.
Fixes #53623.

Location:
trunk
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/wp-includes/class-wpdb.php

    r53749 r53918  
    33773377
    33783378        // If any of the columns don't have one of these collations, it needs more sanity checking.
     3379        $safe_collations = array(
     3380            'utf8_bin',
     3381            'utf8_general_ci',
     3382            'utf8mb3_bin',
     3383            'utf8mb3_general_ci',
     3384            'utf8mb4_bin',
     3385            'utf8mb4_general_ci',
     3386        );
     3387
    33793388        foreach ( $this->col_meta[ $table ] as $col ) {
    33803389            if ( empty( $col->Collation ) ) {
     
    33823391            }
    33833392
    3384             if ( ! in_array( $col->Collation, array( 'utf8_general_ci', 'utf8_bin', 'utf8mb4_general_ci', 'utf8mb4_bin' ), true ) ) {
     3393            if ( ! in_array( $col->Collation, $safe_collations, true ) ) {
    33853394                return false;
    33863395            }
  • trunk/tests/phpunit/tests/db/charset.php

    r52248 r53918  
    1010
    1111    /**
    12      * Our special WPDB
     12     * Our special WPDB.
    1313     *
    1414     * @var resource
     
    1717
    1818    /**
    19      * The version of the MySQL server.
     19     * Whether to expect utf8mb3 instead of utf8 in various commands output.
     20     *
     21     * @var bool
     22     */
     23    private static $utf8_is_utf8mb3 = false;
     24
     25    /**
     26     * The database server version.
    2027     *
    2128     * @var string
    2229     */
    23     private static $server_info;
     30    private static $db_version;
     31
     32    /**
     33     * Full database server information.
     34     *
     35     * @var string
     36     */
     37    private static $db_server_info;
    2438
    2539    public static function set_up_before_class() {
     
    3044        self::$_wpdb = new WpdbExposedMethodsForTesting();
    3145
    32         self::$server_info = self::$_wpdb->db_server_info();
     46        self::$db_version     = self::$_wpdb->db_version();
     47        self::$db_server_info = self::$_wpdb->db_server_info();
     48
     49        /*
     50         * MariaDB 10.6.1 or later and MySQL 8.0.30 or later
     51         * use utf8mb3 instead of utf8 in various commands output.
     52         */
     53        if ( str_contains( self::$db_server_info, 'MariaDB' ) && version_compare( self::$db_version, '10.6.1', '>=' )
     54            || ! str_contains( self::$db_server_info, 'MariaDB' ) && version_compare( self::$db_version, '8.0.30', '>=' )
     55        ) {
     56            self::$utf8_is_utf8mb3 = true;
     57        }
    3358    }
    3459
     
    493518        }
    494519
    495         if ( 'big5' === $new_charset && 'byte' === $data[0]['length']['type'] && false !== strpos( self::$server_info, 'MariaDB' ) ) {
     520        if ( 'big5' === $new_charset && 'byte' === $data[0]['length']['type']
     521            && str_contains( self::$db_server_info, 'MariaDB' )
     522        ) {
    496523            $this->markTestSkipped( "MariaDB doesn't support this data set. See https://core.trac.wordpress.org/ticket/33171." );
    497524        }
     
    809836
    810837        foreach ( $expected_charset as $column => $charset ) {
     838            if ( self::$utf8_is_utf8mb3 && 'utf8' === $charset ) {
     839                $charset = 'utf8mb3';
     840            }
     841
    811842            $this->assertSame( $charset, self::$_wpdb->get_col_charset( $table, $column ) );
    812843            $this->assertSame( $charset, self::$_wpdb->get_col_charset( strtoupper( $table ), strtoupper( $column ) ) );
     
    876907        $table_name = 'strip_invalid_text_from_query_table';
    877908        $data       = array(
    878             array(
     909            'utf8 + binary'  => array(
    879910                // Binary tables don't get stripped.
    880                 '( a VARCHAR(50) CHARACTER SET utf8, b BINARY )', // Create.
    881                 "('foo\xf0\x9f\x98\x88bar', 'foo')",              // Query.
    882                 "('foo\xf0\x9f\x98\x88bar', 'foo')",              // Expected result.
    883             ),
    884             array(
     911                'create'   => '( a VARCHAR(50) CHARACTER SET utf8, b BINARY )',
     912                'query'    => "('foo\xf0\x9f\x98\x88bar', 'foo')",
     913                'expected' => "('foo\xf0\x9f\x98\x88bar', 'foo')",
     914            ),
     915            'utf8 + utf8mb4' => array(
    885916                // utf8/utf8mb4 tables default to utf8.
    886                 '( a VARCHAR(50) CHARACTER SET utf8, b VARCHAR(50) CHARACTER SET utf8mb4 )',
    887                 "('foo\xf0\x9f\x98\x88bar', 'foo')",
    888                 "('foobar', 'foo')",
     917                'create'   => '( a VARCHAR(50) CHARACTER SET utf8, b VARCHAR(50) CHARACTER SET utf8mb4 )',
     918                'query'    => "('foo\xf0\x9f\x98\x88bar', 'foo')",
     919                'expected' => "('foobar', 'foo')",
    889920            ),
    890921        );
    891922
    892         foreach ( $data as $i => &$value ) {
    893             $this_table_name = $table_name . '_' . $i;
    894 
    895             $value[0] = "CREATE TABLE $this_table_name {$value[0]}";
    896             $value[1] = "INSERT INTO $this_table_name VALUES {$value[1]}";
    897             $value[2] = "INSERT INTO $this_table_name VALUES {$value[2]}";
    898             $value[3] = "DROP TABLE IF EXISTS $this_table_name";
     923        $i = 0;
     924
     925        foreach ( $data as &$value ) {
     926            $this_table_name = $table_name . '_' . $i++;
     927
     928            $value['create']   = "CREATE TABLE $this_table_name {$value['create']}";
     929            $value['query']    = "INSERT INTO $this_table_name VALUES {$value['query']}";
     930            $value['expected'] = "INSERT INTO $this_table_name VALUES {$value['expected']}";
     931            $value['drop']     = "DROP TABLE IF EXISTS $this_table_name";
    899932        }
    900933        unset( $value );
     
    9801013        $table_name = 'table_collation_check';
    9811014        $data       = array(
    982             array(
     1015            'utf8_bin'                   => array(
    9831016                // utf8_bin tables don't need extra sanity checking.
    984                 '( a VARCHAR(50) COLLATE utf8_bin )', // Create.
    985                 true,                                 // Expected result.
    986             ),
    987             array(
     1017                'create'   => '( a VARCHAR(50) COLLATE utf8_bin )',
     1018                'expected' => true,
     1019            ),
     1020            'utf8_general_ci'            => array(
    9881021                // Neither do utf8_general_ci tables.
    989                 '( a VARCHAR(50) COLLATE utf8_general_ci )',
    990                 true,
    991             ),
    992             array(
     1022                'create'   => '( a VARCHAR(50) COLLATE utf8_general_ci )',
     1023                'expected' => true,
     1024            ),
     1025            'utf8_unicode_ci'            => array(
    9931026                // utf8_unicode_ci tables do.
    994                 '( a VARCHAR(50) COLLATE utf8_unicode_ci )',
    995                 false,
    996             ),
    997             array(
     1027                'create'   => '( a VARCHAR(50) COLLATE utf8_unicode_ci )',
     1028                'expected' => false,
     1029            ),
     1030            'utf8_bin + big5_chinese_ci' => array(
    9981031                // utf8_bin tables don't need extra sanity checking,
    9991032                // except for when they're not just utf8_bin.
    1000                 '( a VARCHAR(50) COLLATE utf8_bin, b VARCHAR(50) COLLATE big5_chinese_ci )',
    1001                 false,
    1002             ),
    1003             array(
     1033                'create'   => '( a VARCHAR(50) COLLATE utf8_bin, b VARCHAR(50) COLLATE big5_chinese_ci )',
     1034                'expected' => false,
     1035            ),
     1036            'utf8_bin + int'             => array(
    10041037                // utf8_bin tables don't need extra sanity checking
    10051038                // when the other columns aren't strings.
    1006                 '( a VARCHAR(50) COLLATE utf8_bin, b INT )',
    1007                 true,
     1039                'create'   => '( a VARCHAR(50) COLLATE utf8_bin, b INT )',
     1040                'expected' => true,
    10081041            ),
    10091042        );
    10101043
    1011         foreach ( $data as $i => &$value ) {
    1012             $this_table_name = $table_name . '_' . $i;
    1013 
    1014             $value[0] = "CREATE TABLE $this_table_name {$value[0]}";
    1015             $value[2] = "SELECT * FROM $this_table_name WHERE a='\xf0\x9f\x98\x88'";
    1016             $value[3] = "DROP TABLE IF EXISTS $this_table_name";
    1017             $value[4] = array(
     1044        $i = 0;
     1045
     1046        foreach ( $data as &$value ) {
     1047            $this_table_name = $table_name . '_' . $i++;
     1048
     1049            $value['create']      = "CREATE TABLE $this_table_name {$value['create']}";
     1050            $value['query']       = "SELECT * FROM $this_table_name WHERE a='\xf0\x9f\x98\x88'";
     1051            $value['drop']        = "DROP TABLE IF EXISTS $this_table_name";
     1052            $value['always_true'] = array(
    10181053                "SELECT * FROM $this_table_name WHERE a='foo'",
    10191054                "SHOW FULL TABLES LIKE $this_table_name",
     
    10411076
    10421077        $return = self::$_wpdb->check_safe_collation( $query );
    1043         $this->assertSame( $expected, $return );
     1078        $this->assertSame(
     1079            $expected,
     1080            $return,
     1081            sprintf(
     1082                "wpdb::check_safe_collation() should return %s for this query.\n" .
     1083                "Table: %s\n" .
     1084                'Query: %s',
     1085                $expected ? 'true' : 'false',
     1086                $create,
     1087                $query
     1088            )
     1089        );
    10441090
    10451091        foreach ( $always_true as $true_query ) {
    10461092            $return = self::$_wpdb->check_safe_collation( $true_query );
    1047             $this->assertTrue( $return );
     1093            $this->assertTrue(
     1094                $return,
     1095                sprintf(
     1096                    "wpdb::check_safe_collation() should return true for this query.\n" .
     1097                    "Table: %s\n" .
     1098                    'Query: %s',
     1099                    $create,
     1100                    $true_query
     1101                )
     1102            );
    10481103        }
    10491104
     
    11161171    public function test_set_charset_changes_the_connection_collation() {
    11171172        self::$_wpdb->set_charset( self::$_wpdb->dbh, 'utf8', 'utf8_general_ci' );
    1118         $results = self::$_wpdb->get_results( "SHOW VARIABLES WHERE Variable_name='collation_connection'" );
    1119         $this->assertSame( 'utf8_general_ci', $results[0]->Value );
     1173        $results  = self::$_wpdb->get_results( "SHOW VARIABLES WHERE Variable_name='collation_connection'" );
     1174        $expected = self::$utf8_is_utf8mb3 ? 'utf8mb3_general_ci' : 'utf8_general_ci';
     1175        $this->assertSame( $expected, $results[0]->Value, "Collation should be set to $expected." );
    11201176
    11211177        self::$_wpdb->set_charset( self::$_wpdb->dbh, 'utf8mb4', 'utf8mb4_unicode_ci' );
    11221178        $results = self::$_wpdb->get_results( "SHOW VARIABLES WHERE Variable_name='collation_connection'" );
    1123         $this->assertSame( 'utf8mb4_unicode_ci', $results[0]->Value );
     1179        $this->assertSame( 'utf8mb4_unicode_ci', $results[0]->Value, 'Collation should be set to utf8mb4_unicode_ci.' );
    11241180
    11251181        self::$_wpdb->set_charset( self::$_wpdb->dbh );
Note: See TracChangeset for help on using the changeset viewer.