Ticket #21212: 21212-utf8mb4.9.diff
| File 21212-utf8mb4.9.diff, 12.2 KB (added by , 11 years ago) |
|---|
-
src/wp-admin/includes/schema.php
44 44 // Engage multisite if in the middle of turning it on from network.php. 45 45 $is_multisite = is_multisite() || ( defined( 'WP_INSTALLING_NETWORK' ) && WP_INSTALLING_NETWORK ); 46 46 47 /* 48 * Indexes have a maximum size of 767 bytes. Historically, we haven't needed to be concerned about that. 49 * As of 4.2, however, we moved to utf8mb4, which uses 4 bytes per character. This means that an index which 50 * used to have room for floor(767/3) = 255 characters, now only has room for floor(767/4) = 191 characters. 51 */ 52 $max_index_length = 191; 53 47 54 // Blog specific tables. 48 55 $blog_tables = "CREATE TABLE $wpdb->terms ( 49 56 term_id bigint(20) unsigned NOT NULL auto_increment, … … 51 58 slug varchar(200) NOT NULL default '', 52 59 term_group bigint(10) NOT NULL default 0, 53 60 PRIMARY KEY (term_id), 54 KEY slug (slug ),55 KEY name (name )61 KEY slug (slug($max_index_length)), 62 KEY name (name($max_index_length)) 56 63 ) $charset_collate; 57 64 CREATE TABLE $wpdb->term_taxonomy ( 58 65 term_taxonomy_id bigint(20) unsigned NOT NULL auto_increment, … … 79 86 meta_value longtext, 80 87 PRIMARY KEY (meta_id), 81 88 KEY comment_id (comment_id), 82 KEY meta_key (meta_key )89 KEY meta_key (meta_key($max_index_length)) 83 90 ) $charset_collate; 84 91 CREATE TABLE $wpdb->comments ( 85 92 comment_ID bigint(20) unsigned NOT NULL auto_increment, … … 136 143 meta_value longtext, 137 144 PRIMARY KEY (meta_id), 138 145 KEY post_id (post_id), 139 KEY meta_key (meta_key )146 KEY meta_key (meta_key($max_index_length)) 140 147 ) $charset_collate; 141 148 CREATE TABLE $wpdb->posts ( 142 149 ID bigint(20) unsigned NOT NULL auto_increment, … … 163 170 post_mime_type varchar(100) NOT NULL default '', 164 171 comment_count bigint(20) NOT NULL default '0', 165 172 PRIMARY KEY (ID), 166 KEY post_name (post_name ),173 KEY post_name (post_name($max_index_length)), 167 174 KEY type_status_date (post_type,post_status,post_date,ID), 168 175 KEY post_parent (post_parent), 169 176 
KEY post_author (post_author) … … 213 220 meta_value longtext, 214 221 PRIMARY KEY (umeta_id), 215 222 KEY user_id (user_id), 216 KEY meta_key (meta_key )223 KEY meta_key (meta_key($max_index_length)) 217 224 ) $charset_collate;\n"; 218 225 219 226 // Global tables … … 261 268 domain varchar(200) NOT NULL default '', 262 269 path varchar(100) NOT NULL default '', 263 270 PRIMARY KEY (id), 264 KEY domain (domain ,path)271 KEY domain (domain(140),path(51)) 265 272 ) $charset_collate; 266 273 CREATE TABLE $wpdb->sitemeta ( 267 274 meta_id bigint(20) NOT NULL auto_increment, … … 269 276 meta_key varchar(255) default NULL, 270 277 meta_value longtext, 271 278 PRIMARY KEY (meta_id), 272 KEY meta_key (meta_key ),279 KEY meta_key (meta_key($max_index_length)), 273 280 KEY site_id (site_id) 274 281 ) $charset_collate; 275 282 CREATE TABLE $wpdb->signups ( … … 288 295 KEY activation_key (activation_key), 289 296 KEY user_email (user_email), 290 297 KEY user_login_email (user_login,user_email), 291 KEY domain_path (domain ,path)298 KEY domain_path (domain(140),path(51)) 292 299 ) $charset_collate;"; 293 300 294 301 switch ( $scope ) { -
src/wp-admin/includes/upgrade.php
519 519 if ( $wp_current_db_version < 29630 ) 520 520 upgrade_400(); 521 521 522 if ( $wp_current_db_version < 31349 ) 523 upgrade_420(); 524 522 525 maybe_disable_link_manager(); 523 526 524 527 maybe_disable_automattic_widgets(); … … 1407 1410 } 1408 1411 1409 1412 /** 1413 * Execute changes made in WordPress 4.2.0: convert this site's tables to utf8mb4 when the connection charset is utf8mb4. 1414 * 1415 * @since 4.2.0 1416 */ 1417 function upgrade_420() { 1418 global $wp_current_db_version, $wpdb; 1419 1420 if ( $wp_current_db_version < 31349 && $wpdb->charset === 'utf8mb4' ) { 1421 if ( is_multisite() ) { 1422 $tables = $wpdb->tables( 'blog' ); 1423 } else { 1424 $tables = $wpdb->tables( 'all' ); 1425 } 1426 1427 foreach ( $tables as $table ) { 1428 maybe_convert_table_to_utf8mb4( $table ); 1429 } 1430 } 1431 } 1432 1433 /** 1410 1434 * Executes network-level upgrade routines. 1411 1435 * 1412 1436 * @since 3.0.0 … … 1502 1526 update_site_option( 'illegal_names', $illegal_names ); 1503 1527 } 1504 1528 } 1529 1530 // 4.2: shorten the global-table indexes first, then convert the global tables to utf8mb4. The signups index is named domain_path in the schema, not domain. 1531 if ( $wp_current_db_version < 31349 && $wpdb->charset === 'utf8mb4' ) { 1532 if ( ! ( defined( 'DO_NOT_UPGRADE_GLOBAL_TABLES' ) && DO_NOT_UPGRADE_GLOBAL_TABLES ) ) { 1533 $wpdb->query( "ALTER TABLE $wpdb->site DROP INDEX domain, ADD INDEX domain(domain(140),path(51))" ); 1534 $wpdb->query( "ALTER TABLE $wpdb->sitemeta DROP INDEX meta_key, ADD INDEX meta_key(meta_key(191))" ); 1535 $wpdb->query( "ALTER TABLE $wpdb->signups DROP INDEX domain_path, ADD INDEX domain_path(domain(140),path(51))" ); 1536 1537 $tables = $wpdb->tables( 'global' ); 1538 1539 foreach ( $tables as $table ) { 1540 maybe_convert_table_to_utf8mb4( $table ); 1541 } 1542 } 1543 } 1505 1544 } 1506 1545 1507 1546 // … … 1608 1647 } 1609 1648 1610 1649 /** 1650 * If a table only contains utf8 or utf8mb4 columns, convert it to utf8mb4. 1651 * 1652 * @since 4.2.0 1653 * 1654 * @param string $table The table to convert. 1655 * @return bool true if the table was converted, false if it wasn't. 
1656 */ 1657 function maybe_convert_table_to_utf8mb4( $table ) { 1658 global $wpdb; 1659 1660 $results = $wpdb->get_results( "SHOW FULL COLUMNS FROM `$table`" ); 1661 if ( ! $results ) { 1662 return false; 1663 } 1664 1665 $has_utf8 = false; 1666 foreach ( $results as $column ) { 1667 if ( $column->Collation ) { // Collation is a collation name such as utf8_general_ci, so match on its charset prefix. 1668 if ( preg_match( '/^utf8(mb3)?_/i', $column->Collation ) ) { 1669 $has_utf8 = true; 1670 } elseif ( 0 !== stripos( $column->Collation, 'utf8mb4_' ) ) { 1671 // Don't upgrade tables that have non-utf8 columns. 1672 return false; 1673 } 1674 } 1675 } 1676 1677 if ( ! $has_utf8 ) { 1678 // Don't bother upgrading tables that don't have utf8 columns. 1679 return false; 1680 } 1681 1682 return $wpdb->query( "ALTER TABLE `$table` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci" ); 1683 } 1684 1685 /** 1611 1686 * Retrieve all options as it was for 1.2. 1612 1687 * 1613 1688 * @since 1.2.0 … … 2284 2359 // dbDelta() can recreate but can't drop the index. 2285 2360 $wpdb->query( "ALTER TABLE $wpdb->terms DROP INDEX slug" ); 2286 2361 } 2362 2363 // Upgrade versions prior to 4.2. 2364 if ( $wp_current_db_version < 31349 ) { 2365 // So that we can change tables to utf8mb4, we need to shorten the index lengths to less than 767 bytes 2366 $wpdb->query( "ALTER TABLE $wpdb->usermeta DROP INDEX meta_key, ADD INDEX meta_key(meta_key(191))" ); 2367 $wpdb->query( "ALTER TABLE $wpdb->terms DROP INDEX slug, ADD INDEX slug(slug(191))" ); 2368 $wpdb->query( "ALTER TABLE $wpdb->terms DROP INDEX name, ADD INDEX name(name(191))" ); 2369 $wpdb->query( "ALTER TABLE $wpdb->commentmeta DROP INDEX meta_key, ADD INDEX meta_key(meta_key(191))" ); 2370 $wpdb->query( "ALTER TABLE $wpdb->postmeta DROP INDEX meta_key, ADD INDEX meta_key(meta_key(191))" ); 2371 $wpdb->query( "ALTER TABLE $wpdb->posts DROP INDEX post_name, ADD INDEX post_name(post_name(191))" ); 2372 } 2287 2373 } 2288 2374 2289 2375 /** -
src/wp-admin/setup-config.php
280 280 case 'DB_HOST' : 281 281 $config_file[ $line_num ] = "define('" . $constant . "'," . $padding . "'" . addcslashes( constant( $constant ), "\\'" ) . "');\r\n"; 282 282 break; 283 case 'DB_CHARSET' : 284 if ( 'utf8mb4' === $wpdb->charset || ( ! $wpdb->charset && $wpdb->has_cap( 'utf8mb4' ) ) ) { 285 $config_file[ $line_num ] = "define('" . $constant . "'," . $padding . "'utf8mb4');\r\n"; 286 } 287 break; 283 288 case 'AUTH_KEY' : 284 289 case 'SECURE_AUTH_KEY' : 285 290 case 'LOGGED_IN_KEY' : -
src/wp-includes/version.php
11 11 * 12 12 * @global int $wp_db_version 13 13 */ 14 $wp_db_version = 3 0133;14 $wp_db_version = 31349; 15 15 16 16 /** 17 17 * Holds the TinyMCE version -
src/wp-includes/wp-db.php
624 624 } 625 625 } 626 626 627 $this->init_charset();628 629 627 $this->dbuser = $dbuser; 630 628 $this->dbpassword = $dbpassword; 631 629 $this->dbname = $dbname; … … 717 715 public function init_charset() { 718 716 if ( function_exists('is_multisite') && is_multisite() ) { 719 717 $this->charset = 'utf8'; 720 if ( defined( 'DB_COLLATE' ) && DB_COLLATE ) 718 if ( defined( 'DB_COLLATE' ) && DB_COLLATE ) { 721 719 $this->collate = DB_COLLATE; 722 else720 } else { 723 721 $this->collate = 'utf8_general_ci'; 722 } 724 723 } elseif ( defined( 'DB_COLLATE' ) ) { 725 724 $this->collate = DB_COLLATE; 726 725 } 727 726 728 if ( defined( 'DB_CHARSET' ) ) 727 if ( defined( 'DB_CHARSET' ) ) { 729 728 $this->charset = DB_CHARSET; 729 } 730 731 if ( ( $this->use_mysqli && ! ( $this->dbh instanceof mysqli ) ) 732 || ( empty( $this->dbh ) || ! ( $this->dbh instanceof mysqli ) ) ) { 733 return; 734 } 735 736 if ( 'utf8' === $this->charset && $this->has_cap( 'utf8mb4' ) ) { 737 $this->charset = 'utf8mb4'; 738 } 739 740 if ( 'utf8mb4' === $this->charset && ( ! $this->collate || stripos( $this->collate, 'utf8_' ) === 0 ) ) { 741 $this->collate = 'utf8mb4_unicode_ci'; 742 } 730 743 } 731 744 732 745 /** … … 1476 1489 1477 1490 return false; 1478 1491 } elseif ( $this->dbh ) { 1492 if ( ! $this->has_connected ) { 1493 $this->init_charset(); 1494 } 1495 1479 1496 $this->has_connected = true; 1497 1480 1498 $this->set_charset( $this->dbh ); 1499 1481 1500 $this->ready = true; 1482 1501 $this->set_sql_mode(); 1483 1502 $this->select( $this->dbname, $this->dbh ); … … 2249 2268 * Retrieves the character set for the given column. 2250 2269 * 2251 2270 * @since 4.2.0 2252 * @access p rotected2271 * @access public 2253 2272 * 2254 2273 * @param string $table Table name. 2255 2274 * @param string $column Column name. … … 2256 2275 * @return mixed Column character set as a string. False if the column has no 2257 2276 * character set. {@see WP_Error} object if there was an error. 
2258 2277 */ 2259 p rotectedfunction get_col_charset( $table, $column ) {2278 public function get_col_charset( $table, $column ) { 2260 2279 $tablekey = strtolower( $table ); 2261 2280 $columnkey = strtolower( $column ); 2262 2281 … … 2356 2375 'gb2312' => 'EUC-CN', 2357 2376 'ujis' => 'EUC-JP', 2358 2377 'utf32' => 'UTF-32', 2359 'utf8mb4' => 'UTF-8',2360 2378 ); 2361 2379 2362 2380 $supported_charsets = array(); … … 2391 2409 } 2392 2410 } 2393 2411 2394 // utf8 (mb3)can be handled by regex, which is a bunch faster than a DB lookup.2395 if ( 'utf8' === $charset || 'utf8mb3' === $charset ) {2412 // utf8 can be handled by regex, which is a bunch faster than a DB lookup. 2413 if ( 'utf8' === $charset || 'utf8mb3' === $charset || 'utf8mb4' === $charset ) { 2396 2414 $regex = '/ 2397 2415 ( 2398 2416 (?: [\x00-\x7F] # single-byte sequences 0xxxxxxx … … 2400 2418 | \xE0[\xA0-\xBF][\x80-\xBF] # triple-byte sequences 1110xxxx 10xxxxxx * 2 2401 2419 | [\xE1-\xEC][\x80-\xBF]{2} 2402 2420 | \xED[\x80-\x9F][\x80-\xBF] 2403 | [\xEE-\xEF][\x80-\xBF]{2} 2404 ){1,50} # ...one or more times 2421 | [\xEE-\xEF][\x80-\xBF]{2}'; 2422 2423 if ( 'utf8mb4' === $charset) { 2424 $regex .= ' 2425 | \xF0[\x90-\xBF][\x80-\xBF]{2} # four-byte sequences 11110xxx 10xxxxxx * 3 2426 | [\xF1-\xF3][\x80-\xBF]{3} 2427 | \xF4[\x80-\x8F][\x80-\xBF]{2} 2428 '; 2429 } 2430 2431 $regex .= '){1,50} # ...one or more times 2405 2432 ) 2406 2433 | . # anything else 2407 2434 /x'; -
tests/phpunit/tests/db/charset.php
130 130 } 131 131 132 132 /** 133 * @ ticket 21212133 * @ticket 21212 134 134 */ 135 135 function test_process_fields_failure() { 136 136 global $wpdb; 137 $data = array( 'post_content' => "H€llo\xf0\x9f\x98\x88World¢" ); 137 // \xf0\xff\xff\xff is invalid in utf8 and utf8mb4. 138 $data = array( 'post_content' => "H€llo\xf0\xff\xff\xffWorld¢" ); 138 139 $this->assertFalse( self::$_wpdb->process_fields( $wpdb->posts, $data, null ) ); 139 140 } 140 141 … … 436 437 */ 437 438 function test_invalid_characters_in_query() { 438 439 global $wpdb; 439 $this->assertFalse( $wpdb->query( "INSERT INTO {$wpdb->posts} (post_content) VALUES ('foo\xf0\x 9f\x98\x88bar')" ) );440 $this->assertFalse( $wpdb->query( "INSERT INTO {$wpdb->posts} (post_content) VALUES ('foo\xf0\xff\xff\xffbar')" ) ); 440 441 } 441 442 }