Ticket #21212: 21212-utf8mb4.7.diff
| File 21212-utf8mb4.7.diff, 11.4 KB (added by , 11 years ago) |
|---|
-
src/wp-admin/includes/schema.php
51 51 slug varchar(200) NOT NULL default '', 52 52 term_group bigint(10) NOT NULL default 0, 53 53 PRIMARY KEY (term_id), 54 KEY slug (slug), 55 KEY name (name) 54 KEY slug (slug(191)), 55 KEY name (name(191)) 56 56 ) $charset_collate; 57 57 CREATE TABLE $wpdb->term_taxonomy ( 58 58 term_taxonomy_id bigint(20) unsigned NOT NULL auto_increment, … … 79 79 meta_value longtext, 80 80 PRIMARY KEY (meta_id), 81 81 KEY comment_id (comment_id), 82 KEY meta_key (meta_key) 82 KEY meta_key (meta_key(191)) 83 83 ) $charset_collate; 84 84 CREATE TABLE $wpdb->comments ( 85 85 comment_ID bigint(20) unsigned NOT NULL auto_increment, … … 136 136 meta_value longtext, 137 137 PRIMARY KEY (meta_id), 138 138 KEY post_id (post_id), 139 KEY meta_key (meta_key) 139 KEY meta_key (meta_key(191)) 140 140 ) $charset_collate; 141 141 CREATE TABLE $wpdb->posts ( 142 142 ID bigint(20) unsigned NOT NULL auto_increment, … … 163 163 post_mime_type varchar(100) NOT NULL default '', 164 164 comment_count bigint(20) NOT NULL default '0', 165 165 PRIMARY KEY (ID), 166 KEY post_name (post_name), 166 KEY post_name (post_name(191)), 167 167 KEY type_status_date (post_type,post_status,post_date,ID), 168 168 KEY post_parent (post_parent), 169 169 KEY post_author (post_author) … … 213 213 meta_value longtext, 214 214 PRIMARY KEY (umeta_id), 215 215 KEY user_id (user_id), 216 KEY meta_key (meta_key) 216 KEY meta_key (meta_key(191)) 217 217 ) $charset_collate;\n"; 218 218 219 219 // Global tables … … 261 261 domain varchar(200) NOT NULL default '', 262 262 path varchar(100) NOT NULL default '', 263 263 PRIMARY KEY (id), 264 KEY domain (domain,path) 264 KEY domain (domain(140),path(51)) 265 265 ) $charset_collate; 266 266 CREATE TABLE $wpdb->sitemeta ( 267 267 meta_id bigint(20) NOT NULL auto_increment, … … 269 269 meta_key varchar(255) default NULL, 270 270 meta_value longtext, 271 271 PRIMARY KEY (meta_id), 272 KEY meta_key (meta_key), 272 KEY meta_key (meta_key(191)), 273 273 KEY site_id (site_id) 274 274 ) 
$charset_collate; 275 275 CREATE TABLE $wpdb->signups ( … … 288 288 KEY activation_key (activation_key), 289 289 KEY user_email (user_email), 290 290 KEY user_login_email (user_login,user_email), 291 KEY domain_path (domain,path) 291 KEY domain_path (domain(140),path(51)) 292 292 ) $charset_collate;"; 293 293 294 294 switch ( $scope ) { -
src/wp-admin/includes/upgrade.php
519 519 if ( $wp_current_db_version < 29630 ) 520 520 upgrade_400(); 521 521 522 if ( $wp_current_db_version < 30134 ) 523 upgrade_420(); 524 522 525 maybe_disable_link_manager(); 523 526 524 527 maybe_disable_automattic_widgets(); … … 1407 1410 } 1408 1411 1409 1412 /** 1413 * Execute changes made in WordPress 4.2.0. 1414 * 1415 * @since 4.2.0 1416 */ 1417 function upgrade_420() { 1418 global $wp_current_db_version, $wpdb; 1419 1420 if ( $wp_current_db_version < 30134 && $wpdb->charset === 'utf8mb4' ) { 1421 if ( is_multisite() ) { 1422 $tables = $wpdb->tables; 1423 } else { 1424 $tables = array_merge( $wpdb->tables, $wpdb->global_tables ); 1425 } 1426 1427 foreach ( $tables as $table ) { 1428 $results = $wpdb->get_results( "SHOW FULL COLUMNS FROM `{$wpdb->$table}`" ); 1429 if ( ! $results ) { 1430 continue; 1431 } 1432 1433 $has_utf8 = false; 1434 foreach ( $results as $column ) { 1435 if ( $column->Collation ) { 1436 if ( 'utf8' === $column->Collation ) { 1437 $has_utf8 = true; 1438 } else { 1439 // Don't upgrade tables that have non-utf8 columns 1440 continue 2; 1441 } 1442 } 1443 } 1444 1445 if ( ! $has_utf8 ) { 1446 // Don't bother upgrading tables that don't have utf8 columns 1447 continue; 1448 } 1449 1450 $wpdb->query( "ALTER TABLE {$wpdb->$table} CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci" ); 1451 } 1452 } 1453 } 1454 1455 /** 1410 1456 * Executes network-level upgrade routines. 
1411 1457 * 1412 1458 * @since 3.0.0 … … 1502 1548 update_site_option( 'illegal_names', $illegal_names ); 1503 1549 } 1504 1550 } 1551 1552 // 4.2 1553 if ( $wp_current_db_version < 30134 && $wpdb->charset === 'utf8mb4') { 1554 $wpdb->query( "ALTER TABLE $wpdb->site DROP INDEX domain, ADD INDEX domain(domain(140),path(51))" ); 1555 $wpdb->query( "ALTER TABLE $wpdb->sitemeta DROP INDEX meta_key, ADD INDEX meta_key(meta_key(191))" ); 1556 $wpdb->query( "ALTER TABLE $wpdb->signups DROP INDEX domain, ADD INDEX domain(domain(140),path(51))" ); 1557 1558 $tables = array_merge( $wpdb->ms_global_tables, $wpdb->global_tables ); 1559 1560 foreach ( $tables as $table ) { 1561 $results = $wpdb->get_results( "SHOW FULL COLUMNS FROM `{$wpdb->$table}`" ); 1562 if ( ! $results ) { 1563 continue; 1564 } 1565 1566 $has_utf8 = false; 1567 foreach ( $results as $column ) { 1568 if ( $column->Collation ) { 1569 if ( 'utf8' === $column->Collation ) { 1570 $has_utf8 = true; 1571 } else { 1572 // Don't upgrade tables that have non-utf8 columns 1573 continue 2; 1574 } 1575 } 1576 } 1577 1578 if ( ! $has_utf8 ) { 1579 // Don't bother upgrading tables that don't have utf8 columns 1580 continue; 1581 } 1582 1583 $wpdb->query( "ALTER TABLE {$wpdb->$table} CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci" ); 1584 } 1585 } 1505 1586 } 1506 1587 1507 1588 // … … 2284 2365 // dbDelta() can recreate but can't drop the index. 
2285 2366 $wpdb->query( "ALTER TABLE $wpdb->terms DROP INDEX slug" ); 2286 2367 } 2368 2369 if ( $wp_current_db_version < 30134 ) { 2370 // We need to alter some indices 2371 $wpdb->query( "ALTER TABLE $wpdb->usermeta DROP INDEX meta_key, ADD INDEX meta_key(meta_key(191))" ); 2372 $wpdb->query( "ALTER TABLE $wpdb->terms DROP INDEX slug, ADD INDEX slug(slug(191))" ); 2373 $wpdb->query( "ALTER TABLE $wpdb->terms DROP INDEX name, ADD INDEX name(name(191))" ); 2374 $wpdb->query( "ALTER TABLE $wpdb->commentmeta DROP INDEX meta_key, ADD INDEX meta_key(meta_key(191))" ); 2375 $wpdb->query( "ALTER TABLE $wpdb->postmeta DROP INDEX meta_key, ADD INDEX meta_key(meta_key(191))" ); 2376 $wpdb->query( "ALTER TABLE $wpdb->posts DROP INDEX post_name, ADD INDEX post_name(post_name(191))" ); 2377 } 2287 2378 } 2288 2379 2289 2380 /** -
src/wp-admin/setup-config.php
280 280 case 'DB_HOST' : 281 281 $config_file[ $line_num ] = "define('" . $constant . "'," . $padding . "'" . addcslashes( constant( $constant ), "\\'" ) . "');\r\n"; 282 282 break; 283 case 'DB_CHARSET' : 284 if ( 'utf8mb4' === $wpdb->charset || ( ! $wpdb->charset && $wpdb->has_cap( 'utf8mb4' ) ) ) { 285 $config_file[ $line_num ] = "define('" . $constant . "'," . $padding . "'utf8mb4');\r\n"; 286 } 287 break; 283 288 case 'AUTH_KEY' : 284 289 case 'SECURE_AUTH_KEY' : 285 290 case 'LOGGED_IN_KEY' : -
src/wp-includes/version.php
11 11 * 12 12 * @global int $wp_db_version 13 13 */ 14 $wp_db_version = 30133; 14 $wp_db_version = 30134; 15 15 16 16 /** 17 17 * Holds the TinyMCE version -
src/wp-includes/wp-db.php
624 624 } 625 625 } 626 626 627 $this->init_charset(); 628 629 627 $this->dbuser = $dbuser; 630 628 $this->dbpassword = $dbpassword; 631 629 $this->dbname = $dbname; … … 717 715 public function init_charset() { 718 716 if ( function_exists('is_multisite') && is_multisite() ) { 719 717 $this->charset = 'utf8'; 720 if ( defined( 'DB_COLLATE' ) && DB_COLLATE ) 718 if ( defined( 'DB_COLLATE' ) && DB_COLLATE ) { 721 719 $this->collate = DB_COLLATE; 722 else 720 } else { 723 721 $this->collate = 'utf8_general_ci'; 722 } 724 723 } elseif ( defined( 'DB_COLLATE' ) ) { 725 724 $this->collate = DB_COLLATE; 726 725 } 727 726 728 if ( defined( 'DB_CHARSET' ) ) 727 if ( defined( 'DB_CHARSET' ) ) { 729 728 $this->charset = DB_CHARSET; 729 } 730 731 if ( ( $this->use_mysqli && ! ( $this->dbh instanceof mysqli ) ) 732 || ( empty( $this->dbh ) || ! ( $this->dbh instanceof mysqli ) ) ) { 733 return; 734 } 735 736 if ( 'utf8' === $this->charset && $this->has_cap( 'utf8mb4' ) ) { 737 $this->charset = 'utf8mb4'; 738 } 739 740 if ( 'utf8mb4' === $this->charset && ( ! $this->collate || stripos( $this->collate, 'utf8_' ) === 0 ) ) { 741 $this->collate = 'utf8mb4_unicode_ci'; 742 } 730 743 } 731 744 732 745 /** … … 1477 1490 return false; 1478 1491 } elseif ( $this->dbh ) { 1479 1492 $this->has_connected = true; 1493 1494 $this->init_charset(); 1480 1495 $this->set_charset( $this->dbh ); 1496 1481 1497 $this->ready = true; 1482 1498 $this->set_sql_mode(); 1483 1499 $this->select( $this->dbname, $this->dbh ); … … 2249 2265 * Retrieves the character set for the given column. 2250 2266 * 2251 2267 * @since 4.2.0 2252 * @access protected 2268 * @access public 2253 2269 * 2254 2270 * @param string $table Table name. 2255 2271 * @param string $column Column name. … … 2256 2272 * @return mixed Column character set as a string. False if the column has no 2257 2273 * character set. {@see WP_Error} object if there was an error. 
2258 2274 */ 2259 protected function get_col_charset( $table, $column ) { 2275 public function get_col_charset( $table, $column ) { 2260 2276 $tablekey = strtolower( $table ); 2261 2277 $columnkey = strtolower( $column ); 2262 2278 … … 2356 2372 'gb2312' => 'EUC-CN', 2357 2373 'ujis' => 'EUC-JP', 2358 2374 'utf32' => 'UTF-32', 2359 'utf8mb4' => 'UTF-8', 2360 2375 ); 2361 2376 2362 2377 $supported_charsets = array(); … … 2391 2406 } 2392 2407 } 2393 2408 2394 // utf8(mb3) can be handled by regex, which is a bunch faster than a DB lookup. 2395 if ( 'utf8' === $charset || 'utf8mb3' === $charset ) { 2409 // utf8 can be handled by regex, which is a bunch faster than a DB lookup. 2410 if ( 'utf8' === $charset || 'utf8mb3' === $charset || 'utf8mb4' === $charset ) { 2396 2411 $regex = '/ 2397 2412 ( 2398 2413 (?: [\x00-\x7F] # single-byte sequences 0xxxxxxx … … 2400 2415 | \xE0[\xA0-\xBF][\x80-\xBF] # triple-byte sequences 1110xxxx 10xxxxxx * 2 2401 2416 | [\xE1-\xEC][\x80-\xBF]{2} 2402 2417 | \xED[\x80-\x9F][\x80-\xBF] 2403 | [\xEE-\xEF][\x80-\xBF]{2} 2404 ){1,50} # ...one or more times 2418 | [\xEE-\xEF][\x80-\xBF]{2}'; 2419 2420 if ( 'utf8mb4' === $charset) { 2421 $regex .= ' 2422 | \xF0[\x90-\xBF][\x80-\xBF]{2} # four-byte sequences 11110xxx 10xxxxxx * 3 2423 | [\xF1-\xF3][\x80-\xBF]{3} 2424 | \xF4[\x80-\x8F][\x80-\xBF]{2} 2425 '; 2426 } 2427 2428 $regex .= '){1,50} # ...one or more times 2405 2429 ) 2406 2430 | . # anything else 2407 2431 /x'; -
tests/phpunit/tests/db/charset.php
130 130 } 131 131 132 132 /** 133 * @ ticket 21212 133 * @ticket 21212 134 134 */ 135 135 function test_process_fields_failure() { 136 136 global $wpdb; 137 $data = array( 'post_content' => "H€llo\xf0\x9f\x98\x88World¢" ); 137 // \xf0\xff\xff\xff is invalid in utf8 and utf8mb4 138 $data = array( 'post_content' => "H€llo\xf0\xff\xff\xffWorld¢" ); 138 139 $this->assertFalse( self::$_wpdb->process_fields( $wpdb->posts, $data, null ) ); 139 140 } 140 141 … … 436 437 */ 437 438 function test_invalid_characters_in_query() { 438 439 global $wpdb; 439 $this->assertFalse( $wpdb->query( "INSERT INTO {$wpdb->posts} (post_content) VALUES ('foo\xf0\x9f\x98\x88bar')" ) ); 440 $this->assertFalse( $wpdb->query( "INSERT INTO {$wpdb->posts} (post_content) VALUES ('foo\xf0\xff\xff\xffbar')" ) ); 440 441 } 441 442 }