Ticket #21212: 21212-utf8mb4.5.diff
| File 21212-utf8mb4.5.diff, 9.6 KB (added by , 11 years ago) |
|---|
-
src/wp-admin/includes/schema.php
51 51 slug varchar(200) NOT NULL default '', 52 52 term_group bigint(10) NOT NULL default 0, 53 53 PRIMARY KEY (term_id), 54 KEY slug (slug), 55 KEY name (name) 54 KEY slug (slug(191)), 55 KEY name (name(191)) 56 56 ) $charset_collate; 57 57 CREATE TABLE $wpdb->term_taxonomy ( 58 58 term_taxonomy_id bigint(20) unsigned NOT NULL auto_increment, … … 79 79 meta_value longtext, 80 80 PRIMARY KEY (meta_id), 81 81 KEY comment_id (comment_id), 82 KEY meta_key (meta_key) 82 KEY meta_key (meta_key(191)) 83 83 ) $charset_collate; 84 84 CREATE TABLE $wpdb->comments ( 85 85 comment_ID bigint(20) unsigned NOT NULL auto_increment, … … 136 136 meta_value longtext, 137 137 PRIMARY KEY (meta_id), 138 138 KEY post_id (post_id), 139 KEY meta_key (meta_key) 139 KEY meta_key (meta_key(191)) 140 140 ) $charset_collate; 141 141 CREATE TABLE $wpdb->posts ( 142 142 ID bigint(20) unsigned NOT NULL auto_increment, … … 163 163 post_mime_type varchar(100) NOT NULL default '', 164 164 comment_count bigint(20) NOT NULL default '0', 165 165 PRIMARY KEY (ID), 166 KEY post_name (post_name), 166 KEY post_name (post_name(191)), 167 167 KEY type_status_date (post_type,post_status,post_date,ID), 168 168 KEY post_parent (post_parent), 169 169 KEY post_author (post_author) … … 213 213 meta_value longtext, 214 214 PRIMARY KEY (umeta_id), 215 215 KEY user_id (user_id), 216 KEY meta_key (meta_key) 216 KEY meta_key (meta_key(191)) 217 217 ) $charset_collate;\n"; 218 218 219 219 // Global tables … … 261 261 domain varchar(200) NOT NULL default '', 262 262 path varchar(100) NOT NULL default '', 263 263 PRIMARY KEY (id), 264 KEY domain (domain,path) 264 KEY domain (domain(140),path(51)) 265 265 ) $charset_collate; 266 266 CREATE TABLE $wpdb->sitemeta ( 267 267 meta_id bigint(20) NOT NULL auto_increment, … … 269 269 meta_key varchar(255) default NULL, 270 270 meta_value longtext, 271 271 PRIMARY KEY (meta_id), 272 KEY meta_key (meta_key), 272 KEY meta_key (meta_key(191)), 273 273 KEY site_id (site_id) 274 274 )
$charset_collate; 275 275 CREATE TABLE $wpdb->signups ( … … 288 288 KEY activation_key (activation_key), 289 289 KEY user_email (user_email), 290 290 KEY user_login_email (user_login,user_email), 291 KEY domain_path (domain,path) 291 KEY domain_path (domain(140),path(51)) 292 292 ) $charset_collate;"; 293 293 294 294 switch ( $scope ) { -
src/wp-admin/includes/upgrade.php
519 519 if ( $wp_current_db_version < 29630 ) 520 520 upgrade_400(); 521 521 522 if ( $wp_current_db_version < 30134 ) 523 upgrade_420(); 524 522 525 maybe_disable_link_manager(); 523 526 524 527 maybe_disable_automattic_widgets(); … … 1407 1410 } 1408 1411 1409 1412 /** 1413 * Execute changes made in WordPress 4.2.0. 1414 * 1415 * @since 4.2.0 1416 */ 1417 function upgrade_420() { 1418 global $wp_current_db_version, $wpdb; 1419 1420 if ( $wp_current_db_version < 30134 && $wpdb->charset === 'utf8mb4' ) { 1421 if ( is_multisite() ) { 1422 $tables = $wpdb->tables; 1423 } else { 1424 $tables = array_merge( $wpdb->tables, $wpdb->global_tables ); 1425 } 1426 1427 foreach ( $tables as $table ) { 1428 $results = $wpdb->get_results( "SHOW FULL COLUMNS FROM `{$wpdb->$table}`" ); 1429 if ( ! $results ) { 1430 continue; 1431 } 1432 1433 foreach ( $results as $column ) { 1434 if ( $column->Collation && 'utf8' !== $column->Collation ) { 1435 // Don't upgrade tables that have non-utf8 columns 1436 continue 2; 1437 } 1438 } 1439 1440 $wpdb->query( "ALTER TABLE {$wpdb->$table} CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci" ); 1441 } 1442 } 1443 } 1444 1445 /** 1410 1446 * Executes network-level upgrade routines. 1411 1447 * 1412 1448 * @since 3.0.0 … … 1502 1538 update_site_option( 'illegal_names', $illegal_names ); 1503 1539 } 1504 1540 } 1541 1542 // 4.2 1543 if ( $wp_current_db_version < 30134 && $wpdb->charset === 'utf8mb4') { 1544 $tables = array_merge( $wpdb->ms_global_tables, $wpdb->global_tables ); 1545 1546 foreach ( $tables as $table ) { 1547 $wpdb->query( "ALTER TABLE {$wpdb->$table} CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci" ); 1548 } 1549 } 1505 1550 } 1506 1551 1507 1552 // … … 2284 2329 // dbDelta() can recreate but can't drop the index. 
2285 2330 $wpdb->query( "ALTER TABLE $wpdb->terms DROP INDEX slug" ); 2286 2331 } 2332 2333 if ( $wp_current_db_version < 30134 ) { 2334 // We need to alter some indices 2335 $wpdb->query( "ALTER TABLE $wpdb->usermeta DROP INDEX meta_key" ); 2336 $wpdb->query( "ALTER TABLE $wpdb->terms DROP INDEX slug" ); 2337 $wpdb->query( "ALTER TABLE $wpdb->terms DROP INDEX name" ); 2338 $wpdb->query( "ALTER TABLE $wpdb->commentmeta DROP INDEX meta_key" ); 2339 $wpdb->query( "ALTER TABLE $wpdb->postmeta DROP INDEX meta_key" ); 2340 $wpdb->query( "ALTER TABLE $wpdb->posts DROP INDEX post_name" ); 2341 } 2287 2342 } 2288 2343 2289 2344 /** -
src/wp-admin/setup-config.php
280 280 case 'DB_HOST' : 281 281 $config_file[ $line_num ] = "define('" . $constant . "'," . $padding . "'" . addcslashes( constant( $constant ), "\\'" ) . "');\r\n"; 282 282 break; 283 case 'DB_CHARSET' : 284 if ( 'utf8mb4' === $wpdb->charset || ( ! $wpdb->charset && $wpdb->has_cap( 'utf8mb4' ) ) ) { 285 $config_file[ $line_num ] = "define('" . $constant . "'," . $padding . "'utf8mb4');\r\n"; 286 } 287 break; 283 288 case 'AUTH_KEY' : 284 289 case 'SECURE_AUTH_KEY' : 285 290 case 'LOGGED_IN_KEY' : -
src/wp-includes/version.php
11 11 * 12 12 * @global int $wp_db_version 13 13 */ 14 $wp_db_version = 30133; 14 $wp_db_version = 30134; 15 15 16 16 /** 17 17 * Holds the TinyMCE version -
src/wp-includes/wp-db.php
624 624 } 625 625 } 626 626 627 $this->init_charset(); 628 629 627 $this->dbuser = $dbuser; 630 628 $this->dbpassword = $dbpassword; 631 629 $this->dbname = $dbname; … … 727 725 728 726 if ( defined( 'DB_CHARSET' ) ) 729 727 $this->charset = DB_CHARSET; 728 729 if ( 'utf8' === $this->charset && $this->has_cap( 'utf8mb4' ) ) { 730 $this->charset = 'utf8mb4'; 731 } 732 733 if ( 'utf8mb4' === $this->charset && ( ! $this->collate || stripos( $this->collate, 'utf8_' ) === 0 ) ) { 734 $this->collate = 'utf8mb4_unicode_ci'; 735 } 730 736 } 731 737 732 738 /** … … 1477 1483 return false; 1478 1484 } elseif ( $this->dbh ) { 1479 1485 $this->has_connected = true; 1486 1487 $this->init_charset(); 1480 1488 $this->set_charset( $this->dbh ); 1489 1481 1490 $this->ready = true; 1482 1491 $this->set_sql_mode(); 1483 1492 $this->select( $this->dbname, $this->dbh ); … … 2249 2258 * Retrieves the character set for the given column. 2250 2259 * 2251 2260 * @since 4.2.0 2252 * @access protected 2261 * @access public 2253 2262 * 2254 2263 * @param string $table Table name. 2255 2264 * @param string $column Column name. 2256 2265 * @return mixed Column character set as a string. False if the column has no 2257 2266 * character set. {@see WP_Error} object if there was an error. 2258 2267 */ 2259 protected function get_col_charset( $table, $column ) { 2268 public function get_col_charset( $table, $column ) { 2260 2269 $tablekey = strtolower( $table ); 2261 2270 $columnkey = strtolower( $column ); 2262 2271 … … 2356 2365 'gb2312' => 'EUC-CN', 2357 2366 'ujis' => 'EUC-JP', 2358 2367 'utf32' => 'UTF-32', 2359 'utf8mb4' => 'UTF-8', 2360 2368 ); 2361 2369 2362 2370 $supported_charsets = array(); … … 2391 2399 } 2392 2400 } 2393 2401 2394 // utf8 (mb3) can be handled by regex, which is a bunch faster than a DB lookup. 2395 if ( 'utf8' === $charset || 'utf8mb3' === $charset ) { 2402 // utf8 can be handled by regex, which is a bunch faster than a DB lookup.
2403 if ( 'utf8' === $charset || 'utf8mb3' === $charset || 'utf8mb4' === $charset ) { 2396 2404 $regex = '/ 2397 2405 ( 2398 2406 (?: [\x00-\x7F] # single-byte sequences 0xxxxxxx … … 2400 2408 | \xE0[\xA0-\xBF][\x80-\xBF] # triple-byte sequences 1110xxxx 10xxxxxx * 2 2401 2409 | [\xE1-\xEC][\x80-\xBF]{2} 2402 2410 | \xED[\x80-\x9F][\x80-\xBF] 2403 | [\xEE-\xEF][\x80-\xBF]{2} 2404 ){1,50} # ...one or more times 2411 | [\xEE-\xEF][\x80-\xBF]{2}'; 2412 2413 if ( 'utf8mb4' === $charset) { 2414 $regex .= ' 2415 | \xF0[\x90-\xBF][\x80-\xBF]{2} # four-byte sequences 11110xxx 10xxxxxx * 3 2416 | [\xF1-\xF3][\x80-\xBF]{3} 2417 | \xF4[\x80-\x8F][\x80-\xBF]{2} 2418 '; 2419 } 2420 2421 $regex .= '){1,50} # ...one or more times 2405 2422 ) 2406 2423 | . # anything else 2407 2424 /x'; -
tests/phpunit/tests/db/charset.php
130 130 } 131 131 132 132 /** 133 * @ ticket 21212 133 * @ticket 21212 134 134 */ 135 135 function test_process_fields_failure() { 136 136 global $wpdb; 137 $data = array( 'post_content' => "H€llo\xf0\x9f\x98\x88World¢" ); 137 // \xf0\xff\xff\xff is invalid in utf8 and utf8mb4 138 $data = array( 'post_content' => "H€llo\xf0\xff\xff\xffWorld¢" ); 138 139 $this->assertFalse( self::$_wpdb->process_fields( $wpdb->posts, $data, null ) ); 139 140 } 140 141 … … 436 437 */ 437 438 function test_invalid_characters_in_query() { 438 439 global $wpdb; 439 $this->assertFalse( $wpdb->query( "INSERT INTO {$wpdb->posts} (post_content) VALUES ('foo\xf0\x9f\x98\x88bar')" ) ); 440 $this->assertFalse( $wpdb->query( "INSERT INTO {$wpdb->posts} (post_content) VALUES ('foo\xf0\xff\xff\xffbar')" ) ); 440 441 } 441 442 }