Ticket #21212: 21212.2.diff
| File 21212.2.diff, 6.5 KB (added by , 11 years ago) |
|---|
-
src/wp-admin/includes/upgrade.php
513 513 if ( $wp_current_db_version < 29630 ) 514 514 upgrade_400(); 515 515 516 if ( $wp_current_db_version < 30134 ) 517 upgrade_420(); 518 516 519 maybe_disable_link_manager(); 517 520 518 521 maybe_disable_automattic_widgets(); … … 1400 1403 } 1401 1404 1402 1405 /** 1406 * Execute changes made in WordPress 4.2.0. 1407 * 1408 * @since 4.2.0 1409 */ 1410 function upgrade_420() { 1411 global $wp_current_db_version, $wpdb; 1412 if ( $wp_current_db_version < 30134 && $wpdb->charset === 'utf8mb4') { 1413 if ( is_multisite() ) { 1414 $tables = $wpdb->tables; 1415 } else { 1416 $tables = array_merge( $wpdb->tables, $wpdb->global_tables ); 1417 } 1418 1419 foreach ( $tables as $table ) { 1420 $wpdb->query( "ALTER TABLE {$wpdb->$table} CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci" ); 1421 } 1422 } 1423 } 1424 1425 /** 1403 1426 * Execute network level changes 1404 1427 * 1405 1428 * @since 3.0.0 … … 1495 1518 update_site_option( 'illegal_names', $illegal_names ); 1496 1519 } 1497 1520 } 1521 1522 // 4.2 1523 if ( $wp_current_db_version < 30134 && $wpdb->charset === 'utf8mb4') { 1524 $tables = array_merge( $wpdb->ms_global_tables, $wpdb->global_tables ); 1525 1526 foreach ( $tables as $table ) { 1527 $wpdb->query( "ALTER TABLE {$wpdb->$table} CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci" ); 1528 } 1529 } 1498 1530 } 1499 1531 1500 1532 // The functions we use to actually do stuff -
src/wp-admin/setup-config.php
280 280 case 'DB_HOST' : 281 281 $config_file[ $line_num ] = "define('" . $constant . "'," . $padding . "'" . addcslashes( constant( $constant ), "\\'" ) . "');\r\n"; 282 282 break; 283 case 'DB_CHARSET' : 284 if ( 'utf8mb4' === $wpdb->charset || ( ! $wpdb->charset && $wpdb->has_cap( 'utf8mb4' ) ) ) { 285 $config_file[ $line_num ] = "define('" . $constant . "'," . $padding . "'utf8mb4');\r\n"; 286 } 287 break; 283 288 case 'AUTH_KEY' : 284 289 case 'SECURE_AUTH_KEY' : 285 290 case 'LOGGED_IN_KEY' : -
src/wp-includes/version.php
11 11 * 12 12 * @global int $wp_db_version 13 13 */ 14 $wp_db_version = 30133; 14 $wp_db_version = 30134; 15 15 16 16 /** 17 17 * Holds the TinyMCE version -
src/wp-includes/wp-db.php
624 624 } 625 625 } 626 626 627 $this->init_charset(); 628 629 627 $this->dbuser = $dbuser; 630 628 $this->dbpassword = $dbpassword; 631 629 $this->dbname = $dbname; … … 727 725 728 726 if ( defined( 'DB_CHARSET' ) ) 729 727 $this->charset = DB_CHARSET; 728 729 if ( 'utf8' === $this->charset && $this->has_cap( 'utf8mb4' ) ) { 730 $this->charset = 'utf8mb4'; 731 } 732 733 if ( 'utf8mb4' === $this->charset && ( ! $this->collate || stripos( $this->collate, 'utf8_' ) === 0 ) ) { 734 $this->collate = 'utf8mb4_unicode_ci'; 735 } 730 736 } 731 737 732 738 /** … … 1477 1483 return false; 1478 1484 } elseif ( $this->dbh ) { 1479 1485 $this->has_connected = true; 1486 1487 $this->init_charset(); 1480 1488 $this->set_charset( $this->dbh ); 1489 1481 1490 $this->ready = true; 1482 1491 $this->set_sql_mode(); 1483 1492 $this->select( $this->dbname, $this->dbh ); … … 2356 2365 'gb2312' => 'EUC-CN', 2357 2366 'ujis' => 'EUC-JP', 2358 2367 'utf32' => 'UTF-32', 2359 'utf8mb4' => 'UTF-8', 2360 2368 ); 2361 2369 2362 2370 $supported_charsets = array(); … … 2391 2399 } 2392 2400 } 2393 2401 2394 // utf8 (mb3) can be handled by regex, which is a bunch faster than a DB lookup. 2395 if ( 'utf8' === $charset || 'utf8mb3' === $charset ) { 2402 // utf8 can be handled by regex, which is a bunch faster than a DB lookup. 
2403 if ( 'utf8' === $charset || 'utf8mb3' === $charset || 'utf8mb4' === $charset ) { 2396 2404 $regex = '/ 2397 2405 ( 2398 2406 (?: [\x00-\x7F] # single-byte sequences 0xxxxxxx … … 2400 2408 | \xE0[\xA0-\xBF][\x80-\xBF] # triple-byte sequences 1110xxxx 10xxxxxx * 2 2401 2409 | [\xE1-\xEC][\x80-\xBF]{2} 2402 2410 | \xED[\x80-\x9F][\x80-\xBF] 2403 | [\xEE-\xEF][\x80-\xBF]{2} 2404 ){1,50} # ...one or more times 2411 | [\xEE-\xEF][\x80-\xBF]{2}'; 2412 2413 if ( 'utf8mb4' === $charset) { 2414 $regex .= ' 2415 | \xF0[\x90-\xBF][\x80-\xBF]{2} # four-byte sequences 11110xxx 10xxxxxx * 3 2416 | [\xF1-\xF3][\x80-\xBF]{3} 2417 | \xF4[\x80-\x8F][\x80-\xBF]{2} 2418 '; 2419 } 2420 2421 $regex .= '){1,50} # ...one or more times 2405 2422 ) 2406 2423 | . # anything else 2407 2424 /x'; … … 2526 2543 return $charset; 2527 2544 } 2528 2545 2546 if ( $this->posts === $table && 'post_content' === $column && 'utf8' === $charset && function_exists( 'mb_convert_encoding' ) ) { 2547 $regex = '/([\xE0-\xEF][\x80-\xBF]{2}|[\xF0-\xF7][\x80-\xBF]{3})/'; 2548 $matches = array(); 2549 if ( preg_match_all( $regex, $value, $matches ) ) { 2550 error_log( print_r( $matches, true ) ); 2551 if ( ! empty( $matches[1] ) ) { 2552 foreach( $matches[1] as $emoji ) { 2553 $unpacked = unpack( 'H*', mb_convert_encoding( $emoji, 'UTF-32', 'UTF-8' ) ); 2554 if ( isset( $unpacked[1] ) ) { 2555 $entity = '&#x' . trim( $unpacked[1], '0' ) . ';'; 2556 $value = str_replace( $emoji, $entity, $value ); 2557 } 2558 } 2559 } 2560 } 2561 } 2562 2529 2563 $data = array( 2530 2564 $column => array( 2531 2565 'value' => $value, -
tests/phpunit/tests/db/charset.php
130 130 } 131 131 132 132 /** 133 * @ ticket 21212 133 * @ticket 21212 134 134 */ 135 135 function test_process_fields_failure() { 136 136 global $wpdb; 137 $data = array( 'post_content' => "H€llo\xf0\x9f\x98\x88World¢" ); 137 // \xf0\xff\xff\xff is invalid in utf8 and utf8mb4 138 $data = array( 'post_content' => "H€llo\xf0\xff\xff\xffWorld¢" ); 138 139 $this->assertFalse( self::$_wpdb->process_fields( $wpdb->posts, $data, null ) ); 139 140 } 140 141 … … 436 437 */ 437 438 function test_invalid_characters_in_query() { 438 439 global $wpdb; 439 $this->assertFalse( $wpdb->query( "INSERT INTO {$wpdb->posts} (post_content) VALUES ('foo\xf0\x9f\x98\x88bar')" ) ); 440 $this->assertFalse( $wpdb->query( "INSERT INTO {$wpdb->posts} (post_content) VALUES ('foo\xf0\xff\xff\xffbar')" ) ); 440 441 } 441 442 }