Changeset 32163 for branches/4.1/tests/phpunit/tests/db/charset.php
- Timestamp:
- 04/20/2015 05:08:00 AM (10 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/4.1/tests/phpunit/tests/db/charset.php
<?php

require_once dirname( dirname( __FILE__ ) ) . '/db.php';

/**
 * Test WPDB methods
 *
 * Exercises the character set handling of wpdb: stripping text that is invalid
 * for a column's charset, detecting table/column charsets, and collation checks.
 *
 * @group wpdb
 */
class Tests_DB_Charset extends WP_UnitTestCase {

	/**
	 * Our special WPDB, shared by every test in this class.
	 *
	 * @var wpdb_exposed_methods_for_testing
	 */
	protected static $_wpdb;

	/**
	 * Creates the shared wpdb instance once, before any test in the class runs.
	 */
	public static function setUpBeforeClass() {
		self::$_wpdb = new wpdb_exposed_methods_for_testing();
	}

	/**
	 * Data provider for test_strip_invalid_text().
	 *
	 * Builds one dataset per charset, plus a final dataset that contains every
	 * field at once.
	 *
	 * @ticket 21212
	 *
	 * @return array Datasets of array( $data, $expected, $message ).
	 */
	public function data_strip_invalid_text() {
		$fields = array(
			'latin1' => array(
				// latin1. latin1 never changes.
				'charset'  => 'latin1',
				'value'    => "\xf0\x9f\x8e\xb7",
				'expected' => "\xf0\x9f\x8e\xb7"
			),
			'ascii' => array(
				// ascii gets special treatment, make sure it's covered
				'charset'  => 'ascii',
				'value'    => 'Hello World',
				'expected' => 'Hello World'
			),
			'utf8' => array(
				// utf8 only allows <= 3-byte chars
				'charset'  => 'utf8',
				'value'    => "H€llo\xf0\x9f\x98\x88World¢",
				'expected' => 'H€lloWorld¢'
			),
			'utf8mb3' => array(
				// utf8mb3 should behave the same as utf8
				'charset'  => 'utf8mb3',
				'value'    => "H€llo\xf0\x9f\x98\x88World¢",
				'expected' => 'H€lloWorld¢'
			),
			'utf8mb4' => array(
				// utf8mb4 allows 4-byte characters, too
				'charset'  => 'utf8mb4',
				'value'    => "H€llo\xf0\x9f\x98\x88World¢",
				'expected' => "H€llo\xf0\x9f\x98\x88World¢"
			),
			'koi8r' => array(
				// koi8r is a character set that needs to be checked in MySQL
				'charset'  => 'koi8r',
				'value'    => "\xfdord\xf2ress",
				'expected' => "\xfdord\xf2ress",
				'db'       => true
			),
			'hebrew' => array(
				// hebrew needs to be checked in MySQL, too
				'charset'  => 'hebrew',
				'value'    => "\xf9ord\xf7ress",
				'expected' => "\xf9ord\xf7ress",
				'db'       => true
			),
			'false' => array(
				// false is a column with no character set (ie, a number column)
				'charset'  => false,
				'value'    => 100,
				'expected' => 100
			),
		);

		if ( function_exists( 'mb_convert_encoding' ) ) {
			// big5 is a non-Unicode multibyte charset
			$utf8 = "a\xe5\x85\xb1b"; // UTF-8 Character 20849
			$big5 = mb_convert_encoding( $utf8, 'BIG-5', 'UTF-8' );
			$conv_utf8 = mb_convert_encoding( $big5, 'UTF-8', 'BIG-5' );
			// Make sure PHP's multibyte conversions are working correctly
			$this->assertNotEquals( $utf8, $big5 );
			$this->assertEquals( $utf8, $conv_utf8 );

			$fields['big5'] = array(
				'charset'  => 'big5',
				'value'    => $big5,
				'expected' => $big5
			);
		}

		// The data above is easy to edit. Now, prepare it for the data provider.
		$data_provider = $multiple = $multiple_expected = array();
		foreach ( $fields as $test_case => $field ) {
			// The expected field is the input field with 'value' swapped for 'expected'.
			$expected = $field;
			$expected['value'] = $expected['expected'];
			unset( $expected['expected'], $field['expected'] );

			// We're keeping track of these for our multiple-field test.
			$multiple[] = $field;
			$multiple_expected[] = $expected;

			// strip_invalid_text() expects an array of fields. We're testing one field at a time.
			$data = array( $field );
			$expected = array( $expected );

			// First argument is field data. Second is expected. Third is the message.
			$data_provider[] = array( $data, $expected, $test_case );
		}

		// Time for our test of multiple fields at once.
		$data_provider[] = array( $multiple, $multiple_expected, 'multiple fields/charsets' );

		return $data_provider;
	}

	/**
	 * @dataProvider data_strip_invalid_text
	 * @ticket 21212
	 *
	 * @param array  $data     Fields passed to strip_invalid_text().
	 * @param array  $expected Fields expected back.
	 * @param string $message  Dataset label used in the failure message.
	 */
	public function test_strip_invalid_text( $data, $expected, $message ) {
		$actual = self::$_wpdb->strip_invalid_text( $data );
		$this->assertSame( $expected, $actual, $message );
	}

	/**
	 * process_fields() should return false when given text that is invalid for the column.
	 *
	 * @ticket 21212
	 */
	public function test_process_fields_failure() {
		global $wpdb;

		$charset = $wpdb->get_col_charset( $wpdb->posts, 'post_content' );
		if ( 'utf8' !== $charset && 'utf8mb4' !== $charset ) {
			$this->markTestSkipped( 'This test requires a utf8 character set' );
		}

		// \xf0\xff\xff\xff is invalid in utf8 and utf8mb4.
		$data = array( 'post_content' => "H€llo\xf0\xff\xff\xffWorld¢" );
		$this->assertFalse( self::$_wpdb->process_fields( $wpdb->posts, $data, null ) );
	}

	/**
	 * Data provider for test_process_field_charsets().
	 *
	 * @ticket 21212
	 *
	 * @return array Datasets of array( $data, $expected, $message ).
	 */
	public function data_process_field_charsets() {
		if ( $GLOBALS['wpdb']->charset ) {
			$charset = $GLOBALS['wpdb']->charset;
		} else {
			$charset = $GLOBALS['wpdb']->get_col_charset( $GLOBALS['wpdb']->posts, 'post_content' );
		}

		// 'value' and 'format' are $data, 'charset' ends up as part of $expected

		$no_string_fields = array(
			'post_parent'   => array( 'value' => 10, 'format' => '%d', 'charset' => false ),
			'comment_count' => array( 'value' => 0, 'format' => '%d', 'charset' => false ),
		);

		$all_ascii_fields = array(
			'post_content' => array( 'value' => 'foo foo foo!', 'format' => '%s', 'charset' => false ),
			'post_excerpt' => array( 'value' => 'bar bar bar!', 'format' => '%s', 'charset' => false ),
		);

		// This is the same data used in test_process_field_charsets_on_nonexistent_table()
		$non_ascii_string_fields = array(
			'post_content' => array( 'value' => '¡foo foo foo!', 'format' => '%s', 'charset' => $charset, 'ascii' => false ),
			'post_excerpt' => array( 'value' => '¡bar bar bar!', 'format' => '%s', 'charset' => $charset, 'ascii' => false ),
		);

		// List the datasets explicitly (rather than harvesting every local variable)
		// so that adding a helper variable above can never create a bogus dataset.
		$field_sets = array(
			'no_string_fields'        => $no_string_fields,
			'all_ascii_fields'        => $all_ascii_fields,
			'non_ascii_string_fields' => $non_ascii_string_fields,
		);

		$data_provider = array();
		foreach ( $field_sets as $set_name => $expected ) {
			$data = $expected;
			foreach ( array_keys( $data ) as $field_name ) {
				// 'charset' and 'ascii' are part of the expected return only.
				unset( $data[ $field_name ]['charset'], $data[ $field_name ]['ascii'] );
			}

			$data_provider[] = array( $data, $expected, $set_name );
		}

		return $data_provider;
	}

	/**
	 * @dataProvider data_process_field_charsets
	 * @ticket 21212
	 *
	 * @param array  $data     Fields passed to process_field_charsets().
	 * @param array  $expected Fields expected back.
	 * @param string $message  Dataset label used in the failure message.
	 */
	public function test_process_field_charsets( $data, $expected, $message ) {
		$actual = self::$_wpdb->process_field_charsets( $data, $GLOBALS['wpdb']->posts );
		$this->assertSame( $expected, $actual, $message );
	}

	/**
	 * The test this test depends on first verifies that this
	 * would normally work against the posts table.
	 *
	 * @ticket 21212
	 * @depends test_process_field_charsets
	 */
	public function test_process_field_charsets_on_nonexistent_table() {
		$data = array( 'post_content' => array( 'value' => '¡foo foo foo!', 'format' => '%s' ) );

		// Silence the expected DB error, then put the previous setting back *before*
		// asserting, so a failed assertion cannot leave errors suppressed on the
		// shared wpdb instance.
		$suppress = self::$_wpdb->suppress_errors( true );
		$actual   = self::$_wpdb->process_field_charsets( $data, 'nonexistent_table' );
		self::$_wpdb->suppress_errors( $suppress );

		$this->assertFalse( $actual );
	}

	/**
	 * check_ascii() should accept a string made only of ASCII characters.
	 *
	 * @ticket 21212
	 */
	public function test_check_ascii() {
		$ascii = "\0\t\n\r '" . '!"#$%&()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~';
		$this->assertTrue( self::$_wpdb->check_ascii( $ascii ) );
	}

	/**
	 * check_ascii() should reject a string containing non-ASCII characters.
	 *
	 * @ticket 21212
	 */
	public function test_check_ascii_false() {
		$this->assertFalse( self::$_wpdb->check_ascii( 'ABCDEFGHIJKLMNOPQRSTUVWXYZ¡©«' ) );
	}

	/**
	 * strip_invalid_text_for_column() should drop invalid byte sequences from the value.
	 *
	 * @ticket 21212
	 */
	public function test_strip_invalid_text_for_column() {
		global $wpdb;

		$charset = $wpdb->get_col_charset( $wpdb->posts, 'post_content' );
		if ( 'utf8' !== $charset && 'utf8mb4' !== $charset ) {
			$this->markTestSkipped( 'This test requires a utf8 character set' );
		}

		// Invalid 3-byte and 4-byte sequences
		$value    = "H€llo\xe0\x80\x80World\xf0\xff\xff\xff¢";
		$expected = "H€lloWorld¢";
		$actual   = $wpdb->strip_invalid_text_for_column( $wpdb->posts, 'post_content', $value );
		$this->assertEquals( $expected, $actual );
	}

	/**
	 * Set of table definitions for testing wpdb::get_table_charset and wpdb::get_column_charset
	 *
	 * Each entry holds the column 'definition' used in CREATE TABLE, the
	 * 'table_expected' charset, and the per-column 'column_expected' charsets.
	 *
	 * @var array
	 */
	protected $table_and_column_defs = array(
		array(
			'definition'      => '( a INT, b FLOAT )',
			'table_expected'  => false,
			'column_expected' => array( 'a' => false, 'b' => false )
		),
		array(
			'definition'      => '( a VARCHAR(50) CHARACTER SET big5, b TEXT CHARACTER SET big5 )',
			'table_expected'  => 'big5',
			'column_expected' => array( 'a' => 'big5', 'b' => 'big5' )
		),
		array(
			'definition'      => '( a VARCHAR(50) CHARACTER SET big5, b BINARY )',
			'table_expected'  => 'binary',
			'column_expected' => array( 'a' => 'big5', 'b' => false )
		),
		array(
			'definition'      => '( a VARCHAR(50) CHARACTER SET latin1, b BLOB )',
			'table_expected'  => 'binary',
			'column_expected' => array( 'a' => 'latin1', 'b' => false )
		),
		array(
			'definition'      => '( a VARCHAR(50) CHARACTER SET latin1, b TEXT CHARACTER SET koi8r )',
			'table_expected'  => 'koi8r',
			'column_expected' => array( 'a' => 'latin1', 'b' => 'koi8r' )
		),
		array(
			'definition'      => '( a VARCHAR(50) CHARACTER SET utf8mb3, b TEXT CHARACTER SET utf8mb3 )',
			'table_expected'  => 'utf8',
			'column_expected' => array( 'a' => 'utf8', 'b' => 'utf8' )
		),
		array(
			'definition'      => '( a VARCHAR(50) CHARACTER SET utf8, b TEXT CHARACTER SET utf8mb4 )',
			'table_expected'  => 'utf8',
			'column_expected' => array( 'a' => 'utf8', 'b' => 'utf8mb4' )
		),
		array(
			'definition'      => '( a VARCHAR(50) CHARACTER SET big5, b TEXT CHARACTER SET koi8r )',
			'table_expected'  => 'ascii',
			'column_expected' => array( 'a' => 'big5', 'b' => 'koi8r' )
		),
	);

	/**
	 * Data provider for test_get_table_charset().
	 *
	 * @ticket 21212
	 *
	 * @return array Datasets of array( $drop, $create, $table, $expected_charset ).
	 */
	public function data_test_get_table_charset() {
		$table_name = 'test_get_table_charset';

		$vars = array();
		foreach ( $this->table_and_column_defs as $value ) {
			// Each definition gets its own randomly-suffixed table name.
			$this_table_name = $table_name . '_' . rand_str( 5 );
			$drop            = "DROP TABLE IF EXISTS $this_table_name";
			$create          = "CREATE TABLE $this_table_name {$value['definition']}";
			$vars[]          = array( $drop, $create, $this_table_name, $value['table_expected'] );
		}

		return $vars;
	}

	/**
	 * @dataProvider data_test_get_table_charset
	 * @ticket 21212
	 *
	 * @param string       $drop             DROP TABLE query for the scratch table.
	 * @param string       $create           CREATE TABLE query for the scratch table.
	 * @param string       $table            Scratch table name.
	 * @param string|false $expected_charset Charset expected from get_table_charset().
	 */
	public function test_get_table_charset( $drop, $create, $table, $expected_charset ) {
		self::$_wpdb->query( $drop );

		if ( ! self::$_wpdb->has_cap( 'utf8mb4' ) && preg_match( '/utf8mb[34]/i', $create ) ) {
			$this->markTestSkipped( "This version of MySQL doesn't support utf8mb4." );
			return;
		}

		self::$_wpdb->query( $create );

		// Expected value goes first so PHPUnit labels the failure output correctly.
		$charset = self::$_wpdb->get_table_charset( $table );
		$this->assertEquals( $expected_charset, $charset );

		// Same lookup with the table name upper-cased.
		$charset = self::$_wpdb->get_table_charset( strtoupper( $table ) );
		$this->assertEquals( $expected_charset, $charset );

		self::$_wpdb->query( $drop );
	}

	/**
	 * Data provider for test_get_column_charset() and test_get_column_charset_non_mysql().
	 *
	 * @ticket 21212
	 *
	 * @return array Datasets of array( $drop, $create, $table, $column_charsets ).
	 */
	public function data_test_get_column_charset() {
		$table_name = 'test_get_column_charset';

		$vars = array();
		foreach ( $this->table_and_column_defs as $value ) {
			// Each definition gets its own randomly-suffixed table name.
			$this_table_name = $table_name . '_' . rand_str( 5 );
			$drop            = "DROP TABLE IF EXISTS $this_table_name";
			$create          = "CREATE TABLE $this_table_name {$value['definition']}";
			$vars[]          = array( $drop, $create, $this_table_name, $value['column_expected'] );
		}

		return $vars;
	}

	/**
	 * @dataProvider data_test_get_column_charset
	 * @ticket 21212
	 *
	 * @param string $drop             DROP TABLE query for the scratch table.
	 * @param string $create           CREATE TABLE query for the scratch table.
	 * @param string $table            Scratch table name.
	 * @param array  $expected_charset Expected charset, keyed by column name.
	 */
	public function test_get_column_charset( $drop, $create, $table, $expected_charset ) {
		self::$_wpdb->query( $drop );

		if ( ! self::$_wpdb->has_cap( 'utf8mb4' ) && preg_match( '/utf8mb[34]/i', $create ) ) {
			$this->markTestSkipped( "This version of MySQL doesn't support utf8mb4." );
			return;
		}

		self::$_wpdb->query( $create );

		foreach ( $expected_charset as $column => $charset ) {
			$this->assertEquals( $charset, self::$_wpdb->get_col_charset( $table, $column ) );
			// Same lookup with table and column names upper-cased.
			$this->assertEquals( $charset, self::$_wpdb->get_col_charset( strtoupper( $table ), strtoupper( $column ) ) );
		}

		self::$_wpdb->query( $drop );
	}

	/**
	 * With is_mysql switched off, get_col_charset() is expected to give false for every column.
	 *
	 * @dataProvider data_test_get_column_charset
	 * @ticket 21212
	 *
	 * @param string $drop    DROP TABLE query for the scratch table.
	 * @param string $create  CREATE TABLE query for the scratch table.
	 * @param string $table   Scratch table name.
	 * @param array  $columns Column charsets keyed by column name; only the names are used.
	 */
	public function test_get_column_charset_non_mysql( $drop, $create, $table, $columns ) {
		self::$_wpdb->query( $drop );

		if ( ! self::$_wpdb->has_cap( 'utf8mb4' ) && preg_match( '/utf8mb[34]/i', $create ) ) {
			$this->markTestSkipped( "This version of MySQL doesn't support utf8mb4." );
			return;
		}

		self::$_wpdb->is_mysql = false;

		self::$_wpdb->query( $create );

		// Look up every column by its *name*. Results are collected first and asserted
		// only after the cleanup below, so that a failing assertion cannot leave the
		// shared wpdb instance with is_mysql = false or the scratch table behind.
		$actual = array();
		foreach ( array_keys( $columns ) as $column ) {
			$actual[ $column ] = self::$_wpdb->get_col_charset( $table, $column );
		}

		self::$_wpdb->query( $drop );

		self::$_wpdb->is_mysql = true;

		foreach ( $actual as $column => $charset ) {
			$this->assertEquals( false, $charset, "column $column" );
		}
	}

	/**
	 * Data provider for test_strip_invalid_text_from_query().
	 *
	 * @ticket 21212
	 *
	 * @return array Datasets of array( $create, $query, $expected, $drop ).
	 */
	public function data_strip_invalid_text_from_query() {
		$table_name = 'strip_invalid_text_from_query_table';
		$data = array(
			array(
				// binary tables don't get stripped
				"( a VARCHAR(50) CHARACTER SET utf8, b BINARY )", // create
				"('foo\xf0\x9f\x98\x88bar', 'foo')",              // query
				"('foo\xf0\x9f\x98\x88bar', 'foo')"               // expected result
			),
			array(
				// utf8/utf8mb4 tables default to utf8
				"( a VARCHAR(50) CHARACTER SET utf8, b VARCHAR(50) CHARACTER SET utf8mb4 )",
				"('foo\xf0\x9f\x98\x88bar', 'foo')",
				"('foobar', 'foo')"
			),
		);

		// Expand the fragments above into full queries against a per-dataset table.
		foreach ( $data as &$value ) {
			$this_table_name = $table_name . '_' . rand_str( 5 );

			$value[0] = "CREATE TABLE $this_table_name {$value[0]}";
			$value[1] = "INSERT INTO $this_table_name VALUES {$value[1]}";
			$value[2] = "INSERT INTO $this_table_name VALUES {$value[2]}";
			$value[3] = "DROP TABLE IF EXISTS $this_table_name";
		}
		unset( $value );

		return $data;
	}

	/**
	 * @dataProvider data_strip_invalid_text_from_query
	 * @ticket 21212
	 *
	 * @param string $create   CREATE TABLE query for the scratch table.
	 * @param string $query    INSERT query passed to strip_invalid_text_from_query().
	 * @param string $expected INSERT query expected back.
	 * @param string $drop     DROP TABLE query for the scratch table.
	 */
	public function test_strip_invalid_text_from_query( $create, $query, $expected, $drop ) {
		self::$_wpdb->query( $drop );

		if ( ! self::$_wpdb->has_cap( 'utf8mb4' ) && preg_match( '/utf8mb[34]/i', $create ) ) {
			$this->markTestSkipped( "This version of MySQL doesn't support utf8mb4." );
			return;
		}

		self::$_wpdb->query( $create );

		$return = self::$_wpdb->strip_invalid_text_from_query( $query );
		$this->assertEquals( $expected, $return );

		self::$_wpdb->query( $drop );
	}

	/**
	 * A query containing an invalid byte sequence should make wpdb::query() return false.
	 *
	 * @ticket 21212
	 */
	public function test_invalid_characters_in_query() {
		global $wpdb;

		$charset = $wpdb->get_col_charset( $wpdb->posts, 'post_content' );
		if ( 'utf8' !== $charset && 'utf8mb4' !== $charset ) {
			$this->markTestSkipped( 'This test requires a utf8 character set' );
		}

		$this->assertFalse( $wpdb->query( "INSERT INTO {$wpdb->posts} (post_content) VALUES ('foo\xf0\xff\xff\xffbar')" ) );
	}

	/**
	 * Data provider for test_table_collation_check().
	 *
	 * @ticket 21212
	 *
	 * @return array Datasets of array( $create, $expected, $query, $drop ).
	 */
	public function data_table_collation_check() {
		$table_name = 'table_collation_check';
		$data = array(
			array(
				// utf8_bin tables don't need extra sanity checking.
				"( a VARCHAR(50) COLLATE utf8_bin )", // create
				true                                  // expected result
			),
			array(
				// Neither do utf8_general_ci tables.
				"( a VARCHAR(50) COLLATE utf8_general_ci )",
				true
			),
			array(
				// utf8_unicode_ci tables do.
				"( a VARCHAR(50) COLLATE utf8_unicode_ci )",
				false
			),
			array(
				// utf8_bin tables don't need extra sanity checking,
				// except for when they're not just utf8_bin.
				"( a VARCHAR(50) COLLATE utf8_bin, b VARCHAR(50) COLLATE big5_chinese_ci )",
				false
			),
			array(
				// utf8_bin tables don't need extra sanity checking
				// when the other columns aren't strings.
				"( a VARCHAR(50) COLLATE utf8_bin, b INT )",
				true
			),
		);

		// Index 1 (the expected result) is left as-is; the query and drop statements
		// are appended after it, giving the ( create, expected, query, drop ) order.
		foreach ( $data as &$value ) {
			$this_table_name = $table_name . '_' . rand_str( 5 );

			$value[0] = "CREATE TABLE $this_table_name {$value[0]}";
			$value[2] = "SELECT * FROM $this_table_name";
			$value[3] = "DROP TABLE IF EXISTS $this_table_name";
		}
		unset( $value );

		return $data;
	}


	/**
	 * @dataProvider data_table_collation_check
	 * @ticket 21212
	 *
	 * @param string $create   CREATE TABLE query for the scratch table.
	 * @param bool   $expected Result expected from check_collation().
	 * @param string $query    SELECT query passed to check_collation().
	 * @param string $drop     DROP TABLE query for the scratch table.
	 */
	public function test_table_collation_check( $create, $expected, $query, $drop ) {
		self::$_wpdb->query( $drop );

		self::$_wpdb->query( $create );

		$return = self::$_wpdb->check_collation( $query );
		$this->assertEquals( $expected, $return );

		self::$_wpdb->query( $drop );
	}
}
Note: See TracChangeset
for help on using the changeset viewer.