Changeset 32388
- Timestamp:
- 05/06/2015 07:08:42 PM (10 years ago)
- Location:
- branches/4.0
- Files:
-
- 8 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/4.0/src/wp-admin/includes/upgrade.php
r32313 r32388 441 441 upgrade_400(); 442 442 443 if ( $wp_current_db_version < 2963 1)444 upgrade_40 4();443 if ( $wp_current_db_version < 29632 ) 444 upgrade_405(); 445 445 446 446 maybe_disable_link_manager(); … … 1336 1336 */ 1337 1337 function upgrade_404() { 1338 } 1339 1340 /** 1341 * Execute changes made in WordPress 4.0.5. 1342 * 1343 * @since 4.0.5 1344 */ 1345 function upgrade_405() { 1338 1346 global $wp_current_db_version, $wpdb; 1339 1347 1340 if ( $wp_current_db_version < 2963 1) {1348 if ( $wp_current_db_version < 29632 ) { 1341 1349 $content_length = $wpdb->get_col_length( $wpdb->comments, 'comment_content' ); 1342 if ( ! $content_length ) { 1343 $content_length = 65535; 1344 } 1350 if ( false === $content_length ) { 1351 $content_length = array( 1352 'type' => 'byte', 1353 'length' => 65535, 1354 ); 1355 } elseif ( ! is_array( $content_length ) ) { 1356 $length = (int) $content_length > 0 ? (int) $content_length : 65535; 1357 $content_length = array( 1358 'type' => 'byte', 1359 'length' => $length 1360 ); 1361 } 1362 1363 if ( 'byte' !== $content_length['type'] ) { 1364 // Sites with malformed DB schemas are on their own. 1365 return; 1366 } 1367 1368 $allowed_length = intval( $content_length['length'] ) - 10; 1345 1369 1346 1370 $comments = $wpdb->get_results( 1347 "SELECT comment_ID FROM $wpdb->comments1348 WHERE comment_date_gmt> '2015-04-26'1349 AND CHAR_LENGTH( comment_content ) >= $content_length1350 AND ( comment_content LIKE '%<%' OR comment_contentLIKE '%>%' )"1371 "SELECT `comment_ID` FROM `{$wpdb->comments}` 1372 WHERE `comment_date_gmt` > '2015-04-26' 1373 AND LENGTH( `comment_content` ) >= {$allowed_length} 1374 AND ( `comment_content` LIKE '%<%' OR `comment_content` LIKE '%>%' )" 1351 1375 ); 1352 1376 -
branches/4.0/src/wp-includes/compat.php
r29382 r32388 14 14 } 15 15 16 if ( !function_exists('mb_substr') ): 17 function mb_substr( $str, $start, $length=null, $encoding=null ) { 18 return _mb_substr($str, $start, $length, $encoding); 19 } 20 endif; 21 22 function _mb_substr( $str, $start, $length=null, $encoding=null ) { 23 // the solution below, works only for utf-8, so in case of a different 24 // charset, just use built-in substr 25 $charset = get_option( 'blog_charset' ); 26 if ( !in_array( $charset, array('utf8', 'utf-8', 'UTF8', 'UTF-8') ) ) { 27 return is_null( $length )? substr( $str, $start ) : substr( $str, $start, $length); 28 } 29 // use the regex unicode support to separate the UTF-8 characters into an array 30 preg_match_all( '/./us', $str, $match ); 31 $chars = is_null( $length )? array_slice( $match[0], $start ) : array_slice( $match[0], $start, $length ); 32 return implode( '', $chars ); 16 /** 17 * Returns whether PCRE/u (PCRE_UTF8 modifier) is available for use. 18 * 19 * @ignore 20 * @since 4.2.2 21 * @access private 22 * 23 * @param bool $set - Used for testing only 24 * null : default - get PCRE/u capability 25 * false : Used for testing - return false for future calls to this function 26 * 'reset': Used for testing - restore default behavior of this function 27 */ 28 function _wp_can_use_pcre_u( $set = null ) { 29 static $utf8_pcre = 'reset'; 30 31 if ( null !== $set ) { 32 $utf8_pcre = $set; 33 } 34 35 if ( 'reset' === $utf8_pcre ) { 36 $utf8_pcre = @preg_match( '/^./u', 'a' ); 37 } 38 39 return $utf8_pcre; 40 } 41 42 if ( ! function_exists( 'mb_substr' ) ) : 43 function mb_substr( $str, $start, $length = null, $encoding = null ) { 44 return _mb_substr( $str, $start, $length, $encoding ); 45 } 46 endif; 47 48 /* 49 * Only understands UTF-8 and 8bit. All other character sets will be treated as 8bit. 50 * For $encoding === UTF-8, the $str input is expected to be a valid UTF-8 byte sequence. 51 * The behavior of this function for invalid inputs is undefined. 52 */ 53 function _mb_substr( $str, $start, $length = null, $encoding = null ) { 54 if ( null === $encoding ) { 55 $encoding = get_option( 'blog_charset' ); 56 } 57 58 // The solution below works only for UTF-8, 59 // so in case of a different charset just use built-in substr() 60 if ( ! in_array( $encoding, array( 'utf8', 'utf-8', 'UTF8', 'UTF-8' ) ) ) { 61 return is_null( $length ) ? substr( $str, $start ) : substr( $str, $start, $length ); 62 } 63 64 if ( _wp_can_use_pcre_u() ) { 65 // Use the regex unicode support to separate the UTF-8 characters into an array 66 preg_match_all( '/./us', $str, $match ); 67 $chars = is_null( $length ) ? array_slice( $match[0], $start ) : array_slice( $match[0], $start, $length ); 68 return implode( '', $chars ); 69 } 70 71 $regex = '/( 72 [\x00-\x7F] # single-byte sequences 0xxxxxxx 73 | [\xC2-\xDF][\x80-\xBF] # double-byte sequences 110xxxxx 10xxxxxx 74 | \xE0[\xA0-\xBF][\x80-\xBF] # triple-byte sequences 1110xxxx 10xxxxxx * 2 75 | [\xE1-\xEC][\x80-\xBF]{2} 76 | \xED[\x80-\x9F][\x80-\xBF] 77 | [\xEE-\xEF][\x80-\xBF]{2} 78 | \xF0[\x90-\xBF][\x80-\xBF]{2} # four-byte sequences 11110xxx 10xxxxxx * 3 79 | [\xF1-\xF3][\x80-\xBF]{3} 80 | \xF4[\x80-\x8F][\x80-\xBF]{2} 81 )/x'; 82 83 $chars = array( '' ); // Start with 1 element instead of 0 since the first thing we do is pop 84 do { 85 // We had some string left over from the last round, but we counted it in that last round. 86 array_pop( $chars ); 87 88 // Split by UTF-8 character, limit to 1000 characters (last array element will contain the rest of the string) 89 $pieces = preg_split( $regex, $str, 1000, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY ); 90 91 $chars = array_merge( $chars, $pieces ); 92 } while ( count( $pieces ) > 1 && $str = array_pop( $pieces ) ); // If there's anything left over, repeat the loop. 93 94 return join( '', array_slice( $chars, $start, $length ) ); 95 } 96 97 if ( ! function_exists( 'mb_strlen' ) ) : 98 function mb_strlen( $str, $encoding = null ) { 99 return _mb_strlen( $str, $encoding ); 100 } 101 endif; 102 103 /* 104 * Only understands UTF-8 and 8bit. All other character sets will be treated as 8bit. 105 * For $encoding === UTF-8, the $str input is expected to be a valid UTF-8 byte sequence. 106 * The behavior of this function for invalid inputs is undefined. 107 */ 108 function _mb_strlen( $str, $encoding = null ) { 109 if ( null === $encoding ) { 110 $encoding = get_option( 'blog_charset' ); 111 } 112 113 // The solution below works only for UTF-8, 114 // so in case of a different charset just use built-in strlen() 115 if ( ! in_array( $encoding, array( 'utf8', 'utf-8', 'UTF8', 'UTF-8' ) ) ) { 116 return strlen( $str ); 117 } 118 119 if ( _wp_can_use_pcre_u() ) { 120 // Use the regex unicode support to separate the UTF-8 characters into an array 121 preg_match_all( '/./us', $str, $match ); 122 return count( $match[0] ); 123 } 124 125 $regex = '/(?: 126 [\x00-\x7F] # single-byte sequences 0xxxxxxx 127 | [\xC2-\xDF][\x80-\xBF] # double-byte sequences 110xxxxx 10xxxxxx 128 | \xE0[\xA0-\xBF][\x80-\xBF] # triple-byte sequences 1110xxxx 10xxxxxx * 2 129 | [\xE1-\xEC][\x80-\xBF]{2} 130 | \xED[\x80-\x9F][\x80-\xBF] 131 | [\xEE-\xEF][\x80-\xBF]{2} 132 | \xF0[\x90-\xBF][\x80-\xBF]{2} # four-byte sequences 11110xxx 10xxxxxx * 3 133 | [\xF1-\xF3][\x80-\xBF]{3} 134 | \xF4[\x80-\x8F][\x80-\xBF]{2} 135 )/x'; 136 137 $count = 1; // Start at 1 instead of 0 since the first thing we do is decrement 138 do { 139 // We had some string left over from the last round, but we counted it in that last round. 140 $count--; 141 142 // Split by UTF-8 character, limit to 1000 characters (last array element will contain the rest of the string) 143 $pieces = preg_split( $regex, $str, 1000 ); 144 145 // Increment 146 $count += count( $pieces ); 147 } while ( $str = array_pop( $pieces ) ); // If there's anything left over, repeat the loop. 148 149 // Fencepost: preg_split() always returns one extra item in the array 150 return --$count; 33 151 } 34 152 -
branches/4.0/src/wp-includes/version.php
r32372 r32388 12 12 * @global int $wp_db_version 13 13 */ 14 $wp_db_version = 2963 1;14 $wp_db_version = 29632; 15 15 16 16 /** -
branches/4.0/src/wp-includes/wp-db.php
r32313 r32388 1788 1788 */ 1789 1789 function _insert_replace_helper( $table, $data, $format = null, $type = 'INSERT' ) { 1790 $this->insert_id = 0; 1791 1790 1792 if ( ! in_array( strtoupper( $type ), array( 'REPLACE', 'INSERT' ) ) ) { 1791 1793 return false; … … 1808 1810 $sql = "$type INTO `$table` ($fields) VALUES ($formats)"; 1809 1811 1810 $this->insert_id = 0;1811 1812 $this->check_current_query = false; 1812 1813 return $this->query( $this->prepare( $sql, $values ) ); … … 2004 2005 // This checks %d/%f versus ! %s because it's sprintf() could take more. 2005 2006 $value['charset'] = false; 2006 } elseif ( $this->check_ascii( $value['value'] ) ) {2007 // If it's ASCII, then we don't need the charset. We can skip this field.2008 $value['charset'] = false;2009 2007 } else { 2010 2008 $value['charset'] = $this->get_col_charset( $table, $field ); … … 2012 2010 return false; 2013 2011 } 2014 2015 // This isn't ASCII. Don't have strip_invalid_text() re-check.2016 $value['ascii'] = false;2017 2012 } 2018 2013 … … 2045 2040 return false; 2046 2041 } 2047 }2048 2049 if ( false !== $value['length'] && strlen( $value['value'] ) > $value['length'] ) {2050 return false;2051 2042 } 2052 2043 … … 2380 2371 /** 2381 2372 * Retrieve the maximum string length allowed in a given column. 2373 * The length may either be specified as a byte length or a character length. 2382 2374 * 2383 2375 * @since 4.2.1 … … 2386 2378 * @param string $table Table name. 2387 2379 * @param string $column Column name. 2388 * @return mixed Max column length as an int. False if the column has no 2389 * length. WP_Error object if there was an error. 2380 * @return mixed array( 'length' => (int), 'type' => 'byte' | 'char' ) 2381 * false if the column has no length (for example, numeric column) 2382 * WP_Error object if there was an error. 2390 2383 */ 2391 2384 public function get_col_length( $table, $column ) { … … 2420 2413 2421 2414 switch( $type ) { 2415 case 'char': 2416 case 'varchar': 2417 return array( 2418 'type' => 'char', 2419 'length' => (int) $length, 2420 ); 2421 break; 2422 2422 case 'binary': 2423 case 'char':2424 2423 case 'varbinary': 2425 case 'varchar': 2426 return $length; 2424 return array( 2425 'type' => 'byte', 2426 'length' => (int) $length, 2427 ); 2427 2428 break; 2428 2429 case 'tinyblob': 2429 2430 case 'tinytext': 2430 return 255; // 2^8 - 1 2431 return array( 2432 'type' => 'byte', 2433 'length' => 255, // 2^8 - 1 2434 ); 2431 2435 break; 2432 2436 case 'blob': 2433 2437 case 'text': 2434 return 65535; // 2^16 - 1 2438 return array( 2439 'type' => 'byte', 2440 'length' => 65535, // 2^16 - 1 2441 ); 2435 2442 break; 2436 2443 case 'mediumblob': 2437 2444 case 'mediumtext': 2438 return 16777215; // 2^24 - 1 2445 return array( 2446 'type' => 'byte', 2447 'length' => 16777215, // 2^24 - 1 2448 ); 2439 2449 break; 2440 2450 case 'longblob': 2441 2451 case 'longtext': 2442 return 4294967295; // 2^32 - 1 2452 return array( 2453 'type' => 'byte', 2454 'length' => 4294967295, // 2^32 - 1 2455 ); 2443 2456 break; 2444 2457 default: … … 2547 2560 // If any of the columns don't have one of these collations, it needs more sanity checking. 2548 2561 protected function strip_invalid_text( $data ) { 2549 // Some multibyte character sets that we can check in PHP.2550 $mb_charsets = array(2551 'ascii' => 'ASCII',2552 'big5' => 'BIG-5',2553 'eucjpms' => 'eucJP-win',2554 'gb2312' => 'EUC-CN',2555 'ujis' => 'EUC-JP',2556 'utf32' => 'UTF-32',2557 );2558 2559 $supported_charsets = array();2560 if ( function_exists( 'mb_list_encodings' ) ) {2561 $supported_charsets = mb_list_encodings();2562 }2563 2564 2562 $db_check_string = false; 2565 2563 … … 2567 2565 $charset = $value['charset']; 2568 2566 2569 // Column isn't a string, or is latin1, which will will happily store anything. 2570 if ( false === $charset || 'latin1' === $charset ) { 2567 if ( is_array( $value['length'] ) ) { 2568 $length = $value['length']['length']; 2569 } else { 2570 $length = false; 2571 } 2572 2573 // There's no charset to work with. 2574 if ( false === $charset ) { 2571 2575 continue; 2572 2576 } 2573 2577 2578 // Column isn't a string. 2574 2579 if ( ! is_string( $value['value'] ) ) { 2575 2580 continue; 2576 2581 } 2577 2582 2578 // ASCII is always OK. 2579 if ( ! isset( $value['ascii'] ) && $this->check_ascii( $value['value'] ) ) { 2580 continue; 2581 } 2582 2583 // Convert the text locally. 2584 if ( $supported_charsets ) { 2585 if ( isset( $mb_charsets[ $charset ] ) && in_array( $mb_charsets[ $charset ], $supported_charsets ) ) { 2586 $value['value'] = mb_convert_encoding( $value['value'], $mb_charsets[ $charset ], $mb_charsets[ $charset ] ); 2583 $truncate_by_byte_length = 'byte' === $value['length']['type']; 2584 2585 $needs_validation = true; 2586 if ( 2587 // latin1 can store any byte sequence 2588 'latin1' === $charset 2589 || 2590 // ASCII is always OK. 2591 ( ! isset( $value['ascii'] ) && $this->check_ascii( $value['value'] ) ) 2592 ) { 2593 $truncate_by_byte_length = true; 2594 $needs_validation = false; 2595 } 2596 2597 if ( $truncate_by_byte_length ) { 2598 mbstring_binary_safe_encoding(); 2599 if ( false !== $length && strlen( $value['value'] ) > $length ) { 2600 $value['value'] = substr( $value['value'], 0, $length ); 2601 } 2602 reset_mbstring_encoding(); 2603 2604 if ( ! $needs_validation ) { 2587 2605 continue; 2588 2606 } … … 2590 2608 2591 2609 // utf8 can be handled by regex, which is a bunch faster than a DB lookup. 2592 if ( 'utf8' === $charset || 'utf8mb3' === $charset || 'utf8mb4' === $charset) {2610 if ( ( 'utf8' === $charset || 'utf8mb3' === $charset || 'utf8mb4' === $charset ) && function_exists( 'mb_strlen' ) ) { 2593 2611 $regex = '/ 2594 2612 ( … … 2600 2618 | [\xEE-\xEF][\x80-\xBF]{2}'; 2601 2619 2602 if ( 'utf8mb4' === $charset ) {2620 if ( 'utf8mb4' === $charset ) { 2603 2621 $regex .= ' 2604 2622 | \xF0[\x90-\xBF][\x80-\xBF]{2} # four-byte sequences 11110xxx 10xxxxxx * 3 … … 2613 2631 /x'; 2614 2632 $value['value'] = preg_replace( $regex, '$1', $value['value'] ); 2633 2634 2635 if ( false !== $length && mb_strlen( $value['value'], 'UTF-8' ) > $length ) { 2636 $value['value'] = mb_substr( $value['value'], 0, $length, 'UTF-8' ); 2637 } 2615 2638 continue; 2616 2639 } … … 2629 2652 } 2630 2653 2631 // Split the CONVERT() calls by charset, so we can make sure the connection is right 2632 $queries[ $value['charset'] ][ $col ] = $this->prepare( "CONVERT( %s USING {$value['charset']} )", $value['value'] ); 2654 // We're going to need to truncate by characters or bytes, depending on the length value we have. 2655 if ( 'byte' === $value['length']['type'] ) { 2656 // Split the CONVERT() calls by charset, so we can make sure the connection is right 2657 $queries[ $value['charset'] ][ $col ] = $this->prepare( "CONVERT( LEFT( CONVERT( %s USING binary ), %d ) USING {$value['charset']} )", $value['value'], $value['length']['length'] ); 2658 } else { 2659 $queries[ $value['charset'] ][ $col ] = $this->prepare( "LEFT( CONVERT( %s USING {$value['charset']} ), %d )", $value['value'], $value['length']['length'] ); 2660 } 2661 2633 2662 unset( $data[ $col ]['db'] ); 2634 2663 } … … 2649 2678 $this->check_current_query = false; 2650 2679 2651 $row = $this->get_row( "SELECT " . implode( ', ', $query ), ARRAY_N ); 2680 $sql = array(); 2681 foreach ( $query as $column => $column_query ) { 2682 $sql[] = $column_query . " AS x_$column"; 2683 } 2684 2685 $row = $this->get_row( "SELECT " . implode( ', ', $sql ), ARRAY_A ); 2652 2686 if ( ! $row ) { 2653 2687 $this->set_charset( $this->dbh, $connection_charset ); … … 2655 2689 } 2656 2690 2657 $cols = array_keys( $query ); 2658 $col_count = count( $cols ); 2659 for ( $ii = 0; $ii < $col_count; $ii++ ) { 2660 $data[ $cols[ $ii ] ]['value'] = $row[ $ii ]; 2691 foreach ( array_keys( $query ) as $column ) { 2692 $data[ $column ]['value'] = $row["x_$column"]; 2661 2693 } 2662 2694 } … … 2700 2732 'charset' => $charset, 2701 2733 'ascii' => false, 2734 'length' => false, 2702 2735 ); 2703 2736 … … 2722 2755 */ 2723 2756 public function strip_invalid_text_for_column( $table, $column, $value ) { 2724 if ( ! is_string( $value ) || $this->check_ascii( $value )) {2757 if ( ! is_string( $value ) ) { 2725 2758 return $value; 2726 2759 } … … 2739 2772 'value' => $value, 2740 2773 'charset' => $charset, 2741 ' ascii' => false,2774 'length' => $this->get_col_length( $table, $column ), 2742 2775 ) 2743 2776 ); -
branches/4.0/tests/phpunit/tests/comment.php
r32313 r32388 24 24 } 25 25 26 $post_id = $this->factory->post->create(); 26 $u = $this->factory->user->create(); 27 $post_id = $this->factory->post->create( array( 'post_author' => $u ) ); 27 28 28 29 $data = array( -
branches/4.0/tests/phpunit/tests/compat.php
r25002 r32388 3 3 /** 4 4 * @group compat 5 * @group security-153 5 6 */ 6 7 class Tests_Compat extends WP_UnitTestCase { 7 function test_mb_substr() { 8 $this->assertEquals('баб', _mb_substr('баба', 0, 3)); 9 $this->assertEquals('баб', _mb_substr('баба', 0, -1)); 10 $this->assertEquals('баб', _mb_substr('баба', 0, -1)); 11 $this->assertEquals('I am your б', _mb_substr('I am your баба', 0, 11)); 8 function utf8_string_lengths() { 9 return array( 10 // string, character_length, byte_length 11 array( 'баба', 4, 8 ), 12 array( 'баб', 3, 6 ), 13 array( 'I am your б', 11, 12 ), 14 array( '1111111111', 10, 10 ), 15 array( '²²²²²²²²²²', 10, 20 ), 16 array( '3333333333', 10, 30 ), 17 array( '𝟜𝟜𝟜𝟜𝟜𝟜𝟜𝟜𝟜𝟜', 10, 40 ), 18 array( '1²3𝟜1²3𝟜1²3𝟜', 12, 30 ), 19 ); 20 } 21 22 function utf8_substrings() { 23 return array( 24 // string, start, length, character_substring, byte_substring 25 array( 'баба', 0, 3, 'баб', "б\xD0" ), 26 array( 'баба', 0, -1, 'баб', "баб\xD0" ), 27 array( 'баба', 1, null, 'аба', "\xB1аба" ), 28 array( 'баба', -3, null, 'аба', "\xB1а" ), 29 array( 'баба', -3, 2, 'аб', "\xB1\xD0" ), 30 array( 'баба', -1, 2, 'а', "\xB0" ), 31 array( 'I am your баба', 0, 11, 'I am your б', "I am your \xD0" ), 32 ); 33 } 34 35 /** 36 * @dataProvider utf8_string_lengths 37 */ 38 function test_mb_strlen( $string, $expected_character_length ) { 39 $this->assertEquals( $expected_character_length, _mb_strlen( $string, 'UTF-8' ) ); 40 } 41 42 /** 43 * @dataProvider utf8_string_lengths 44 */ 45 function test_mb_strlen_via_regex( $string, $expected_character_length ) { 46 _wp_can_use_pcre_u( false ); 47 $this->assertEquals( $expected_character_length, _mb_strlen( $string, 'UTF-8' ) ); 48 _wp_can_use_pcre_u( 'reset' ); 49 } 50 51 /** 52 * @dataProvider utf8_string_lengths 53 */ 54 function test_8bit_mb_strlen( $string, $expected_character_length, $expected_byte_length ) { 55 $this->assertEquals( $expected_byte_length, _mb_strlen( $string, '8bit' ) ); 56 } 57 58 /** 59 * @dataProvider utf8_substrings 60 */ 61 function test_mb_substr( $string, $start, $length, $expected_character_substring ) { 62 $this->assertEquals( $expected_character_substring, _mb_substr( $string, $start, $length, 'UTF-8' ) ); 63 } 64 65 /** 66 * @dataProvider utf8_substrings 67 */ 68 function test_mb_substr_via_regex( $string, $start, $length, $expected_character_substring ) { 69 _wp_can_use_pcre_u( false ); 70 $this->assertEquals( $expected_character_substring, _mb_substr( $string, $start, $length, 'UTF-8' ) ); 71 _wp_can_use_pcre_u( 'reset' ); 72 } 73 74 /** 75 * @dataProvider utf8_substrings 76 */ 77 function test_8bit_mb_substr( $string, $start, $length, $expected_character_substring, $expected_byte_substring ) { 78 $this->assertEquals( $expected_byte_substring, _mb_substr( $string, $start, $length, '8bit' ) ); 79 } 80 81 function test_mb_substr_phpcore(){ 82 /* https://github.com/php/php-src/blob/php-5.6.8/ext/mbstring/tests/mb_substr_basic.phpt */ 83 $string_ascii = 'ABCDEF'; 84 $string_mb = base64_decode('5pel5pys6Kqe44OG44Kt44K544OI44Gn44GZ44CCMDEyMzTvvJXvvJbvvJfvvJjvvJnjgII='); 85 86 $this->assertEquals( 'DEF', _mb_substr($string_ascii, 3) ); 87 $this->assertEquals( 'DEF', _mb_substr($string_ascii, 3, 5, 'ISO-8859-1') ); 88 89 // specific latin-1 as that is the default the core php test opporates under 90 $this->assertEquals( 'peacrOiqng==' , base64_encode( _mb_substr($string_mb, 2, 7, 'latin-1' ) ) ); 91 $this->assertEquals( '6Kqe44OG44Kt44K544OI44Gn44GZ', base64_encode( _mb_substr($string_mb, 2, 7, 'utf-8') ) ); 92 93 /* https://github.com/php/php-src/blob/php-5.6.8/ext/mbstring/tests/mb_substr_variation1.phpt */ 94 $start = 0; 95 $length = 5; 96 $unset_var = 10; 97 unset ($unset_var); 98 $heredoc = <<<EOT 99 hello world 100 EOT; 101 $inputs = array( 102 /*1*/ 0, 103 1, 104 12345, 105 -2345, 106 // float data 107 /*5*/ 10.5, 108 -10.5, 109 12.3456789000e10, 110 12.3456789000E-10, 111 .5, 112 // null data 113 /*10*/ NULL, 114 null, 115 // boolean data 116 /*12*/ true, 117 false, 118 TRUE, 119 FALSE, 120 // empty data 121 /*16*/ "", 122 '', 123 // string data 124 /*18*/ "string", 125 'string', 126 $heredoc, 127 // object data 128 /*21*/ new classA(), 129 // undefined data 130 /*22*/ @$undefined_var, 131 // unset data 132 /*23*/ @$unset_var, 133 ); 134 $outputs = array( 135 "0", 136 "1", 137 "12345", 138 "-2345", 139 "10.5", 140 "-10.5", 141 "12345", 142 "1.234", 143 "0.5", 144 "", 145 "", 146 "1", 147 "", 148 "1", 149 "", 150 "", 151 "", 152 "strin", 153 "strin", 154 "hello", 155 "Class", 156 "", 157 "", 158 ); 159 $iterator = 0; 160 foreach($inputs as $input) { 161 $this->assertEquals( $outputs[$iterator] , _mb_substr($input, $start, $length) ); 162 $iterator++; 163 } 164 12 165 } 13 166 … … 35 188 } 36 189 } 190 191 /* used in test_mb_substr_phpcore */ 192 class classA { 193 public function __toString() { 194 return "Class A object"; 195 } 196 } -
branches/4.0/tests/phpunit/tests/db.php
r32313 r32388 712 712 'format' => '%s', 713 713 'charset' => $expected_charset, 714 'ascii' => false,715 714 'length' => $wpdb->get_col_length( $wpdb->posts, 'post_content' ), 716 715 ) -
branches/4.0/tests/phpunit/tests/db/charset.php
r32272 r32388 7 7 * 8 8 * @group wpdb 9 * @group security-153 9 10 */ 10 11 class Tests_DB_Charset extends WP_UnitTestCase { … … 29 30 'charset' => 'latin1', 30 31 'value' => "\xf0\x9f\x8e\xb7", 31 'expected' => "\xf0\x9f\x8e\xb7" 32 'expected' => "\xf0\x9f\x8e\xb7", 33 'length' => array( 'type' => 'char', 'length' => 100 ), 34 ), 35 'latin1_char_length' => array( 36 // latin1. latin1 never changes. 37 'charset' => 'latin1', 38 'value' => str_repeat( 'A', 11 ), 39 'expected' => str_repeat( 'A', 10 ), 40 'length' => array( 'type' => 'char', 'length' => 10 ), 41 ), 42 'latin1_byte_length' => array( 43 // latin1. latin1 never changes. 44 'charset' => 'latin1', 45 'value' => str_repeat( 'A', 11 ), 46 'expected' => str_repeat( 'A', 10 ), 47 'length' => array( 'type' => 'byte', 'length' => 10 ), 32 48 ), 33 49 'ascii' => array( … … 35 51 'charset' => 'ascii', 36 52 'value' => 'Hello World', 37 'expected' => 'Hello World' 53 'expected' => 'Hello World', 54 'length' => array( 'type' => 'char', 'length' => 100 ), 55 ), 56 'ascii_char_length' => array( 57 // ascii gets special treatment, make sure it's covered 58 'charset' => 'ascii', 59 'value' => str_repeat( 'A', 11 ), 60 'expected' => str_repeat( 'A', 10 ), 61 'length' => array( 'type' => 'char', 'length' => 10 ), 62 ), 63 'ascii_byte_length' => array( 64 // ascii gets special treatment, make sure it's covered 65 'charset' => 'ascii', 66 'value' => str_repeat( 'A', 11 ), 67 'expected' => str_repeat( 'A', 10 ), 68 'length' => array( 'type' => 'byte', 'length' => 10 ), 38 69 ), 39 70 'utf8' => array( … … 41 72 'charset' => 'utf8', 42 73 'value' => "H€llo\xf0\x9f\x98\x88World¢", 43 'expected' => 'H€lloWorld¢' 74 'expected' => 'H€lloWorld¢', 75 'length' => array( 'type' => 'char', 'length' => 100 ), 76 ), 77 'utf8_23char_length' => array( 78 // utf8 only allows <= 3-byte chars 79 'charset' => 'utf8', 80 'value' => str_repeat( "²3", 10 ), 81 'expected' => str_repeat( "²3", 5 ), 82 'length' => array( 'type' => 'char', 'length' => 10 ), 83 ), 84 'utf8_23byte_length' => array( 85 // utf8 only allows <= 3-byte chars 86 'charset' => 'utf8', 87 'value' => str_repeat( "²3", 10 ), 88 'expected' => "²3²3", 89 'length' => array( 'type' => 'byte', 'length' => 10 ), 90 ), 91 'utf8_3char_length' => array( 92 // utf8 only allows <= 3-byte chars 93 'charset' => 'utf8', 94 'value' => str_repeat( "3", 11 ), 95 'expected' => str_repeat( "3", 10 ), 96 'length' => array( 'type' => 'char', 'length' => 10 ), 97 ), 98 'utf8_3byte_length' => array( 99 // utf8 only allows <= 3-byte chars 100 'charset' => 'utf8', 101 'value' => str_repeat( "3", 11 ), 102 'expected' => "333", 103 'length' => array( 'type' => 'byte', 'length' => 10 ), 44 104 ), 45 105 'utf8mb3' => array( … … 47 107 'charset' => 'utf8mb3', 48 108 'value' => "H€llo\xf0\x9f\x98\x88World¢", 49 'expected' => 'H€lloWorld¢' 109 'expected' => 'H€lloWorld¢', 110 'length' => array( 'type' => 'char', 'length' => 100 ), 111 ), 112 'utf8mb3_23char_length' => array( 113 // utf8mb3 should behave the same an utf8 114 'charset' => 'utf8mb3', 115 'value' => str_repeat( "²3", 10 ), 116 'expected' => str_repeat( "²3", 5 ), 117 'length' => array( 'type' => 'char', 'length' => 10 ), 118 ), 119 'utf8mb3_23byte_length' => array( 120 // utf8mb3 should behave the same an utf8 121 'charset' => 'utf8mb3', 122 'value' => str_repeat( "²3", 10 ), 123 'expected' => "²3²3", 124 'length' => array( 'type' => 'byte', 'length' => 10 ), 125 ), 126 'utf8mb3_3char_length' => array( 127 // utf8mb3 should behave the same an utf8 128 'charset' => 'utf8mb3', 129 'value' => str_repeat( "3", 11 ), 130 'expected' => str_repeat( "3", 10 ), 131 'length' => array( 'type' => 'char', 'length' => 10 ), 132 ), 133 'utf8mb3_3byte_length' => array( 134 // utf8mb3 should behave the same an utf8 135 'charset' => 'utf8mb3', 136 'value' => str_repeat( "3", 10 ), 137 'expected' => "333", 138 'length' => array( 'type' => 'byte', 'length' => 10 ), 50 139 ), 51 140 'utf8mb4' => array( … … 53 142 'charset' => 'utf8mb4', 54 143 'value' => "H€llo\xf0\x9f\x98\x88World¢", 55 'expected' => "H€llo\xf0\x9f\x98\x88World¢" 144 'expected' => "H€llo\xf0\x9f\x98\x88World¢", 145 'length' => array( 'type' => 'char', 'length' => 100 ), 146 ), 147 'utf8mb4_234char_length' => array( 148 // utf8mb4 allows 4-byte characters, too 149 'charset' => 'utf8mb4', 150 'value' => str_repeat( "²3𝟜", 10 ), 151 'expected' => "²3𝟜²3𝟜²3𝟜²", 152 'length' => array( 'type' => 'char', 'length' => 10 ), 153 ), 154 'utf8mb4_234byte_length' => array( 155 // utf8mb4 allows 4-byte characters, too 156 'charset' => 'utf8mb4', 157 'value' => str_repeat( "²3𝟜", 10 ), 158 'expected' => "²3𝟜", 159 'length' => array( 'type' => 'byte', 'length' => 10 ), 160 ), 161 'utf8mb4_4char_length' => array( 162 // utf8mb4 allows 4-byte characters, too 163 'charset' => 'utf8mb4', 164 'value' => str_repeat( "𝟜", 11 ), 165 'expected' => str_repeat( "𝟜", 10 ), 166 'length' => array( 'type' => 'char', 'length' => 10 ), 167 ), 168 'utf8mb4_4byte_length' => array( 169 // utf8mb4 allows 4-byte characters, too 170 'charset' => 'utf8mb4', 171 'value' => str_repeat( "𝟜", 10 ), 172 'expected' => "𝟜𝟜", 173 'length' => array( 'type' => 'byte', 'length' => 10 ), 56 174 ), 57 175 'koi8r' => array( … … 59 177 'value' => "\xfdord\xf2ress", 60 178 'expected' => "\xfdord\xf2ress", 179 'length' => array( 'type' => 'char', 'length' => 100 ), 180 ), 181 'koi8r_char_length' => array( 182 'charset' => 'koi8r', 183 'value' => str_repeat( "\xfd\xf2", 10 ), 184 'expected' => str_repeat( "\xfd\xf2", 5 ), 185 'length' => array( 'type' => 'char', 'length' => 10 ), 186 ), 187 'koi8r_byte_length' => array( 188 'charset' => 'koi8r', 189 'value' => str_repeat( "\xfd\xf2", 10 ), 190 'expected' => str_repeat( "\xfd\xf2", 5 ), 191 'length' => array( 'type' => 'byte', 'length' => 10 ), 61 192 ), 62 193 'hebrew' => array( … … 64 195 'value' => "\xf9ord\xf7ress", 65 196 'expected' => "\xf9ord\xf7ress", 197 'length' => array( 'type' => 'char', 'length' => 100 ), 198 ), 199 'hebrew_char_length' => array( 200 'charset' => 'hebrew', 201 'value' => str_repeat( "\xf9\xf7", 10 ), 202 'expected' => str_repeat( "\xf9\xf7", 5 ), 203 'length' => array( 'type' => 'char', 'length' => 10 ), 204 ), 205 'hebrew_byte_length' => array( 206 'charset' => 'hebrew', 207 'value' => str_repeat( "\xf9\xf7", 10 ), 208 'expected' => str_repeat( "\xf9\xf7", 5 ), 209 'length' => array( 'type' => 'byte', 'length' => 10 ), 66 210 ), 67 211 'cp1251' => array( … … 69 213 'value' => "\xd8ord\xd0ress", 70 214 'expected' => "\xd8ord\xd0ress", 215 'length' => array( 'type' => 'char', 'length' => 100 ), 216 ), 217 'cp1251_char_length' => array( 218 'charset' => 'cp1251', 219 'value' => str_repeat( "\xd8\xd0", 10 ), 220 'expected' => str_repeat( "\xd8\xd0", 5 ), 221 'length' => array( 'type' => 'char', 'length' => 10 ), 222 ), 223 'cp1251_byte_length' => array( 224 'charset' => 'cp1251', 225 'value' => str_repeat( "\xd8\xd0", 10 ), 226 'expected' => str_repeat( "\xd8\xd0", 5 ), 227 'length' => array( 'type' => 'byte', 'length' => 10 ), 71 228 ), 72 229 'tis620' => array( … … 74 231 'value' => "\xccord\xe3ress", 75 232 'expected' => "\xccord\xe3ress", 233 'length' => array( 'type' => 'char', 'length' => 100 ), 234 ), 235 'tis620_char_length' => array( 236 'charset' => 'tis620', 237 'value' => str_repeat( "\xcc\xe3", 10 ), 238 'expected' => str_repeat( "\xcc\xe3", 5 ), 239 'length' => array( 'type' => 'char', 'length' => 10 ), 240 ), 241 'tis620_byte_length' => array( 242 'charset' => 'tis620', 243 'value' => str_repeat( "\xcc\xe3", 10 ), 244 'expected' => str_repeat( "\xcc\xe3", 5 ), 245 'length' => array( 'type' => 'byte', 'length' => 10 ), 76 246 ), 77 247 'false' => array( … … 79 249 'charset' => false, 80 250 'value' => 100, 81 'expected' => 100 251 'expected' => 100, 252 'length' => false, 82 253 ), 83 254 ); … … 95 266 'charset' => 'big5', 96 267 'value' => $big5, 97 'expected' => $big5 268 'expected' => $big5, 269 'length' => array( 'type' => 'char', 'length' => 100 ), 270 ); 271 272 $fields['big5_char_length'] = array( 273 'charset' => 'big5', 274 'value' => str_repeat( $big5, 10 ), 275 'expected' => str_repeat( $big5, 3 ) . 'a', 276 'length' => array( 'type' => 'char', 'length' => 10 ), 277 ); 278 279 $fields['big5_byte_length'] = array( 280 'charset' => 'big5', 281 'value' => str_repeat( $big5, 10 ), 282 'expected' => str_repeat( $big5, 2 ) . 'a', 283 'length' => array( 'type' => 'byte', 'length' => 10 ), 98 284 ); 99 285 } … … 167 353 168 354 $all_ascii_fields = array( 169 'post_content' => array( 'value' => 'foo foo foo!', 'format' => '%s', 'charset' => false),170 'post_excerpt' => array( 'value' => 'bar bar bar!', 'format' => '%s', 'charset' => false),355 'post_content' => array( 'value' => 'foo foo foo!', 'format' => '%s', 'charset' => $charset ), 356 'post_excerpt' => array( 'value' => 'bar bar bar!', 'format' => '%s', 'charset' => $charset ), 171 357 ); 172 358 173 359 // This is the same data used in process_field_charsets_for_nonexistent_table() 174 360 $non_ascii_string_fields = array( 175 'post_content' => array( 'value' => '¡foo foo foo!', 'format' => '%s', 'charset' => $charset , 'ascii' => false),176 'post_excerpt' => array( 'value' => '¡bar bar bar!', 'format' => '%s', 'charset' => $charset , 'ascii' => false),361 'post_content' => array( 'value' => '¡foo foo foo!', 'format' => '%s', 'charset' => $charset ), 362 'post_excerpt' => array( 'value' => '¡bar bar bar!', 'format' => '%s', 'charset' => $charset ), 177 363 ); 178 364 … … 541 727 self::$_wpdb->query( $drop ); 542 728 } 729 730 function test_strip_invalid_test_for_column_bails_if_ascii_input_too_long() { 731 global $wpdb; 732 733 // TEXT column 734 $stripped = $wpdb->strip_invalid_text_for_column( $wpdb->comments, 'comment_content', str_repeat( 'A', 65536 ) ); 735 $this->assertEquals( 65535, strlen( $stripped ) ); 736 737 // VARCHAR column 738 $stripped = $wpdb->strip_invalid_text_for_column( $wpdb->comments, 'comment_agent', str_repeat( 'A', 256 ) ); 739 $this->assertEquals( 255, strlen( $stripped ) ); 740 } 543 741 }
Note: See TracChangeset
for help on using the changeset viewer.