Changeset 32387
- Timestamp:
- 05/06/2015 07:06:02 PM (10 years ago)
- Location:
- branches/4.1
- Files:
-
- 8 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/4.1/src/wp-admin/includes/upgrade.php
r32312 r32387 443 443 upgrade_400(); 444 444 445 if ( $wp_current_db_version < 3013 4)446 upgrade_41 4();445 if ( $wp_current_db_version < 30135 ) 446 upgrade_415(); 447 447 448 448 maybe_disable_link_manager(); … … 1335 1335 * Execute changes made in WordPress 4.1.4. 1336 1336 * 1337 * @since 4.1. 31337 * @since 4.1.4 1338 1338 */ 1339 1339 function upgrade_414() { 1340 } 1341 1342 /** 1343 * Execute changes made in WordPress 4.1.5. 1344 * 1345 * @since 4.1.5 1346 */ 1347 function upgrade_415() { 1340 1348 global $wp_current_db_version, $wpdb; 1341 1349 1342 if ( $wp_current_db_version < 3013 4) {1350 if ( $wp_current_db_version < 30135 ) { 1343 1351 $content_length = $wpdb->get_col_length( $wpdb->comments, 'comment_content' ); 1344 if ( ! $content_length ) { 1345 $content_length = 65535; 1346 } 1352 if ( false === $content_length ) { 1353 $content_length = array( 1354 'type' => 'byte', 1355 'length' => 65535, 1356 ); 1357 } elseif ( ! is_array( $content_length ) ) { 1358 $length = (int) $content_length > 0 ? (int) $content_length : 65535; 1359 $content_length = array( 1360 'type' => 'byte', 1361 'length' => $length 1362 ); 1363 } 1364 1365 if ( 'byte' !== $content_length['type'] ) { 1366 // Sites with malformed DB schemas are on their own. 1367 return; 1368 } 1369 1370 $allowed_length = intval( $content_length['length'] ) - 10; 1347 1371 1348 1372 $comments = $wpdb->get_results( 1349 "SELECT comment_ID FROM $wpdb->comments1350 WHERE comment_date_gmt> '2015-04-26'1351 AND CHAR_LENGTH( comment_content ) >= $content_length1352 AND ( comment_content LIKE '%<%' OR comment_contentLIKE '%>%' )"1373 "SELECT `comment_ID` FROM `{$wpdb->comments}` 1374 WHERE `comment_date_gmt` > '2015-04-26' 1375 AND LENGTH( `comment_content` ) >= {$allowed_length} 1376 AND ( `comment_content` LIKE '%<%' OR `comment_content` LIKE '%>%' )" 1353 1377 ); 1354 1378 -
branches/4.1/src/wp-includes/compat.php
r30075 r32387 14 14 } 15 15 16 if ( !function_exists('mb_substr') ): 17 function mb_substr( $str, $start, $length=null, $encoding=null ) { 18 return _mb_substr($str, $start, $length, $encoding); 19 } 20 endif; 21 22 function _mb_substr( $str, $start, $length=null, $encoding=null ) { 23 // the solution below, works only for utf-8, so in case of a different 24 // charset, just use built-in substr 25 $charset = get_option( 'blog_charset' ); 26 if ( !in_array( $charset, array('utf8', 'utf-8', 'UTF8', 'UTF-8') ) ) { 27 return is_null( $length )? substr( $str, $start ) : substr( $str, $start, $length); 28 } 29 // use the regex unicode support to separate the UTF-8 characters into an array 30 preg_match_all( '/./us', $str, $match ); 31 $chars = is_null( $length )? array_slice( $match[0], $start ) : array_slice( $match[0], $start, $length ); 32 return implode( '', $chars ); 16 /** 17 * Returns whether PCRE/u (PCRE_UTF8 modifier) is available for use. 18 * 19 * @ignore 20 * @since 4.2.2 21 * @access private 22 * 23 * @param bool $set - Used for testing only 24 * null : default - get PCRE/u capability 25 * false : Used for testing - return false for future calls to this function 26 * 'reset': Used for testing - restore default behavior of this function 27 */ 28 function _wp_can_use_pcre_u( $set = null ) { 29 static $utf8_pcre = 'reset'; 30 31 if ( null !== $set ) { 32 $utf8_pcre = $set; 33 } 34 35 if ( 'reset' === $utf8_pcre ) { 36 $utf8_pcre = @preg_match( '/^./u', 'a' ); 37 } 38 39 return $utf8_pcre; 40 } 41 42 if ( ! function_exists( 'mb_substr' ) ) : 43 function mb_substr( $str, $start, $length = null, $encoding = null ) { 44 return _mb_substr( $str, $start, $length, $encoding ); 45 } 46 endif; 47 48 /* 49 * Only understands UTF-8 and 8bit. All other character sets will be treated as 8bit. 50 * For $encoding === UTF-8, the $str input is expected to be a valid UTF-8 byte sequence. 51 * The behavior of this function for invalid inputs is undefined. 52 */ 53 function _mb_substr( $str, $start, $length = null, $encoding = null ) { 54 if ( null === $encoding ) { 55 $encoding = get_option( 'blog_charset' ); 56 } 57 58 // The solution below works only for UTF-8, 59 // so in case of a different charset just use built-in substr() 60 if ( ! in_array( $encoding, array( 'utf8', 'utf-8', 'UTF8', 'UTF-8' ) ) ) { 61 return is_null( $length ) ? substr( $str, $start ) : substr( $str, $start, $length ); 62 } 63 64 if ( _wp_can_use_pcre_u() ) { 65 // Use the regex unicode support to separate the UTF-8 characters into an array 66 preg_match_all( '/./us', $str, $match ); 67 $chars = is_null( $length ) ? array_slice( $match[0], $start ) : array_slice( $match[0], $start, $length ); 68 return implode( '', $chars ); 69 } 70 71 $regex = '/( 72 [\x00-\x7F] # single-byte sequences 0xxxxxxx 73 | [\xC2-\xDF][\x80-\xBF] # double-byte sequences 110xxxxx 10xxxxxx 74 | \xE0[\xA0-\xBF][\x80-\xBF] # triple-byte sequences 1110xxxx 10xxxxxx * 2 75 | [\xE1-\xEC][\x80-\xBF]{2} 76 | \xED[\x80-\x9F][\x80-\xBF] 77 | [\xEE-\xEF][\x80-\xBF]{2} 78 | \xF0[\x90-\xBF][\x80-\xBF]{2} # four-byte sequences 11110xxx 10xxxxxx * 3 79 | [\xF1-\xF3][\x80-\xBF]{3} 80 | \xF4[\x80-\x8F][\x80-\xBF]{2} 81 )/x'; 82 83 $chars = array( '' ); // Start with 1 element instead of 0 since the first thing we do is pop 84 do { 85 // We had some string left over from the last round, but we counted it in that last round. 86 array_pop( $chars ); 87 88 // Split by UTF-8 character, limit to 1000 characters (last array element will contain the rest of the string) 89 $pieces = preg_split( $regex, $str, 1000, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY ); 90 91 $chars = array_merge( $chars, $pieces ); 92 } while ( count( $pieces ) > 1 && $str = array_pop( $pieces ) ); // If there's anything left over, repeat the loop. 93 94 return join( '', array_slice( $chars, $start, $length ) ); 95 } 96 97 if ( ! function_exists( 'mb_strlen' ) ) : 98 function mb_strlen( $str, $encoding = null ) { 99 return _mb_strlen( $str, $encoding ); 100 } 101 endif; 102 103 /* 104 * Only understands UTF-8 and 8bit. All other character sets will be treated as 8bit. 105 * For $encoding === UTF-8, the $str input is expected to be a valid UTF-8 byte sequence. 106 * The behavior of this function for invalid inputs is undefined. 107 */ 108 function _mb_strlen( $str, $encoding = null ) { 109 if ( null === $encoding ) { 110 $encoding = get_option( 'blog_charset' ); 111 } 112 113 // The solution below works only for UTF-8, 114 // so in case of a different charset just use built-in strlen() 115 if ( ! in_array( $encoding, array( 'utf8', 'utf-8', 'UTF8', 'UTF-8' ) ) ) { 116 return strlen( $str ); 117 } 118 119 if ( _wp_can_use_pcre_u() ) { 120 // Use the regex unicode support to separate the UTF-8 characters into an array 121 preg_match_all( '/./us', $str, $match ); 122 return count( $match[0] ); 123 } 124 125 $regex = '/(?: 126 [\x00-\x7F] # single-byte sequences 0xxxxxxx 127 | [\xC2-\xDF][\x80-\xBF] # double-byte sequences 110xxxxx 10xxxxxx 128 | \xE0[\xA0-\xBF][\x80-\xBF] # triple-byte sequences 1110xxxx 10xxxxxx * 2 129 | [\xE1-\xEC][\x80-\xBF]{2} 130 | \xED[\x80-\x9F][\x80-\xBF] 131 | [\xEE-\xEF][\x80-\xBF]{2} 132 | \xF0[\x90-\xBF][\x80-\xBF]{2} # four-byte sequences 11110xxx 10xxxxxx * 3 133 | [\xF1-\xF3][\x80-\xBF]{3} 134 | \xF4[\x80-\x8F][\x80-\xBF]{2} 135 )/x'; 136 137 $count = 1; // Start at 1 instead of 0 since the first thing we do is decrement 138 do { 139 // We had some string left over from the last round, but we counted it in that last round. 140 $count--; 141 142 // Split by UTF-8 character, limit to 1000 characters (last array element will contain the rest of the string) 143 $pieces = preg_split( $regex, $str, 1000 ); 144 145 // Increment 146 $count += count( $pieces ); 147 } while ( $str = array_pop( $pieces ) ); // If there's anything left over, repeat the loop. 148 149 // Fencepost: preg_split() always returns one extra item in the array 150 return --$count; 33 151 } 34 152 -
branches/4.1/src/wp-includes/version.php
r32371 r32387 12 12 * @global int $wp_db_version 13 13 */ 14 $wp_db_version = 3013 4;14 $wp_db_version = 30135; 15 15 16 16 /** -
branches/4.1/src/wp-includes/wp-db.php
r32307 r32387 1790 1790 */ 1791 1791 function _insert_replace_helper( $table, $data, $format = null, $type = 'INSERT' ) { 1792 $this->insert_id = 0; 1793 1792 1794 if ( ! in_array( strtoupper( $type ), array( 'REPLACE', 'INSERT' ) ) ) { 1793 1795 return false; … … 1810 1812 $sql = "$type INTO `$table` ($fields) VALUES ($formats)"; 1811 1813 1812 $this->insert_id = 0;1813 1814 $this->check_current_query = false; 1814 1815 return $this->query( $this->prepare( $sql, $values ) ); … … 2002 2003 // This checks %d/%f versus ! %s because it's sprintf() could take more. 2003 2004 $value['charset'] = false; 2004 } elseif ( $this->check_ascii( $value['value'] ) ) {2005 // If it's ASCII, then we don't need the charset. We can skip this field.2006 $value['charset'] = false;2007 2005 } else { 2008 2006 $value['charset'] = $this->get_col_charset( $table, $field ); … … 2010 2008 return false; 2011 2009 } 2012 2013 // This isn't ASCII. Don't have strip_invalid_text() re-check.2014 $value['ascii'] = false;2015 2010 } 2016 2011 … … 2043 2038 return false; 2044 2039 } 2045 }2046 2047 if ( false !== $value['length'] && strlen( $value['value'] ) > $value['length'] ) {2048 return false;2049 2040 } 2050 2041 … … 2381 2372 /** 2382 2373 * Retrieve the maximum string length allowed in a given column. 2374 * The length may either be specified as a byte length or a character length. 2383 2375 * 2384 2376 * @since 4.2.1 … … 2387 2379 * @param string $table Table name. 2388 2380 * @param string $column Column name. 2389 * @return mixed Max column length as an int. False if the column has no 2390 * length. WP_Error object if there was an error. 2381 * @return mixed array( 'length' => (int), 'type' => 'byte' | 'char' ) 2382 * false if the column has no length (for example, numeric column) 2383 * WP_Error object if there was an error. 2391 2384 */ 2392 2385 public function get_col_length( $table, $column ) { … … 2421 2414 2422 2415 switch( $type ) { 2416 case 'char': 2417 case 'varchar': 2418 return array( 2419 'type' => 'char', 2420 'length' => (int) $length, 2421 ); 2422 break; 2423 2423 case 'binary': 2424 case 'char':2425 2424 case 'varbinary': 2426 case 'varchar': 2427 return $length; 2425 return array( 2426 'type' => 'byte', 2427 'length' => (int) $length, 2428 ); 2428 2429 break; 2429 2430 case 'tinyblob': 2430 2431 case 'tinytext': 2431 return 255; // 2^8 - 1 2432 return array( 2433 'type' => 'byte', 2434 'length' => 255, // 2^8 - 1 2435 ); 2432 2436 break; 2433 2437 case 'blob': 2434 2438 case 'text': 2435 return 65535; // 2^16 - 1 2439 return array( 2440 'type' => 'byte', 2441 'length' => 65535, // 2^16 - 1 2442 ); 2436 2443 break; 2437 2444 case 'mediumblob': 2438 2445 case 'mediumtext': 2439 return 16777215; // 2^24 - 1 2446 return array( 2447 'type' => 'byte', 2448 'length' => 16777215, // 2^24 - 1 2449 ); 2440 2450 break; 2441 2451 case 'longblob': 2442 2452 case 'longtext': 2443 return 4294967295; // 2^32 - 1 2453 return array( 2454 'type' => 'byte', 2455 'length' => 4294967295, // 2^32 - 1 2456 ); 2444 2457 break; 2445 2458 default: … … 2548 2561 // If any of the columns don't have one of these collations, it needs more sanity checking. 2549 2562 protected function strip_invalid_text( $data ) { 2550 // Some multibyte character sets that we can check in PHP.2551 $mb_charsets = array(2552 'ascii' => 'ASCII',2553 'big5' => 'BIG-5',2554 'eucjpms' => 'eucJP-win',2555 'gb2312' => 'EUC-CN',2556 'ujis' => 'EUC-JP',2557 'utf32' => 'UTF-32',2558 );2559 2560 $supported_charsets = array();2561 if ( function_exists( 'mb_list_encodings' ) ) {2562 $supported_charsets = mb_list_encodings();2563 }2564 2565 2563 $db_check_string = false; 2566 2564 … … 2568 2566 $charset = $value['charset']; 2569 2567 2570 // Column isn't a string, or is latin1, which will will happily store anything. 2571 if ( false === $charset || 'latin1' === $charset ) { 2568 if ( is_array( $value['length'] ) ) { 2569 $length = $value['length']['length']; 2570 } else { 2571 $length = false; 2572 } 2573 2574 // There's no charset to work with. 2575 if ( false === $charset ) { 2572 2576 continue; 2573 2577 } 2574 2578 2579 // Column isn't a string. 2575 2580 if ( ! is_string( $value['value'] ) ) { 2576 2581 continue; 2577 2582 } 2578 2583 2579 // ASCII is always OK. 2580 if ( ! isset( $value['ascii'] ) && $this->check_ascii( $value['value'] ) ) { 2581 continue; 2582 } 2583 2584 // Convert the text locally. 2585 if ( $supported_charsets ) { 2586 if ( isset( $mb_charsets[ $charset ] ) && in_array( $mb_charsets[ $charset ], $supported_charsets ) ) { 2587 $value['value'] = mb_convert_encoding( $value['value'], $mb_charsets[ $charset ], $mb_charsets[ $charset ] ); 2584 $truncate_by_byte_length = 'byte' === $value['length']['type']; 2585 2586 $needs_validation = true; 2587 if ( 2588 // latin1 can store any byte sequence 2589 'latin1' === $charset 2590 || 2591 // ASCII is always OK. 2592 ( ! isset( $value['ascii'] ) && $this->check_ascii( $value['value'] ) ) 2593 ) { 2594 $truncate_by_byte_length = true; 2595 $needs_validation = false; 2596 } 2597 2598 if ( $truncate_by_byte_length ) { 2599 mbstring_binary_safe_encoding(); 2600 if ( false !== $length && strlen( $value['value'] ) > $length ) { 2601 $value['value'] = substr( $value['value'], 0, $length ); 2602 } 2603 reset_mbstring_encoding(); 2604 2605 if ( ! $needs_validation ) { 2588 2606 continue; 2589 2607 } … … 2591 2609 2592 2610 // utf8 can be handled by regex, which is a bunch faster than a DB lookup. 2593 if ( 'utf8' === $charset || 'utf8mb3' === $charset || 'utf8mb4' === $charset) {2611 if ( ( 'utf8' === $charset || 'utf8mb3' === $charset || 'utf8mb4' === $charset ) && function_exists( 'mb_strlen' ) ) { 2594 2612 $regex = '/ 2595 2613 ( … … 2601 2619 | [\xEE-\xEF][\x80-\xBF]{2}'; 2602 2620 2603 if ( 'utf8mb4' === $charset ) {2621 if ( 'utf8mb4' === $charset ) { 2604 2622 $regex .= ' 2605 2623 | \xF0[\x90-\xBF][\x80-\xBF]{2} # four-byte sequences 11110xxx 10xxxxxx * 3 … … 2614 2632 /x'; 2615 2633 $value['value'] = preg_replace( $regex, '$1', $value['value'] ); 2634 2635 2636 if ( false !== $length && mb_strlen( $value['value'], 'UTF-8' ) > $length ) { 2637 $value['value'] = mb_substr( $value['value'], 0, $length, 'UTF-8' ); 2638 } 2616 2639 continue; 2617 2640 } … … 2630 2653 } 2631 2654 2632 // Split the CONVERT() calls by charset, so we can make sure the connection is right 2633 $queries[ $value['charset'] ][ $col ] = $this->prepare( "CONVERT( %s USING {$value['charset']} )", $value['value'] ); 2655 // We're going to need to truncate by characters or bytes, depending on the length value we have. 2656 if ( 'byte' === $value['length']['type'] ) { 2657 // Split the CONVERT() calls by charset, so we can make sure the connection is right 2658 $queries[ $value['charset'] ][ $col ] = $this->prepare( "CONVERT( LEFT( CONVERT( %s USING binary ), %d ) USING {$value['charset']} )", $value['value'], $value['length']['length'] ); 2659 } else { 2660 $queries[ $value['charset'] ][ $col ] = $this->prepare( "LEFT( CONVERT( %s USING {$value['charset']} ), %d )", $value['value'], $value['length']['length'] ); 2661 } 2662 2634 2663 unset( $data[ $col ]['db'] ); 2635 2664 } … … 2650 2679 $this->check_current_query = false; 2651 2680 2652 $row = $this->get_row( "SELECT " . implode( ', ', $query ), ARRAY_N ); 2681 $sql = array(); 2682 foreach ( $query as $column => $column_query ) { 2683 $sql[] = $column_query . " AS x_$column"; 2684 } 2685 2686 $row = $this->get_row( "SELECT " . implode( ', ', $sql ), ARRAY_A ); 2653 2687 if ( ! $row ) { 2654 2688 $this->set_charset( $this->dbh, $connection_charset ); … … 2656 2690 } 2657 2691 2658 $cols = array_keys( $query ); 2659 $col_count = count( $cols ); 2660 for ( $ii = 0; $ii < $col_count; $ii++ ) { 2661 $data[ $cols[ $ii ] ]['value'] = $row[ $ii ]; 2692 foreach ( array_keys( $query ) as $column ) { 2693 $data[ $column ]['value'] = $row["x_$column"]; 2662 2694 } 2663 2695 } … … 2701 2733 'charset' => $charset, 2702 2734 'ascii' => false, 2735 'length' => false, 2703 2736 ); 2704 2737 … … 2723 2756 */ 2724 2757 public function strip_invalid_text_for_column( $table, $column, $value ) { 2725 if ( ! is_string( $value ) || $this->check_ascii( $value )) {2758 if ( ! is_string( $value ) ) { 2726 2759 return $value; 2727 2760 } … … 2740 2773 'value' => $value, 2741 2774 'charset' => $charset, 2742 ' ascii' => false,2775 'length' => $this->get_col_length( $table, $column ), 2743 2776 ) 2744 2777 ); -
branches/4.1/tests/phpunit/tests/comment.php
r32307 r32387 52 52 } 53 53 54 $post_id = $this->factory->post->create(); 54 $u = $this->factory->user->create(); 55 $post_id = $this->factory->post->create( array( 'post_author' => $u ) ); 55 56 56 57 $data = array( -
branches/4.1/tests/phpunit/tests/compat.php
r25002 r32387 3 3 /** 4 4 * @group compat 5 * @group security-153 5 6 */ 6 7 class Tests_Compat extends WP_UnitTestCase { 7 function test_mb_substr() { 8 $this->assertEquals('баб', _mb_substr('баба', 0, 3)); 9 $this->assertEquals('баб', _mb_substr('баба', 0, -1)); 10 $this->assertEquals('баб', _mb_substr('баба', 0, -1)); 11 $this->assertEquals('I am your б', _mb_substr('I am your баба', 0, 11)); 8 function utf8_string_lengths() { 9 return array( 10 // string, character_length, byte_length 11 array( 'баба', 4, 8 ), 12 array( 'баб', 3, 6 ), 13 array( 'I am your б', 11, 12 ), 14 array( '1111111111', 10, 10 ), 15 array( '²²²²²²²²²²', 10, 20 ), 16 array( '3333333333', 10, 30 ), 17 array( '𝟜𝟜𝟜𝟜𝟜𝟜𝟜𝟜𝟜𝟜', 10, 40 ), 18 array( '1²3𝟜1²3𝟜1²3𝟜', 12, 30 ), 19 ); 20 } 21 22 function utf8_substrings() { 23 return array( 24 // string, start, length, character_substring, byte_substring 25 array( 'баба', 0, 3, 'баб', "б\xD0" ), 26 array( 'баба', 0, -1, 'баб', "баб\xD0" ), 27 array( 'баба', 1, null, 'аба', "\xB1аба" ), 28 array( 'баба', -3, null, 'аба', "\xB1а" ), 29 array( 'баба', -3, 2, 'аб', "\xB1\xD0" ), 30 array( 'баба', -1, 2, 'а', "\xB0" ), 31 array( 'I am your баба', 0, 11, 'I am your б', "I am your \xD0" ), 32 ); 33 } 34 35 /** 36 * @dataProvider utf8_string_lengths 37 */ 38 function test_mb_strlen( $string, $expected_character_length ) { 39 $this->assertEquals( $expected_character_length, _mb_strlen( $string, 'UTF-8' ) ); 40 } 41 42 /** 43 * @dataProvider utf8_string_lengths 44 */ 45 function test_mb_strlen_via_regex( $string, $expected_character_length ) { 46 _wp_can_use_pcre_u( false ); 47 $this->assertEquals( $expected_character_length, _mb_strlen( $string, 'UTF-8' ) ); 48 _wp_can_use_pcre_u( 'reset' ); 49 } 50 51 /** 52 * @dataProvider utf8_string_lengths 53 */ 54 function test_8bit_mb_strlen( $string, $expected_character_length, $expected_byte_length ) { 55 $this->assertEquals( $expected_byte_length, _mb_strlen( $string, '8bit' ) ); 56 } 57 58 /** 59 * @dataProvider utf8_substrings 60 */ 61 function test_mb_substr( $string, $start, $length, $expected_character_substring ) { 62 $this->assertEquals( $expected_character_substring, _mb_substr( $string, $start, $length, 'UTF-8' ) ); 63 } 64 65 /** 66 * @dataProvider utf8_substrings 67 */ 68 function test_mb_substr_via_regex( $string, $start, $length, $expected_character_substring ) { 69 _wp_can_use_pcre_u( false ); 70 $this->assertEquals( $expected_character_substring, _mb_substr( $string, $start, $length, 'UTF-8' ) ); 71 _wp_can_use_pcre_u( 'reset' ); 72 } 73 74 /** 75 * @dataProvider utf8_substrings 76 */ 77 function test_8bit_mb_substr( $string, $start, $length, $expected_character_substring, $expected_byte_substring ) { 78 $this->assertEquals( $expected_byte_substring, _mb_substr( $string, $start, $length, '8bit' ) ); 79 } 80 81 function test_mb_substr_phpcore(){ 82 /* https://github.com/php/php-src/blob/php-5.6.8/ext/mbstring/tests/mb_substr_basic.phpt */ 83 $string_ascii = 'ABCDEF'; 84 $string_mb = base64_decode('5pel5pys6Kqe44OG44Kt44K544OI44Gn44GZ44CCMDEyMzTvvJXvvJbvvJfvvJjvvJnjgII='); 85 86 $this->assertEquals( 'DEF', _mb_substr($string_ascii, 3) ); 87 $this->assertEquals( 'DEF', _mb_substr($string_ascii, 3, 5, 'ISO-8859-1') ); 88 89 // specific latin-1 as that is the default the core php test opporates under 90 $this->assertEquals( 'peacrOiqng==' , base64_encode( _mb_substr($string_mb, 2, 7, 'latin-1' ) ) ); 91 $this->assertEquals( '6Kqe44OG44Kt44K544OI44Gn44GZ', base64_encode( _mb_substr($string_mb, 2, 7, 'utf-8') ) ); 92 93 /* https://github.com/php/php-src/blob/php-5.6.8/ext/mbstring/tests/mb_substr_variation1.phpt */ 94 $start = 0; 95 $length = 5; 96 $unset_var = 10; 97 unset ($unset_var); 98 $heredoc = <<<EOT 99 hello world 100 EOT; 101 $inputs = array( 102 /*1*/ 0, 103 1, 104 12345, 105 -2345, 106 // float data 107 /*5*/ 10.5, 108 -10.5, 109 12.3456789000e10, 110 12.3456789000E-10, 111 .5, 112 // null data 113 /*10*/ NULL, 114 null, 115 // boolean data 116 /*12*/ true, 117 false, 118 TRUE, 119 FALSE, 120 // empty data 121 /*16*/ "", 122 '', 123 // string data 124 /*18*/ "string", 125 'string', 126 $heredoc, 127 // object data 128 /*21*/ new classA(), 129 // undefined data 130 /*22*/ @$undefined_var, 131 // unset data 132 /*23*/ @$unset_var, 133 ); 134 $outputs = array( 135 "0", 136 "1", 137 "12345", 138 "-2345", 139 "10.5", 140 "-10.5", 141 "12345", 142 "1.234", 143 "0.5", 144 "", 145 "", 146 "1", 147 "", 148 "1", 149 "", 150 "", 151 "", 152 "strin", 153 "strin", 154 "hello", 155 "Class", 156 "", 157 "", 158 ); 159 $iterator = 0; 160 foreach($inputs as $input) { 161 $this->assertEquals( $outputs[$iterator] , _mb_substr($input, $start, $length) ); 162 $iterator++; 163 } 164 12 165 } 13 166 … … 35 188 } 36 189 } 190 191 /* used in test_mb_substr_phpcore */ 192 class classA { 193 public function __toString() { 194 return "Class A object"; 195 } 196 } -
branches/4.1/tests/phpunit/tests/db.php
r32307 r32387 747 747 'format' => '%s', 748 748 'charset' => $expected_charset, 749 'ascii' => false,750 749 'length' => $wpdb->get_col_length( $wpdb->posts, 'post_content' ), 751 750 ) -
branches/4.1/tests/phpunit/tests/db/charset.php
r32262 r32387 7 7 * 8 8 * @group wpdb 9 * @group security-153 9 10 */ 10 11 class Tests_DB_Charset extends WP_UnitTestCase { … … 29 30 'charset' => 'latin1', 30 31 'value' => "\xf0\x9f\x8e\xb7", 31 'expected' => "\xf0\x9f\x8e\xb7" 32 'expected' => "\xf0\x9f\x8e\xb7", 33 'length' => array( 'type' => 'char', 'length' => 100 ), 34 ), 35 'latin1_char_length' => array( 36 // latin1. latin1 never changes. 37 'charset' => 'latin1', 38 'value' => str_repeat( 'A', 11 ), 39 'expected' => str_repeat( 'A', 10 ), 40 'length' => array( 'type' => 'char', 'length' => 10 ), 41 ), 42 'latin1_byte_length' => array( 43 // latin1. latin1 never changes. 44 'charset' => 'latin1', 45 'value' => str_repeat( 'A', 11 ), 46 'expected' => str_repeat( 'A', 10 ), 47 'length' => array( 'type' => 'byte', 'length' => 10 ), 32 48 ), 33 49 'ascii' => array( … … 35 51 'charset' => 'ascii', 36 52 'value' => 'Hello World', 37 'expected' => 'Hello World' 53 'expected' => 'Hello World', 54 'length' => array( 'type' => 'char', 'length' => 100 ), 55 ), 56 'ascii_char_length' => array( 57 // ascii gets special treatment, make sure it's covered 58 'charset' => 'ascii', 59 'value' => str_repeat( 'A', 11 ), 60 'expected' => str_repeat( 'A', 10 ), 61 'length' => array( 'type' => 'char', 'length' => 10 ), 62 ), 63 'ascii_byte_length' => array( 64 // ascii gets special treatment, make sure it's covered 65 'charset' => 'ascii', 66 'value' => str_repeat( 'A', 11 ), 67 'expected' => str_repeat( 'A', 10 ), 68 'length' => array( 'type' => 'byte', 'length' => 10 ), 38 69 ), 39 70 'utf8' => array( … … 41 72 'charset' => 'utf8', 42 73 'value' => "H€llo\xf0\x9f\x98\x88World¢", 43 'expected' => 'H€lloWorld¢' 74 'expected' => 'H€lloWorld¢', 75 'length' => array( 'type' => 'char', 'length' => 100 ), 76 ), 77 'utf8_23char_length' => array( 78 // utf8 only allows <= 3-byte chars 79 'charset' => 'utf8', 80 'value' => str_repeat( "²3", 10 ), 81 'expected' => str_repeat( "²3", 5 ), 82 'length' => array( 'type' => 'char', 'length' => 10 ), 83 ), 84 'utf8_23byte_length' => array( 85 // utf8 only allows <= 3-byte chars 86 'charset' => 'utf8', 87 'value' => str_repeat( "²3", 10 ), 88 'expected' => "²3²3", 89 'length' => array( 'type' => 'byte', 'length' => 10 ), 90 ), 91 'utf8_3char_length' => array( 92 // utf8 only allows <= 3-byte chars 93 'charset' => 'utf8', 94 'value' => str_repeat( "3", 11 ), 95 'expected' => str_repeat( "3", 10 ), 96 'length' => array( 'type' => 'char', 'length' => 10 ), 97 ), 98 'utf8_3byte_length' => array( 99 // utf8 only allows <= 3-byte chars 100 'charset' => 'utf8', 101 'value' => str_repeat( "3", 11 ), 102 'expected' => "333", 103 'length' => array( 'type' => 'byte', 'length' => 10 ), 44 104 ), 45 105 'utf8mb3' => array( … … 47 107 'charset' => 'utf8mb3', 48 108 'value' => "H€llo\xf0\x9f\x98\x88World¢", 49 'expected' => 'H€lloWorld¢' 109 'expected' => 'H€lloWorld¢', 110 'length' => array( 'type' => 'char', 'length' => 100 ), 111 ), 112 'utf8mb3_23char_length' => array( 113 // utf8mb3 should behave the same an utf8 114 'charset' => 'utf8mb3', 115 'value' => str_repeat( "²3", 10 ), 116 'expected' => str_repeat( "²3", 5 ), 117 'length' => array( 'type' => 'char', 'length' => 10 ), 118 ), 119 'utf8mb3_23byte_length' => array( 120 // utf8mb3 should behave the same an utf8 121 'charset' => 'utf8mb3', 122 'value' => str_repeat( "²3", 10 ), 123 'expected' => "²3²3", 124 'length' => array( 'type' => 'byte', 'length' => 10 ), 125 ), 126 'utf8mb3_3char_length' => array( 127 // utf8mb3 should behave the same an utf8 128 'charset' => 'utf8mb3', 129 'value' => str_repeat( "3", 11 ), 130 'expected' => str_repeat( "3", 10 ), 131 'length' => array( 'type' => 'char', 'length' => 10 ), 132 ), 133 'utf8mb3_3byte_length' => array( 134 // utf8mb3 should behave the same an utf8 135 'charset' => 'utf8mb3', 136 'value' => str_repeat( "3", 10 ), 137 'expected' => "333", 138 'length' => array( 'type' => 'byte', 'length' => 10 ), 50 139 ), 51 140 'utf8mb4' => array( … … 53 142 'charset' => 'utf8mb4', 54 143 'value' => "H€llo\xf0\x9f\x98\x88World¢", 55 'expected' => "H€llo\xf0\x9f\x98\x88World¢" 144 'expected' => "H€llo\xf0\x9f\x98\x88World¢", 145 'length' => array( 'type' => 'char', 'length' => 100 ), 146 ), 147 'utf8mb4_234char_length' => array( 148 // utf8mb4 allows 4-byte characters, too 149 'charset' => 'utf8mb4', 150 'value' => str_repeat( "²3𝟜", 10 ), 151 'expected' => "²3𝟜²3𝟜²3𝟜²", 152 'length' => array( 'type' => 'char', 'length' => 10 ), 153 ), 154 'utf8mb4_234byte_length' => array( 155 // utf8mb4 allows 4-byte characters, too 156 'charset' => 'utf8mb4', 157 'value' => str_repeat( "²3𝟜", 10 ), 158 'expected' => "²3𝟜", 159 'length' => array( 'type' => 'byte', 'length' => 10 ), 160 ), 161 'utf8mb4_4char_length' => array( 162 // utf8mb4 allows 4-byte characters, too 163 'charset' => 'utf8mb4', 164 'value' => str_repeat( "𝟜", 11 ), 165 'expected' => str_repeat( "𝟜", 10 ), 166 'length' => array( 'type' => 'char', 'length' => 10 ), 167 ), 168 'utf8mb4_4byte_length' => array( 169 // utf8mb4 allows 4-byte characters, too 170 'charset' => 'utf8mb4', 171 'value' => str_repeat( "𝟜", 10 ), 172 'expected' => "𝟜𝟜", 173 'length' => array( 'type' => 'byte', 'length' => 10 ), 56 174 ), 57 175 'koi8r' => array( … … 59 177 'value' => "\xfdord\xf2ress", 60 178 'expected' => "\xfdord\xf2ress", 179 'length' => array( 'type' => 'char', 'length' => 100 ), 180 ), 181 'koi8r_char_length' => array( 182 'charset' => 'koi8r', 183 'value' => str_repeat( "\xfd\xf2", 10 ), 184 'expected' => str_repeat( "\xfd\xf2", 5 ), 185 'length' => array( 'type' => 'char', 'length' => 10 ), 186 ), 187 'koi8r_byte_length' => array( 188 'charset' => 'koi8r', 189 'value' => str_repeat( "\xfd\xf2", 10 ), 190 'expected' => str_repeat( "\xfd\xf2", 5 ), 191 'length' => array( 'type' => 'byte', 'length' => 10 ), 61 192 ), 62 193 'hebrew' => array( … … 64 195 'value' => "\xf9ord\xf7ress", 65 196 'expected' => "\xf9ord\xf7ress", 197 'length' => array( 'type' => 'char', 'length' => 100 ), 198 ), 199 'hebrew_char_length' => array( 200 'charset' => 'hebrew', 201 'value' => str_repeat( "\xf9\xf7", 10 ), 202 'expected' => str_repeat( "\xf9\xf7", 5 ), 203 'length' => array( 'type' => 'char', 'length' => 10 ), 204 ), 205 'hebrew_byte_length' => array( 206 'charset' => 'hebrew', 207 'value' => str_repeat( "\xf9\xf7", 10 ), 208 'expected' => str_repeat( "\xf9\xf7", 5 ), 209 'length' => array( 'type' => 'byte', 'length' => 10 ), 66 210 ), 67 211 'cp1251' => array( … … 69 213 'value' => "\xd8ord\xd0ress", 70 214 'expected' => "\xd8ord\xd0ress", 215 'length' => array( 'type' => 'char', 'length' => 100 ), 216 ), 217 'cp1251_char_length' => array( 218 'charset' => 'cp1251', 219 'value' => str_repeat( "\xd8\xd0", 10 ), 220 'expected' => str_repeat( "\xd8\xd0", 5 ), 221 'length' => array( 'type' => 'char', 'length' => 10 ), 222 ), 223 'cp1251_byte_length' => array( 224 'charset' => 'cp1251', 225 'value' => str_repeat( "\xd8\xd0", 10 ), 226 'expected' => str_repeat( "\xd8\xd0", 5 ), 227 'length' => array( 'type' => 'byte', 'length' => 10 ), 71 228 ), 72 229 'tis620' => array( … … 74 231 'value' => "\xccord\xe3ress", 75 232 'expected' => "\xccord\xe3ress", 233 'length' => array( 'type' => 'char', 'length' => 100 ), 234 ), 235 'tis620_char_length' => array( 236 'charset' => 'tis620', 237 'value' => str_repeat( "\xcc\xe3", 10 ), 238 'expected' => str_repeat( "\xcc\xe3", 5 ), 239 'length' => array( 'type' => 'char', 'length' => 10 ), 240 ), 241 'tis620_byte_length' => array( 242 'charset' => 'tis620', 243 'value' => str_repeat( "\xcc\xe3", 10 ), 244 'expected' => str_repeat( "\xcc\xe3", 5 ), 245 'length' => array( 'type' => 'byte', 'length' => 10 ), 76 246 ), 77 247 'false' => array( … … 79 249 'charset' => false, 80 250 'value' => 100, 81 'expected' => 100 251 'expected' => 100, 252 'length' => false, 82 253 ), 83 254 ); … … 95 266 'charset' => 'big5', 96 267 'value' => $big5, 97 'expected' => $big5 268 'expected' => $big5, 269 'length' => array( 'type' => 'char', 'length' => 100 ), 270 ); 271 272 $fields['big5_char_length'] = array( 273 'charset' => 'big5', 274 'value' => str_repeat( $big5, 10 ), 275 'expected' => str_repeat( $big5, 3 ) . 'a', 276 'length' => array( 'type' => 'char', 'length' => 10 ), 277 ); 278 279 $fields['big5_byte_length'] = array( 280 'charset' => 'big5', 281 'value' => str_repeat( $big5, 10 ), 282 'expected' => str_repeat( $big5, 2 ) . 'a', 283 'length' => array( 'type' => 'byte', 'length' => 10 ), 98 284 ); 99 285 } … … 167 353 168 354 $all_ascii_fields = array( 169 'post_content' => array( 'value' => 'foo foo foo!', 'format' => '%s', 'charset' => false),170 'post_excerpt' => array( 'value' => 'bar bar bar!', 'format' => '%s', 'charset' => false),355 'post_content' => array( 'value' => 'foo foo foo!', 'format' => '%s', 'charset' => $charset ), 356 'post_excerpt' => array( 'value' => 'bar bar bar!', 'format' => '%s', 'charset' => $charset ), 171 357 ); 172 358 173 359 // This is the same data used in process_field_charsets_for_nonexistent_table() 174 360 $non_ascii_string_fields = array( 175 'post_content' => array( 'value' => '¡foo foo foo!', 'format' => '%s', 'charset' => $charset , 'ascii' => false),176 'post_excerpt' => array( 'value' => '¡bar bar bar!', 'format' => '%s', 'charset' => $charset , 'ascii' => false),361 'post_content' => array( 'value' => '¡foo foo foo!', 'format' => '%s', 'charset' => $charset ), 362 'post_excerpt' => array( 'value' => '¡bar bar bar!', 'format' => '%s', 'charset' => $charset ), 177 363 ); 178 364 … … 541 727 self::$_wpdb->query( $drop ); 542 728 } 729 730 function test_strip_invalid_test_for_column_bails_if_ascii_input_too_long() { 731 global $wpdb; 732 733 // TEXT column 734 $stripped = $wpdb->strip_invalid_text_for_column( $wpdb->comments, 'comment_content', str_repeat( 'A', 65536 ) ); 735 $this->assertEquals( 65535, strlen( $stripped ) ); 736 737 // VARCHAR column 738 $stripped = $wpdb->strip_invalid_text_for_column( $wpdb->comments, 'comment_agent', str_repeat( 'A', 256 ) ); 739 $this->assertEquals( 255, strlen( $stripped ) ); 740 } 543 741 }
Note: See TracChangeset
for help on using the changeset viewer.