Changeset 32364
- Timestamp: 05/06/2015 02:59:50 AM (10 years ago)
- Location: trunk
- Files: 9 edited
Legend:
- Unmodified
- Added
- Removed
trunk/src/wp-admin/includes/upgrade.php
r32310 r32364 528 528 upgrade_420(); 529 529 530 if ( $wp_current_db_version < 323 08)530 if ( $wp_current_db_version < 32364 ) 531 531 upgrade_430(); 532 532 … … 1447 1447 global $wp_current_db_version, $wpdb; 1448 1448 1449 if ( $wp_current_db_version < 323 08) {1449 if ( $wp_current_db_version < 32364 ) { 1450 1450 $content_length = $wpdb->get_col_length( $wpdb->comments, 'comment_content' ); 1451 if ( ! $content_length ) { 1452 $content_length = 65535; 1453 } 1451 if ( false === $content_length ) { 1452 $content_length = array( 1453 'type' => 'byte', 1454 'length' => 65535, 1455 ); 1456 } elseif ( ! is_array( $content_length ) ) { 1457 $length = (int) $content_length > 0 ? (int) $content_length : 65535; 1458 $content_length = array( 1459 'type' => 'byte', 1460 'length' => $length 1461 ); 1462 } 1463 1464 if ( 'byte' !== $content_length['type'] ) { 1465 // Sites with malformed DB schemas are on their own. 1466 return; 1467 } 1468 1469 $allowed_length = intval( $content_length['length'] ) - 10; 1454 1470 1455 1471 $comments = $wpdb->get_results( 1456 "SELECT comment_ID FROM $wpdb->comments1457 WHERE comment_date_gmt> '2015-04-26'1458 AND CHAR_LENGTH( comment_content ) >= $content_length1459 AND ( comment_content LIKE '%<%' OR comment_contentLIKE '%>%' )"1472 "SELECT `comment_ID` FROM `{$wpdb->comments}` 1473 WHERE `comment_date_gmt` > '2015-04-26' 1474 AND LENGTH( `comment_content` ) >= {$allowed_length} 1475 AND ( `comment_content` LIKE '%<%' OR `comment_content` LIKE '%>%' )" 1460 1476 ); 1461 1477 -
trunk/src/wp-includes/comment.php
r32116 r32364 2119 2119 $compacted = compact( 'comment_post_ID', 'comment_author', 'comment_author_email', 'comment_author_url', 'comment_author_IP', 'comment_date', 'comment_date_gmt', 'comment_content', 'comment_karma', 'comment_approved', 'comment_agent', 'comment_type', 'comment_parent', 'user_id' ); 2120 2120 if ( ! $wpdb->insert( $wpdb->comments, $compacted ) ) { 2121 $fields = array( 'comment_author', 'comment_author_email', 'comment_author_url', 'comment_content' ); 2122 2123 foreach( $fields as $field ) { 2124 if ( isset( $compacted[ $field ] ) ) { 2125 $compacted[ $field ] = $wpdb->strip_invalid_text_for_column( $wpdb->comments, $field, $compacted[ $field ] ); 2126 } 2127 } 2128 2129 if ( ! $wpdb->insert( $wpdb->comments, $compacted ) ) { 2130 return false; 2131 } 2121 return false; 2132 2122 } 2133 2123 … … 2253 2243 */ 2254 2244 function wp_new_comment( $commentdata ) { 2245 global $wpdb; 2246 2255 2247 if ( isset( $commentdata['user_ID'] ) ) { 2256 2248 $commentdata['user_id'] = $commentdata['user_ID'] = (int) $commentdata['user_ID']; … … 2296 2288 $comment_ID = wp_insert_comment($commentdata); 2297 2289 if ( ! $comment_ID ) { 2298 return false; 2290 $fields = array( 'comment_author', 'comment_author_email', 'comment_author_url', 'comment_content' ); 2291 2292 foreach( $fields as $field ) { 2293 if ( isset( $commentdata[ $field ] ) ) { 2294 $commentdata[ $field ] = $wpdb->strip_invalid_text_for_column( $wpdb->comments, $field, $commentdata[ $field ] ); 2295 } 2296 } 2297 2298 $commentdata = wp_filter_comment( $commentdata ); 2299 2300 $commentdata['comment_approved'] = wp_allow_comment( $commentdata ); 2301 2302 $comment_ID = wp_insert_comment( $commentdata ); 2303 if ( ! $comment_ID ) { 2304 return false; 2305 } 2299 2306 } 2300 2307 -
trunk/src/wp-includes/compat.php
r32115 r32364 14 14 } 15 15 16 /** 17 * Returns whether PCRE/u (PCRE_UTF8 modifier) is available for use. 18 * 19 * @ignore 20 * @since 4.2.2 21 * @access private 22 * 23 * @param bool $set - Used for testing only 24 * null : default - get PCRE/u capability 25 * false : Used for testing - return false for future calls to this function 26 * 'reset': Used for testing - restore default behavior of this function 27 */ 28 function _wp_can_use_pcre_u( $set = null ) { 29 static $utf8_pcre = 'reset'; 30 31 if ( null !== $set ) { 32 $utf8_pcre = $set; 33 } 34 35 if ( 'reset' === $utf8_pcre ) { 36 $utf8_pcre = @preg_match( '/^./u', 'a' ); 37 } 38 39 return $utf8_pcre; 40 } 41 16 42 if ( ! function_exists( 'mb_substr' ) ) : 17 43 function mb_substr( $str, $start, $length = null, $encoding = null ) { … … 20 46 endif; 21 47 48 /* 49 * Only understands UTF-8 and 8bit. All other character sets will be treated as 8bit. 50 * For $encoding === UTF-8, the $str input is expected to be a valid UTF-8 byte sequence. 51 * The behavior of this function for invalid inputs is undefined. 52 */ 22 53 function _mb_substr( $str, $start, $length = null, $encoding = null ) { 54 if ( null === $encoding ) { 55 $encoding = get_option( 'blog_charset' ); 56 } 57 23 58 // The solution below works only for UTF-8, 24 59 // so in case of a different charset just use built-in substr() 25 $charset = get_option( 'blog_charset' ); 26 if ( ! in_array( $charset, array( 'utf8', 'utf-8', 'UTF8', 'UTF-8' ) ) ) { 60 if ( ! in_array( $encoding, array( 'utf8', 'utf-8', 'UTF8', 'UTF-8' ) ) ) { 27 61 return is_null( $length ) ? substr( $str, $start ) : substr( $str, $start, $length ); 28 62 } 29 // Use the regex unicode support to separate the UTF-8 characters into an array 30 preg_match_all( '/./us', $str, $match ); 31 $chars = is_null( $length ) ? 
array_slice( $match[0], $start ) : array_slice( $match[0], $start, $length ); 32 return implode( '', $chars ); 63 64 if ( _wp_can_use_pcre_u() ) { 65 // Use the regex unicode support to separate the UTF-8 characters into an array 66 preg_match_all( '/./us', $str, $match ); 67 $chars = is_null( $length ) ? array_slice( $match[0], $start ) : array_slice( $match[0], $start, $length ); 68 return implode( '', $chars ); 69 } 70 71 $regex = '/( 72 [\x00-\x7F] # single-byte sequences 0xxxxxxx 73 | [\xC2-\xDF][\x80-\xBF] # double-byte sequences 110xxxxx 10xxxxxx 74 | \xE0[\xA0-\xBF][\x80-\xBF] # triple-byte sequences 1110xxxx 10xxxxxx * 2 75 | [\xE1-\xEC][\x80-\xBF]{2} 76 | \xED[\x80-\x9F][\x80-\xBF] 77 | [\xEE-\xEF][\x80-\xBF]{2} 78 | \xF0[\x90-\xBF][\x80-\xBF]{2} # four-byte sequences 11110xxx 10xxxxxx * 3 79 | [\xF1-\xF3][\x80-\xBF]{3} 80 | \xF4[\x80-\x8F][\x80-\xBF]{2} 81 )/x'; 82 83 $chars = array( '' ); // Start with 1 element instead of 0 since the first thing we do is pop 84 do { 85 // We had some string left over from the last round, but we counted it in that last round. 86 array_pop( $chars ); 87 88 // Split by UTF-8 character, limit to 1000 characters (last array element will contain the rest of the string) 89 $pieces = preg_split( $regex, $str, 1000, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY ); 90 91 $chars = array_merge( $chars, $pieces ); 92 } while ( count( $pieces ) > 1 && $str = array_pop( $pieces ) ); // If there's anything left over, repeat the loop. 93 94 return join( '', array_slice( $chars, $start, $length ) ); 33 95 } 34 96 … … 39 101 endif; 40 102 103 /* 104 * Only understands UTF-8 and 8bit. All other character sets will be treated as 8bit. 105 * For $encoding === UTF-8, the $str input is expected to be a valid UTF-8 byte sequence. 106 * The behavior of this function for invalid inputs is undefined. 
107 */ 41 108 function _mb_strlen( $str, $encoding = null ) { 109 if ( null === $encoding ) { 110 $encoding = get_option( 'blog_charset' ); 111 } 112 42 113 // The solution below works only for UTF-8, 43 114 // so in case of a different charset just use built-in strlen() 44 $charset = get_option( 'blog_charset' ); 45 if ( ! in_array( $charset, array( 'utf8', 'utf-8', 'UTF8', 'UTF-8' ) ) ) { 115 if ( ! in_array( $encoding, array( 'utf8', 'utf-8', 'UTF8', 'UTF-8' ) ) ) { 46 116 return strlen( $str ); 47 117 } 48 // Use the regex unicode support to separate the UTF-8 characters into an array 49 preg_match_all( '/./us', $str, $match ); 50 return count( $match[0] ); 118 119 if ( _wp_can_use_pcre_u() ) { 120 // Use the regex unicode support to separate the UTF-8 characters into an array 121 preg_match_all( '/./us', $str, $match ); 122 return count( $match[0] ); 123 } 124 125 $regex = '/(?: 126 [\x00-\x7F] # single-byte sequences 0xxxxxxx 127 | [\xC2-\xDF][\x80-\xBF] # double-byte sequences 110xxxxx 10xxxxxx 128 | \xE0[\xA0-\xBF][\x80-\xBF] # triple-byte sequences 1110xxxx 10xxxxxx * 2 129 | [\xE1-\xEC][\x80-\xBF]{2} 130 | \xED[\x80-\x9F][\x80-\xBF] 131 | [\xEE-\xEF][\x80-\xBF]{2} 132 | \xF0[\x90-\xBF][\x80-\xBF]{2} # four-byte sequences 11110xxx 10xxxxxx * 3 133 | [\xF1-\xF3][\x80-\xBF]{3} 134 | \xF4[\x80-\x8F][\x80-\xBF]{2} 135 )/x'; 136 137 $count = 1; // Start at 1 instead of 0 since the first thing we do is decrement 138 do { 139 // We had some string left over from the last round, but we counted it in that last round. 140 $count--; 141 142 // Split by UTF-8 character, limit to 1000 characters (last array element will contain the rest of the string) 143 $pieces = preg_split( $regex, $str, 1000 ); 144 145 // Increment 146 $count += count( $pieces ); 147 } while ( $str = array_pop( $pieces ) ); // If there's anything left over, repeat the loop. 148 149 // Fencepost: preg_split() always returns one extra item in the array 150 return --$count; 51 151 } 52 152 -
trunk/src/wp-includes/version.php
r32308 r32364 12 12 * @global int $wp_db_version 13 13 */ 14 $wp_db_version = 323 08;14 $wp_db_version = 32364; 15 15 16 16 /** -
trunk/src/wp-includes/wp-db.php
r32306 r32364 1810 1810 */ 1811 1811 function _insert_replace_helper( $table, $data, $format = null, $type = 'INSERT' ) { 1812 $this->insert_id = 0; 1813 1812 1814 if ( ! in_array( strtoupper( $type ), array( 'REPLACE', 'INSERT' ) ) ) { 1813 1815 return false; … … 1830 1832 $sql = "$type INTO `$table` ($fields) VALUES ($formats)"; 1831 1833 1832 $this->insert_id = 0;1833 1834 $this->check_current_query = false; 1834 1835 return $this->query( $this->prepare( $sql, $values ) ); … … 2022 2023 // This checks %d/%f versus ! %s because it's sprintf() could take more. 2023 2024 $value['charset'] = false; 2024 } elseif ( $this->check_ascii( $value['value'] ) ) {2025 // If it's ASCII, then we don't need the charset. We can skip this field.2026 $value['charset'] = false;2027 2025 } else { 2028 2026 $value['charset'] = $this->get_col_charset( $table, $field ); … … 2030 2028 return false; 2031 2029 } 2032 2033 // This isn't ASCII. Don't have strip_invalid_text() re-check.2034 $value['ascii'] = false;2035 2030 } 2036 2031 … … 2063 2058 return false; 2064 2059 } 2065 }2066 2067 if ( false !== $value['length'] && mb_strlen( $value['value'] ) > $value['length'] ) {2068 return false;2069 2060 } 2070 2061 … … 2407 2398 /** 2408 2399 * Retrieve the maximum string length allowed in a given column. 2400 * The length may either be specified as a byte length or a character length. 2409 2401 * 2410 2402 * @since 4.2.1 … … 2413 2405 * @param string $table Table name. 2414 2406 * @param string $column Column name. 2415 * @return mixed Max column length as an int. False if the column has no 2416 * length. WP_Error object if there was an error. 2407 * @return mixed array( 'length' => (int), 'type' => 'byte' | 'char' ) 2408 * false if the column has no length (for example, numeric column) 2409 * WP_Error object if there was an error. 
2417 2410 */ 2418 2411 public function get_col_length( $table, $column ) { … … 2447 2440 2448 2441 switch( $type ) { 2442 case 'char': 2443 case 'varchar': 2444 return array( 2445 'type' => 'char', 2446 'length' => (int) $length, 2447 ); 2448 break; 2449 2449 case 'binary': 2450 case 'char':2451 2450 case 'varbinary': 2452 case 'varchar': 2453 return $length; 2451 return array( 2452 'type' => 'byte', 2453 'length' => (int) $length, 2454 ); 2454 2455 break; 2455 2456 case 'tinyblob': 2456 2457 case 'tinytext': 2457 return 255; // 2^8 - 1 2458 return array( 2459 'type' => 'byte', 2460 'length' => 255, // 2^8 - 1 2461 ); 2458 2462 break; 2459 2463 case 'blob': 2460 2464 case 'text': 2461 return 65535; // 2^16 - 1 2465 return array( 2466 'type' => 'byte', 2467 'length' => 65535, // 2^16 - 1 2468 ); 2462 2469 break; 2463 2470 case 'mediumblob': 2464 2471 case 'mediumtext': 2465 return 16777215; // 2^24 - 1 2472 return array( 2473 'type' => 'byte', 2474 'length' => 16777215, // 2^24 - 1 2475 ); 2466 2476 break; 2467 2477 case 'longblob': 2468 2478 case 'longtext': 2469 return 4294967295; // 2^32 - 1 2479 return array( 2480 'type' => 'byte', 2481 'length' => 4294967295, // 2^32 - 1 2482 ); 2470 2483 break; 2471 2484 default: … … 2573 2586 */ 2574 2587 protected function strip_invalid_text( $data ) { 2575 // Some multibyte character sets that we can check in PHP.2576 $mb_charsets = array(2577 'ascii' => 'ASCII',2578 'big5' => 'BIG-5',2579 'eucjpms' => 'eucJP-win',2580 'gb2312' => 'EUC-CN',2581 'ujis' => 'EUC-JP',2582 'utf32' => 'UTF-32',2583 );2584 2585 $supported_charsets = array();2586 if ( function_exists( 'mb_list_encodings' ) ) {2587 $supported_charsets = mb_list_encodings();2588 }2589 2590 2588 $db_check_string = false; 2591 2589 … … 2593 2591 $charset = $value['charset']; 2594 2592 2595 // Column isn't a string, or is latin1, which will will happily store anything. 
2596 if ( false === $charset || 'latin1' === $charset ) { 2593 if ( is_array( $value['length'] ) ) { 2594 $length = $value['length']['length']; 2595 } else { 2596 $length = false; 2597 } 2598 2599 // There's no charset to work with. 2600 if ( false === $charset ) { 2597 2601 continue; 2598 2602 } 2599 2603 2604 // Column isn't a string. 2600 2605 if ( ! is_string( $value['value'] ) ) { 2601 2606 continue; 2602 2607 } 2603 2608 2604 // ASCII is always OK. 2605 if ( ! isset( $value['ascii'] ) && $this->check_ascii( $value['value'] ) ) { 2606 continue; 2607 } 2608 2609 // Convert the text locally. 2610 if ( $supported_charsets ) { 2611 if ( isset( $mb_charsets[ $charset ] ) && in_array( $mb_charsets[ $charset ], $supported_charsets ) ) { 2612 $value['value'] = mb_convert_encoding( $value['value'], $mb_charsets[ $charset ], $mb_charsets[ $charset ] ); 2609 $truncate_by_byte_length = 'byte' === $value['length']['type']; 2610 2611 $needs_validation = true; 2612 if ( 2613 // latin1 can store any byte sequence 2614 'latin1' === $charset 2615 || 2616 // ASCII is always OK. 2617 ( ! isset( $value['ascii'] ) && $this->check_ascii( $value['value'] ) ) 2618 ) { 2619 $truncate_by_byte_length = true; 2620 $needs_validation = false; 2621 } 2622 2623 if ( $truncate_by_byte_length ) { 2624 mbstring_binary_safe_encoding(); 2625 if ( false !== $length && strlen( $value['value'] ) > $length ) { 2626 $value['value'] = substr( $value['value'], 0, $length ); 2627 } 2628 reset_mbstring_encoding(); 2629 2630 if ( ! $needs_validation ) { 2613 2631 continue; 2614 2632 } … … 2616 2634 2617 2635 // utf8 can be handled by regex, which is a bunch faster than a DB lookup. 
2618 if ( 'utf8' === $charset || 'utf8mb3' === $charset || 'utf8mb4' === $charset) {2636 if ( ( 'utf8' === $charset || 'utf8mb3' === $charset || 'utf8mb4' === $charset ) && function_exists( 'mb_strlen' ) ) { 2619 2637 $regex = '/ 2620 2638 ( … … 2626 2644 | [\xEE-\xEF][\x80-\xBF]{2}'; 2627 2645 2628 if ( 'utf8mb4' === $charset ) {2646 if ( 'utf8mb4' === $charset ) { 2629 2647 $regex .= ' 2630 2648 | \xF0[\x90-\xBF][\x80-\xBF]{2} # four-byte sequences 11110xxx 10xxxxxx * 3 … … 2639 2657 /x'; 2640 2658 $value['value'] = preg_replace( $regex, '$1', $value['value'] ); 2659 2660 2661 if ( false !== $length && mb_strlen( $value['value'], 'UTF-8' ) > $length ) { 2662 $value['value'] = mb_substr( $value['value'], 0, $length, 'UTF-8' ); 2663 } 2641 2664 continue; 2642 2665 } … … 2655 2678 } 2656 2679 2657 // Split the CONVERT() calls by charset, so we can make sure the connection is right 2658 $queries[ $value['charset'] ][ $col ] = $this->prepare( "CONVERT( %s USING {$value['charset']} )", $value['value'] ); 2680 // We're going to need to truncate by characters or bytes, depending on the length value we have. 2681 if ( 'byte' === $value['length']['type'] ) { 2682 // Split the CONVERT() calls by charset, so we can make sure the connection is right 2683 $queries[ $value['charset'] ][ $col ] = $this->prepare( "CONVERT( LEFT( CONVERT( %s USING binary ), %d ) USING {$value['charset']} )", $value['value'], $value['length']['length'] ); 2684 } else { 2685 $queries[ $value['charset'] ][ $col ] = $this->prepare( "LEFT( CONVERT( %s USING {$value['charset']} ), %d )", $value['value'], $value['length']['length'] ); 2686 } 2687 2659 2688 unset( $data[ $col ]['db'] ); 2660 2689 } … … 2675 2704 $this->check_current_query = false; 2676 2705 2677 $row = $this->get_row( "SELECT " . implode( ', ', $query ), ARRAY_N ); 2706 $sql = array(); 2707 foreach ( $query as $column => $column_query ) { 2708 $sql[] = $column_query . " AS x_$column"; 2709 } 2710 2711 $row = $this->get_row( "SELECT " . 
implode( ', ', $sql ), ARRAY_A ); 2678 2712 if ( ! $row ) { 2679 2713 $this->set_charset( $this->dbh, $connection_charset ); … … 2681 2715 } 2682 2716 2683 $cols = array_keys( $query ); 2684 $col_count = count( $cols ); 2685 for ( $ii = 0; $ii < $col_count; $ii++ ) { 2686 $data[ $cols[ $ii ] ]['value'] = $row[ $ii ]; 2717 foreach ( array_keys( $query ) as $column ) { 2718 $data[ $column ]['value'] = $row["x_$column"]; 2687 2719 } 2688 2720 } … … 2726 2758 'charset' => $charset, 2727 2759 'ascii' => false, 2760 'length' => false, 2728 2761 ); 2729 2762 … … 2748 2781 */ 2749 2782 public function strip_invalid_text_for_column( $table, $column, $value ) { 2750 if ( ! is_string( $value ) || $this->check_ascii( $value )) {2783 if ( ! is_string( $value ) ) { 2751 2784 return $value; 2752 2785 } … … 2765 2798 'value' => $value, 2766 2799 'charset' => $charset, 2767 ' ascii' => false,2800 'length' => $this->get_col_length( $table, $column ), 2768 2801 ) 2769 2802 ); -
trunk/tests/phpunit/tests/comment.php
r32306 r32364 122 122 } 123 123 124 $post_id = $this->factory->post->create(); 124 $u = $this->factory->user->create(); 125 $post_id = $this->factory->post->create( array( 'post_author' => $u ) ); 125 126 126 127 $data = array( … … 137 138 $id = wp_new_comment( $data ); 138 139 139 $this->assertFalse( $id ); 140 $comment = get_comment( $id ); 141 142 $this->assertEquals( strlen( $comment->comment_content ), 65535 ); 140 143 141 144 // Cleanup. -
trunk/tests/phpunit/tests/compat.php
r25002 r32364 3 3 /** 4 4 * @group compat 5 * @group security-153 5 6 */ 6 7 class Tests_Compat extends WP_UnitTestCase { 7 function test_mb_substr() { 8 $this->assertEquals('баб', _mb_substr('баба', 0, 3)); 9 $this->assertEquals('баб', _mb_substr('баба', 0, -1)); 10 $this->assertEquals('баб', _mb_substr('баба', 0, -1)); 11 $this->assertEquals('I am your б', _mb_substr('I am your баба', 0, 11)); 8 function utf8_string_lengths() { 9 return array( 10 // string, character_length, byte_length 11 array( 'баба', 4, 8 ), 12 array( 'баб', 3, 6 ), 13 array( 'I am your б', 11, 12 ), 14 array( '1111111111', 10, 10 ), 15 array( '²²²²²²²²²²', 10, 20 ), 16 array( '3333333333', 10, 30 ), 17 array( '𝟜𝟜𝟜𝟜𝟜𝟜𝟜𝟜𝟜𝟜', 10, 40 ), 18 array( '1²3𝟜1²3𝟜1²3𝟜', 12, 30 ), 19 ); 20 } 21 22 function utf8_substrings() { 23 return array( 24 // string, start, length, character_substring, byte_substring 25 array( 'баба', 0, 3, 'баб', "б\xD0" ), 26 array( 'баба', 0, -1, 'баб', "баб\xD0" ), 27 array( 'баба', 1, null, 'аба', "\xB1аба" ), 28 array( 'баба', -3, null, 'аба', "\xB1а" ), 29 array( 'баба', -3, 2, 'аб', "\xB1\xD0" ), 30 array( 'баба', -1, 2, 'а', "\xB0" ), 31 array( 'I am your баба', 0, 11, 'I am your б', "I am your \xD0" ), 32 ); 33 } 34 35 /** 36 * @dataProvider utf8_string_lengths 37 */ 38 function test_mb_strlen( $string, $expected_character_length ) { 39 $this->assertEquals( $expected_character_length, _mb_strlen( $string, 'UTF-8' ) ); 40 } 41 42 /** 43 * @dataProvider utf8_string_lengths 44 */ 45 function test_mb_strlen_via_regex( $string, $expected_character_length ) { 46 _wp_can_use_pcre_u( false ); 47 $this->assertEquals( $expected_character_length, _mb_strlen( $string, 'UTF-8' ) ); 48 _wp_can_use_pcre_u( 'reset' ); 49 } 50 51 /** 52 * @dataProvider utf8_string_lengths 53 */ 54 function test_8bit_mb_strlen( $string, $expected_character_length, $expected_byte_length ) { 55 $this->assertEquals( $expected_byte_length, _mb_strlen( $string, '8bit' ) ); 56 } 57 58 /** 59 * @dataProvider 
utf8_substrings 60 */ 61 function test_mb_substr( $string, $start, $length, $expected_character_substring ) { 62 $this->assertEquals( $expected_character_substring, _mb_substr( $string, $start, $length, 'UTF-8' ) ); 63 } 64 65 /** 66 * @dataProvider utf8_substrings 67 */ 68 function test_mb_substr_via_regex( $string, $start, $length, $expected_character_substring ) { 69 _wp_can_use_pcre_u( false ); 70 $this->assertEquals( $expected_character_substring, _mb_substr( $string, $start, $length, 'UTF-8' ) ); 71 _wp_can_use_pcre_u( 'reset' ); 72 } 73 74 /** 75 * @dataProvider utf8_substrings 76 */ 77 function test_8bit_mb_substr( $string, $start, $length, $expected_character_substring, $expected_byte_substring ) { 78 $this->assertEquals( $expected_byte_substring, _mb_substr( $string, $start, $length, '8bit' ) ); 79 } 80 81 function test_mb_substr_phpcore(){ 82 /* https://github.com/php/php-src/blob/php-5.6.8/ext/mbstring/tests/mb_substr_basic.phpt */ 83 $string_ascii = 'ABCDEF'; 84 $string_mb = base64_decode('5pel5pys6Kqe44OG44Kt44K544OI44Gn44GZ44CCMDEyMzTvvJXvvJbvvJfvvJjvvJnjgII='); 85 86 $this->assertEquals( 'DEF', _mb_substr($string_ascii, 3) ); 87 $this->assertEquals( 'DEF', _mb_substr($string_ascii, 3, 5, 'ISO-8859-1') ); 88 89 // specific latin-1 as that is the default the core php test opporates under 90 $this->assertEquals( 'peacrOiqng==' , base64_encode( _mb_substr($string_mb, 2, 7, 'latin-1' ) ) ); 91 $this->assertEquals( '6Kqe44OG44Kt44K544OI44Gn44GZ', base64_encode( _mb_substr($string_mb, 2, 7, 'utf-8') ) ); 92 93 /* https://github.com/php/php-src/blob/php-5.6.8/ext/mbstring/tests/mb_substr_variation1.phpt */ 94 $start = 0; 95 $length = 5; 96 $unset_var = 10; 97 unset ($unset_var); 98 $heredoc = <<<EOT 99 hello world 100 EOT; 101 $inputs = array( 102 /*1*/ 0, 103 1, 104 12345, 105 -2345, 106 // float data 107 /*5*/ 10.5, 108 -10.5, 109 12.3456789000e10, 110 12.3456789000E-10, 111 .5, 112 // null data 113 /*10*/ NULL, 114 null, 115 // boolean data 116 /*12*/ 
true, 117 false, 118 TRUE, 119 FALSE, 120 // empty data 121 /*16*/ "", 122 '', 123 // string data 124 /*18*/ "string", 125 'string', 126 $heredoc, 127 // object data 128 /*21*/ new classA(), 129 // undefined data 130 /*22*/ @$undefined_var, 131 // unset data 132 /*23*/ @$unset_var, 133 ); 134 $outputs = array( 135 "0", 136 "1", 137 "12345", 138 "-2345", 139 "10.5", 140 "-10.5", 141 "12345", 142 "1.234", 143 "0.5", 144 "", 145 "", 146 "1", 147 "", 148 "1", 149 "", 150 "", 151 "", 152 "strin", 153 "strin", 154 "hello", 155 "Class", 156 "", 157 "", 158 ); 159 $iterator = 0; 160 foreach($inputs as $input) { 161 $this->assertEquals( $outputs[$iterator] , _mb_substr($input, $start, $length) ); 162 $iterator++; 163 } 164 12 165 } 13 166 … … 35 188 } 36 189 } 190 191 /* used in test_mb_substr_phpcore */ 192 class classA { 193 public function __toString() { 194 return "Class A object"; 195 } 196 } -
trunk/tests/phpunit/tests/db.php
r32299 r32364 747 747 'format' => '%s', 748 748 'charset' => $expected_charset, 749 'ascii' => false,750 749 'length' => $wpdb->get_col_length( $wpdb->posts, 'post_content' ), 751 750 ) -
trunk/tests/phpunit/tests/db/charset.php
r32261 r32364 7 7 * 8 8 * @group wpdb 9 * @group security-153 9 10 */ 10 11 class Tests_DB_Charset extends WP_UnitTestCase { … … 29 30 'charset' => 'latin1', 30 31 'value' => "\xf0\x9f\x8e\xb7", 31 'expected' => "\xf0\x9f\x8e\xb7" 32 'expected' => "\xf0\x9f\x8e\xb7", 33 'length' => array( 'type' => 'char', 'length' => 100 ), 34 ), 35 'latin1_char_length' => array( 36 // latin1. latin1 never changes. 37 'charset' => 'latin1', 38 'value' => str_repeat( 'A', 11 ), 39 'expected' => str_repeat( 'A', 10 ), 40 'length' => array( 'type' => 'char', 'length' => 10 ), 41 ), 42 'latin1_byte_length' => array( 43 // latin1. latin1 never changes. 44 'charset' => 'latin1', 45 'value' => str_repeat( 'A', 11 ), 46 'expected' => str_repeat( 'A', 10 ), 47 'length' => array( 'type' => 'byte', 'length' => 10 ), 32 48 ), 33 49 'ascii' => array( … … 35 51 'charset' => 'ascii', 36 52 'value' => 'Hello World', 37 'expected' => 'Hello World' 53 'expected' => 'Hello World', 54 'length' => array( 'type' => 'char', 'length' => 100 ), 55 ), 56 'ascii_char_length' => array( 57 // ascii gets special treatment, make sure it's covered 58 'charset' => 'ascii', 59 'value' => str_repeat( 'A', 11 ), 60 'expected' => str_repeat( 'A', 10 ), 61 'length' => array( 'type' => 'char', 'length' => 10 ), 62 ), 63 'ascii_byte_length' => array( 64 // ascii gets special treatment, make sure it's covered 65 'charset' => 'ascii', 66 'value' => str_repeat( 'A', 11 ), 67 'expected' => str_repeat( 'A', 10 ), 68 'length' => array( 'type' => 'byte', 'length' => 10 ), 38 69 ), 39 70 'utf8' => array( … … 41 72 'charset' => 'utf8', 42 73 'value' => "H€llo\xf0\x9f\x98\x88World¢", 43 'expected' => 'H€lloWorld¢' 74 'expected' => 'H€lloWorld¢', 75 'length' => array( 'type' => 'char', 'length' => 100 ), 76 ), 77 'utf8_23char_length' => array( 78 // utf8 only allows <= 3-byte chars 79 'charset' => 'utf8', 80 'value' => str_repeat( "²3", 10 ), 81 'expected' => str_repeat( "²3", 5 ), 82 'length' => array( 'type' => 'char', 'length' 
=> 10 ), 83 ), 84 'utf8_23byte_length' => array( 85 // utf8 only allows <= 3-byte chars 86 'charset' => 'utf8', 87 'value' => str_repeat( "²3", 10 ), 88 'expected' => "²3²3", 89 'length' => array( 'type' => 'byte', 'length' => 10 ), 90 ), 91 'utf8_3char_length' => array( 92 // utf8 only allows <= 3-byte chars 93 'charset' => 'utf8', 94 'value' => str_repeat( "3", 11 ), 95 'expected' => str_repeat( "3", 10 ), 96 'length' => array( 'type' => 'char', 'length' => 10 ), 97 ), 98 'utf8_3byte_length' => array( 99 // utf8 only allows <= 3-byte chars 100 'charset' => 'utf8', 101 'value' => str_repeat( "3", 11 ), 102 'expected' => "333", 103 'length' => array( 'type' => 'byte', 'length' => 10 ), 44 104 ), 45 105 'utf8mb3' => array( … … 47 107 'charset' => 'utf8mb3', 48 108 'value' => "H€llo\xf0\x9f\x98\x88World¢", 49 'expected' => 'H€lloWorld¢' 109 'expected' => 'H€lloWorld¢', 110 'length' => array( 'type' => 'char', 'length' => 100 ), 111 ), 112 'utf8mb3_23char_length' => array( 113 // utf8mb3 should behave the same an utf8 114 'charset' => 'utf8mb3', 115 'value' => str_repeat( "²3", 10 ), 116 'expected' => str_repeat( "²3", 5 ), 117 'length' => array( 'type' => 'char', 'length' => 10 ), 118 ), 119 'utf8mb3_23byte_length' => array( 120 // utf8mb3 should behave the same an utf8 121 'charset' => 'utf8mb3', 122 'value' => str_repeat( "²3", 10 ), 123 'expected' => "²3²3", 124 'length' => array( 'type' => 'byte', 'length' => 10 ), 125 ), 126 'utf8mb3_3char_length' => array( 127 // utf8mb3 should behave the same an utf8 128 'charset' => 'utf8mb3', 129 'value' => str_repeat( "3", 11 ), 130 'expected' => str_repeat( "3", 10 ), 131 'length' => array( 'type' => 'char', 'length' => 10 ), 132 ), 133 'utf8mb3_3byte_length' => array( 134 // utf8mb3 should behave the same an utf8 135 'charset' => 'utf8mb3', 136 'value' => str_repeat( "3", 10 ), 137 'expected' => "333", 138 'length' => array( 'type' => 'byte', 'length' => 10 ), 50 139 ), 51 140 'utf8mb4' => array( … … 53 142 'charset' => 
'utf8mb4', 54 143 'value' => "H€llo\xf0\x9f\x98\x88World¢", 55 'expected' => "H€llo\xf0\x9f\x98\x88World¢" 144 'expected' => "H€llo\xf0\x9f\x98\x88World¢", 145 'length' => array( 'type' => 'char', 'length' => 100 ), 146 ), 147 'utf8mb4_234char_length' => array( 148 // utf8mb4 allows 4-byte characters, too 149 'charset' => 'utf8mb4', 150 'value' => str_repeat( "²3𝟜", 10 ), 151 'expected' => "²3𝟜²3𝟜²3𝟜²", 152 'length' => array( 'type' => 'char', 'length' => 10 ), 153 ), 154 'utf8mb4_234byte_length' => array( 155 // utf8mb4 allows 4-byte characters, too 156 'charset' => 'utf8mb4', 157 'value' => str_repeat( "²3𝟜", 10 ), 158 'expected' => "²3𝟜", 159 'length' => array( 'type' => 'byte', 'length' => 10 ), 160 ), 161 'utf8mb4_4char_length' => array( 162 // utf8mb4 allows 4-byte characters, too 163 'charset' => 'utf8mb4', 164 'value' => str_repeat( "𝟜", 11 ), 165 'expected' => str_repeat( "𝟜", 10 ), 166 'length' => array( 'type' => 'char', 'length' => 10 ), 167 ), 168 'utf8mb4_4byte_length' => array( 169 // utf8mb4 allows 4-byte characters, too 170 'charset' => 'utf8mb4', 171 'value' => str_repeat( "𝟜", 10 ), 172 'expected' => "𝟜𝟜", 173 'length' => array( 'type' => 'byte', 'length' => 10 ), 56 174 ), 57 175 'koi8r' => array( … … 59 177 'value' => "\xfdord\xf2ress", 60 178 'expected' => "\xfdord\xf2ress", 179 'length' => array( 'type' => 'char', 'length' => 100 ), 180 ), 181 'koi8r_char_length' => array( 182 'charset' => 'koi8r', 183 'value' => str_repeat( "\xfd\xf2", 10 ), 184 'expected' => str_repeat( "\xfd\xf2", 5 ), 185 'length' => array( 'type' => 'char', 'length' => 10 ), 186 ), 187 'koi8r_byte_length' => array( 188 'charset' => 'koi8r', 189 'value' => str_repeat( "\xfd\xf2", 10 ), 190 'expected' => str_repeat( "\xfd\xf2", 5 ), 191 'length' => array( 'type' => 'byte', 'length' => 10 ), 61 192 ), 62 193 'hebrew' => array( … … 64 195 'value' => "\xf9ord\xf7ress", 65 196 'expected' => "\xf9ord\xf7ress", 197 'length' => array( 'type' => 'char', 'length' => 100 ), 198 ), 
199 'hebrew_char_length' => array( 200 'charset' => 'hebrew', 201 'value' => str_repeat( "\xf9\xf7", 10 ), 202 'expected' => str_repeat( "\xf9\xf7", 5 ), 203 'length' => array( 'type' => 'char', 'length' => 10 ), 204 ), 205 'hebrew_byte_length' => array( 206 'charset' => 'hebrew', 207 'value' => str_repeat( "\xf9\xf7", 10 ), 208 'expected' => str_repeat( "\xf9\xf7", 5 ), 209 'length' => array( 'type' => 'byte', 'length' => 10 ), 66 210 ), 67 211 'cp1251' => array( … … 69 213 'value' => "\xd8ord\xd0ress", 70 214 'expected' => "\xd8ord\xd0ress", 215 'length' => array( 'type' => 'char', 'length' => 100 ), 216 ), 217 'cp1251_char_length' => array( 218 'charset' => 'cp1251', 219 'value' => str_repeat( "\xd8\xd0", 10 ), 220 'expected' => str_repeat( "\xd8\xd0", 5 ), 221 'length' => array( 'type' => 'char', 'length' => 10 ), 222 ), 223 'cp1251_byte_length' => array( 224 'charset' => 'cp1251', 225 'value' => str_repeat( "\xd8\xd0", 10 ), 226 'expected' => str_repeat( "\xd8\xd0", 5 ), 227 'length' => array( 'type' => 'byte', 'length' => 10 ), 71 228 ), 72 229 'tis620' => array( … … 74 231 'value' => "\xccord\xe3ress", 75 232 'expected' => "\xccord\xe3ress", 233 'length' => array( 'type' => 'char', 'length' => 100 ), 234 ), 235 'tis620_char_length' => array( 236 'charset' => 'tis620', 237 'value' => str_repeat( "\xcc\xe3", 10 ), 238 'expected' => str_repeat( "\xcc\xe3", 5 ), 239 'length' => array( 'type' => 'char', 'length' => 10 ), 240 ), 241 'tis620_byte_length' => array( 242 'charset' => 'tis620', 243 'value' => str_repeat( "\xcc\xe3", 10 ), 244 'expected' => str_repeat( "\xcc\xe3", 5 ), 245 'length' => array( 'type' => 'byte', 'length' => 10 ), 76 246 ), 77 247 'false' => array( … … 79 249 'charset' => false, 80 250 'value' => 100, 81 'expected' => 100 251 'expected' => 100, 252 'length' => false, 82 253 ), 83 254 ); … … 95 266 'charset' => 'big5', 96 267 'value' => $big5, 97 'expected' => $big5 268 'expected' => $big5, 269 'length' => array( 'type' => 'char', 'length' 
=> 100 ), 270 ); 271 272 $fields['big5_char_length'] = array( 273 'charset' => 'big5', 274 'value' => str_repeat( $big5, 10 ), 275 'expected' => str_repeat( $big5, 3 ) . 'a', 276 'length' => array( 'type' => 'char', 'length' => 10 ), 277 ); 278 279 $fields['big5_byte_length'] = array( 280 'charset' => 'big5', 281 'value' => str_repeat( $big5, 10 ), 282 'expected' => str_repeat( $big5, 2 ) . 'a', 283 'length' => array( 'type' => 'byte', 'length' => 10 ), 98 284 ); 99 285 } … … 171 357 172 358 $all_ascii_fields = array( 173 'post_content' => array( 'value' => 'foo foo foo!', 'format' => '%s', 'charset' => false),174 'post_excerpt' => array( 'value' => 'bar bar bar!', 'format' => '%s', 'charset' => false),359 'post_content' => array( 'value' => 'foo foo foo!', 'format' => '%s', 'charset' => $charset ), 360 'post_excerpt' => array( 'value' => 'bar bar bar!', 'format' => '%s', 'charset' => $charset ), 175 361 ); 176 362 177 363 // This is the same data used in process_field_charsets_for_nonexistent_table() 178 364 $non_ascii_string_fields = array( 179 'post_content' => array( 'value' => '¡foo foo foo!', 'format' => '%s', 'charset' => $charset , 'ascii' => false),180 'post_excerpt' => array( 'value' => '¡bar bar bar!', 'format' => '%s', 'charset' => $charset , 'ascii' => false),365 'post_content' => array( 'value' => '¡foo foo foo!', 'format' => '%s', 'charset' => $charset ), 366 'post_excerpt' => array( 'value' => '¡bar bar bar!', 'format' => '%s', 'charset' => $charset ), 181 367 ); 182 368 … … 545 731 self::$_wpdb->query( $drop ); 546 732 } 733 734 function test_strip_invalid_test_for_column_bails_if_ascii_input_too_long() { 735 global $wpdb; 736 737 // TEXT column 738 $stripped = $wpdb->strip_invalid_text_for_column( $wpdb->comments, 'comment_content', str_repeat( 'A', 65536 ) ); 739 $this->assertEquals( 65535, strlen( $stripped ) ); 740 741 // VARCHAR column 742 $stripped = $wpdb->strip_invalid_text_for_column( $wpdb->comments, 'comment_agent', str_repeat( 'A', 256 
) ); 743 $this->assertEquals( 255, strlen( $stripped ) ); 744 } 547 745 }
Note: See TracChangeset for help on using the changeset viewer.