Make WordPress Core

Context Navigation

← Previous Changeset
Next Changeset →

Changeset 32391

Timestamp:

05/06/2015 07:16:41 PM (10 years ago)

Author:

mdawaffe

Message:

WPDB: When checking that a string can be sent to MySQL, we shouldn't use mb_convert_encoding(), as it behaves differently to MySQL's character encoding conversion.

Merge of [32364] to the 3.7 branch.

Props mdawaffe, pento, nbachiyski, jorbin, johnjamesjacoby, jeremyfelt.

Location:

Files:

: 8 edited

src/wp-admin/includes/upgrade.php (modified) (2 diffs)
src/wp-includes/compat.php (modified) (1 diff)
src/wp-includes/version.php (modified) (1 diff)
src/wp-includes/wp-db.php (modified) (19 diffs)
tests/phpunit/tests/comment.php (modified) (2 diffs)
tests/phpunit/tests/compat.php (modified) (2 diffs)
tests/phpunit/tests/db.php (modified) (1 diff)
tests/phpunit/tests/db/charset.php (modified) (14 diffs)

Legend:

: Unmodified
: Added
: Removed

branches/3.7/src/wp-admin/includes/upgrade.php

-                      r32318
+                      r32391
         upgrade_373();
+    if ( $wp_current_db_version < 26150 )
+        upgrade_378();
+    if ( $wp_current_db_version < 26151 )
     maybe_disable_link_manager();
 …
  */
 function upgrade_378() {
+}
+/**
+ * Execute changes made in WordPress 3.7.9.
+ *
+ * @since 3.7.9
+ */
+function upgrade_379() {
     global $wp_current_db_version, $wpdb;
     if ( $wp_current_db_version < 26150 ) {
+    if ( $wp_current_db_version < 26151 ) {
         $content_length = $wpdb->get_col_length( $wpdb->comments, 'comment_content' );
+        if ( ! $content_length ) {
+            $content_length = 65535;
+        }
+        if ( false === $content_length ) {
+            $content_length = array(
+                'type'   => 'byte',
+                'length' => 65535,
+            );
+        } elseif ( ! is_array( $content_length ) ) {
+            $length = (int) $content_length > 0 ? (int) $content_length : 65535;
+            $content_length = array(
+                'type'   => 'byte',
+                'length' => $length
+            );
+        }
+        if ( 'byte' !== $content_length['type'] ) {
+            // Sites with malformed DB schemas are on their own.
+            return;
+        }
+        $allowed_length = intval( $content_length['length'] ) - 10;
         $comments = $wpdb->get_results(
             "SELECT comment_ID FROM $wpdb->comments
             WHERE comment_date_gmt > '2015-04-26'
             AND CHAR_LENGTH( comment_content ) >= $content_length
             AND ( comment_content LIKE '%<%' OR comment_content LIKE '%>%' )"
+            "SELECT `comment_ID` FROM `{$wpdb->comments}`
+                WHERE `comment_date_gmt` > '2015-04-26'
+                AND LENGTH( `comment_content` ) >= {$allowed_length}
+                AND ( `comment_content` LIKE '%<%' OR `comment_content` LIKE '%>%' )"
         );

branches/3.7/src/wp-includes/compat.php

-                      r29388
+                      r32391
+}
+if ( !function_exists('mb_substr') ):
+    function mb_substr( $str, $start, $length=null, $encoding=null ) {
+        return _mb_substr($str, $start, $length, $encoding);
+    }
+endif;
+function _mb_substr( $str, $start, $length=null, $encoding=null ) {
+    // the solution below, works only for utf-8, so in case of a different
+    // charset, just use built-in substr
+    $charset = get_option( 'blog_charset' );
+    if ( !in_array( $charset, array('utf8', 'utf-8', 'UTF8', 'UTF-8') ) ) {
+        return is_null( $length )? substr( $str, $start ) : substr( $str, $start, $length);
+    }
+    // use the regex unicode support to separate the UTF-8 characters into an array
+    preg_match_all( '/./us', $str, $match );
+    $chars = is_null( $length )? array_slice( $match[0], $start ) : array_slice( $match[0], $start, $length );
+    return implode( '', $chars );
+/**
+ * Returns whether PCRE/u (PCRE_UTF8 modifier) is available for use.
+ *
+ * The answer is kept in a static variable, so the probe regex only runs while
+ * that variable still holds the 'reset' sentinel.
+ *
+ * @ignore
+ * @since 4.2.2
+ * @access private
+ *
+ * @param bool $set - Used for testing only
+ *             null   : default - get PCRE/u capability
+ *             false  : Used for testing - return false for future calls to this function
+ *             'reset': Used for testing - restore default behavior of this function
+ * @return mixed The cached value: the raw preg_match() result of the probe, or
+ *               whatever non-null value was last passed as $set.
+ */
+function _wp_can_use_pcre_u( $set = null ) {
+    // 'reset' is the sentinel meaning "not probed yet".
+    static $utf8_pcre = 'reset';
+    // Any non-null argument overwrites the cached value (test hook).
+    if ( null !== $set ) {
+        $utf8_pcre = $set;
+    }
+    if ( 'reset' === $utf8_pcre ) {
+        // Probe with a trivial /u pattern; @ suppresses any warning the probe raises.
+        $utf8_pcre = @preg_match( '/^./u', 'a' );
+    }
+    return $utf8_pcre;
+}
+// Polyfill: define mb_substr() only when no function of that name exists yet,
+// forwarding every argument unchanged to _mb_substr().
+if ( ! function_exists( 'mb_substr' ) ) :
+    function mb_substr( $str, $start, $length = null, $encoding = null ) {
+        return _mb_substr( $str, $start, $length, $encoding );
+    }
+endif;
+/*
+ * Only understands UTF-8 and 8bit.  All other character sets will be treated as 8bit.
+ * For $encoding === UTF-8, the $str input is expected to be a valid UTF-8 byte sequence.
+ * The behavior of this function for invalid inputs is undefined.
+ *
+ * $str      - string to extract from.
+ * $start    - offset of the first unit to return; handed unchanged to substr() or array_slice().
+ * $length   - maximum number of units to return, or null for "up to the end".
+ * $encoding - charset name; null means the 'blog_charset' option is used.
+ *
+ * Returns the extracted substring, measured in characters for the UTF-8 names
+ * checked below and in bytes for every other encoding.
+ */
+function _mb_substr( $str, $start, $length = null, $encoding = null ) {
+    if ( null === $encoding ) {
+        $encoding = get_option( 'blog_charset' );
+    }
+    // The solution below works only for UTF-8,
+    // so in case of a different charset just use built-in substr()
+    if ( ! in_array( $encoding, array( 'utf8', 'utf-8', 'UTF8', 'UTF-8' ) ) ) {
+        return is_null( $length ) ? substr( $str, $start ) : substr( $str, $start, $length );
+    }
+    if ( _wp_can_use_pcre_u() ) {
+        // Use the regex unicode support to separate the UTF-8 characters into an array
+        preg_match_all( '/./us', $str, $match );
+        $chars = is_null( $length ) ? array_slice( $match[0], $start ) : array_slice( $match[0], $start, $length );
+        return implode( '', $chars );
+    }
+    // Fallback when PCRE/u is unavailable: a byte-level pattern whose alternatives
+    // each match one UTF-8 character of one to four bytes.
+    $regex = '/(
+          [\x00-\x7F]                  # single-byte sequences   0xxxxxxx
+        | [\xC2-\xDF][\x80-\xBF]       # double-byte sequences   110xxxxx 10xxxxxx
+        | \xE0[\xA0-\xBF][\x80-\xBF]   # triple-byte sequences   1110xxxx 10xxxxxx * 2
+        | [\xE1-\xEC][\x80-\xBF]{2}
+        | \xED[\x80-\x9F][\x80-\xBF]
+        | [\xEE-\xEF][\x80-\xBF]{2}
+        | \xF0[\x90-\xBF][\x80-\xBF]{2} # four-byte sequences   11110xxx 10xxxxxx * 3
+        | [\xF1-\xF3][\x80-\xBF]{3}
+        | \xF4[\x80-\x8F][\x80-\xBF]{2}
+    )/x';
+    $chars = array( '' ); // Start with 1 element instead of 0 since the first thing we do is pop
+    do {
+        // We had some string left over from the last round, but we counted it in that last round.
+        array_pop( $chars );
+        // Split by UTF-8 character, limit to 1000 characters (last array element will contain the rest of the string)
+        // PREG_SPLIT_DELIM_CAPTURE keeps the matched characters; PREG_SPLIT_NO_EMPTY drops the empty gaps between them.
+        $pieces = preg_split( $regex, $str, 1000, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );
+        $chars = array_merge( $chars, $pieces );
+        // The count() guard ends the loop once a round yields a single piece, i.e. nothing more could be split off.
+    } while ( count( $pieces ) > 1 && $str = array_pop( $pieces ) ); // If there's anything left over, repeat the loop.
+    // A null $length makes array_slice() return everything from $start onward.
+    return join( '', array_slice( $chars, $start, $length ) );
+}
+// Polyfill: define mb_strlen() only when no function of that name exists yet,
+// forwarding both arguments unchanged to _mb_strlen().
+if ( ! function_exists( 'mb_strlen' ) ) :
+    function mb_strlen( $str, $encoding = null ) {
+        return _mb_strlen( $str, $encoding );
+    }
+endif;
+/*
+ * Only understands UTF-8 and 8bit.  All other character sets will be treated as 8bit.
+ * For $encoding === UTF-8, the $str input is expected to be a valid UTF-8 byte sequence.
+ * The result for invalid inputs is unspecified, but the function always terminates.
+ *
+ * $str      - string to measure.
+ * $encoding - charset name; null means the 'blog_charset' option is used.
+ *
+ * Returns the number of characters for the UTF-8 names checked below and the
+ * number of bytes (strlen()) for every other encoding.
+ */
+function _mb_strlen( $str, $encoding = null ) {
+    if ( null === $encoding ) {
+        $encoding = get_option( 'blog_charset' );
+    }
+    // The solution below works only for UTF-8,
+    // so in case of a different charset just use built-in strlen()
+    if ( ! in_array( $encoding, array( 'utf8', 'utf-8', 'UTF8', 'UTF-8' ) ) ) {
+        return strlen( $str );
+    }
+    if ( _wp_can_use_pcre_u() ) {
+        // Use the regex unicode support to separate the UTF-8 characters into an array
+        preg_match_all( '/./us', $str, $match );
+        return count( $match[0] );
+    }
+    // Fallback when PCRE/u is unavailable: a byte-level pattern whose alternatives
+    // each match one UTF-8 character of one to four bytes.
+    $regex = '/(?:
+          [\x00-\x7F]                  # single-byte sequences   0xxxxxxx
+        | [\xC2-\xDF][\x80-\xBF]       # double-byte sequences   110xxxxx 10xxxxxx
+        | \xE0[\xA0-\xBF][\x80-\xBF]   # triple-byte sequences   1110xxxx 10xxxxxx * 2
+        | [\xE1-\xEC][\x80-\xBF]{2}
+        | \xED[\x80-\x9F][\x80-\xBF]
+        | [\xEE-\xEF][\x80-\xBF]{2}
+        | \xF0[\x90-\xBF][\x80-\xBF]{2} # four-byte sequences   11110xxx 10xxxxxx * 3
+        | [\xF1-\xF3][\x80-\xBF]{3}
+        | \xF4[\x80-\x8F][\x80-\xBF]{2}
+    )/x';
+    $count = 1; // Start at 1 instead of 0 since the first thing we do is decrement
+    do {
+        // We had some string left over from the last round, but we counted it in that last round.
+        $count--;
+        // Split by UTF-8 character, limit to 1000 characters (last array element will contain the rest of the string)
+        $pieces = preg_split( $regex, $str, 1000 );
+        // Increment
+        $count += count( $pieces );
+        // Repeat only while the last round made progress (more than one piece means at
+        // least one character matched) and a non-empty remainder is left. Comparing with
+        // '' instead of relying on truthiness keeps a remainder of "0" from ending the
+        // loop one character early, and the count() guard stops a remainder that contains
+        // no valid character from being re-split forever.
+    } while ( count( $pieces ) > 1 && '' !== ( $str = array_pop( $pieces ) ) );
+    // Fencepost: preg_split() always returns one extra item in the array
+    return --$count;
+}

branches/3.7/src/wp-includes/version.php

r32318	r32391
12	12	* @global int $wp_db_version
13	13	*/
14		$wp_db_version = 26150;
	14	$wp_db_version = 26151;
15	15
16	16	/**

branches/3.7/src/wp-includes/wp-db.php

-                      r32318
+                      r32391
      */
     function _insert_replace_helper( $table, $data, $format = null, $type = 'INSERT' ) {
+        $this->insert_id = 0;
         if ( ! in_array( strtoupper( $type ), array( 'REPLACE', 'INSERT' ) ) ) {
             return false;
 …
         $sql = "$type INTO `$table` ($fields) VALUES ($formats)";
-        $this->insert_id = 0;
         $this->check_current_query = false;
         return $this->query( $this->prepare( $sql, $values ) );
 …
                 // This checks %d/%f versus ! %s because it's sprintf() could take more.
                 $value['charset'] = false;
-            } elseif ( $this->check_ascii( $value['value'] ) ) {
-                // If it's ASCII, then we don't need the charset. We can skip this field.
-                $value['charset'] = false;
             } else {
                 $value['charset'] = $this->get_col_charset( $table, $field );
 …
                     return false;
+                }
-                // This isn't ASCII. Don't have strip_invalid_text() re-check.
-                $value['ascii'] = false;
+            }
 …
                     return false;
+                }
+            }
-            if ( false !== $value['length'] && strlen( $value['value'] ) > $value['length'] ) {
-                return false;
+            }
 …
     /**
      * Retrieve the maximum string length allowed in a given column.
+     * The length may either be specified as a byte length or a character length.
+     *
      * @since 4.2.1
 …
      * @param string $table  Table name.
      * @param string $column Column name.
+     * @return mixed Max column length as an int. False if the column has no
+     *               length. WP_Error object if there was an error.
+     * @return mixed array( 'length' => (int), 'type' => 'byte' | 'char' )
+     *               false if the column has no length (for example, numeric column)
+     *               WP_Error object if there was an error.
      */
     public function get_col_length( $table, $column ) {
 …
         switch( $type ) {
+            case 'char':
+            case 'varchar':
+                return array(
+                    'type'   => 'char',
+                    'length' => (int) $length,
+                );
+                break;
             case 'binary':
-            case 'char':
             case 'varbinary':
+            case 'varchar':
+                return $length;
+                return array(
+                    'type'   => 'byte',
+                    'length' => (int) $length,
+                );
                 break;
             case 'tinyblob':
             case 'tinytext':
+                return 255; // 2^8 - 1
+                return array(
+                    'type'   => 'byte',
+                    'length' => 255,        // 2^8 - 1
+                );
                 break;
             case 'blob':
             case 'text':
+                return 65535; // 2^16 - 1
+                return array(
+                    'type'   => 'byte',
+                    'length' => 65535,      // 2^16 - 1
+                );
                 break;
             case 'mediumblob':
             case 'mediumtext':
+                return 16777215; // 2^24 - 1
+                return array(
+                    'type'   => 'byte',
+                    'length' => 16777215,   // 2^24 - 1
+                );
                 break;
             case 'longblob':
             case 'longtext':
+                return 4294967295; // 2^32 - 1
+                return array(
+                    'type'   => 'byte',
+                    'length' => 4294967295, // 2^32 - 1
+                );
                 break;
             default:
 …
         // If any of the columns don't have one of these collations, it needs more sanity checking.
     protected function strip_invalid_text( $data ) {
-        // Some multibyte character sets that we can check in PHP.
-        $mb_charsets = array(
-            'ascii'   => 'ASCII',
-            'big5'    => 'BIG-5',
-            'eucjpms' => 'eucJP-win',
-            'gb2312'  => 'EUC-CN',
-            'ujis'    => 'EUC-JP',
-            'utf32'   => 'UTF-32',
-        );
-        $supported_charsets = array();
-        if ( function_exists( 'mb_list_encodings' ) ) {
-            $supported_charsets = mb_list_encodings();
+        }
         $db_check_string = false;
 …
             $charset = $value['charset'];
+            // Column isn't a string, or is latin1, which will will happily store anything.
+            if ( false === $charset || 'latin1' === $charset ) {
+            if ( is_array( $value['length'] ) ) {
+                $length = $value['length']['length'];
+            } else {
+                $length = false;
+            }
+            // There's no charset to work with.
+            if ( false === $charset ) {
                 continue;
+            }
+            // Column isn't a string.
             if ( ! is_string( $value['value'] ) ) {
                 continue;
+            }
+            // ASCII is always OK.
+            if ( ! isset( $value['ascii'] ) && $this->check_ascii( $value['value'] ) ) {
+                continue;
+            }
+            // Convert the text locally.
+            if ( $supported_charsets ) {
+                if ( isset( $mb_charsets[ $charset ] ) && in_array( $mb_charsets[ $charset ], $supported_charsets ) ) {
+                    $value['value'] = mb_convert_encoding( $value['value'], $mb_charsets[ $charset ], $mb_charsets[ $charset ] );
+            $truncate_by_byte_length = 'byte' === $value['length']['type'];
+            $needs_validation = true;
+            if (
+                // latin1 can store any byte sequence
+                'latin1' === $charset
+            ||
+                // ASCII is always OK.
+                ( ! isset( $value['ascii'] ) && $this->check_ascii( $value['value'] ) )
+            ) {
+                $truncate_by_byte_length = true;
+                $needs_validation = false;
+            }
+            if ( $truncate_by_byte_length ) {
+                mbstring_binary_safe_encoding();
+                if ( false !== $length && strlen( $value['value'] ) > $length ) {
+                    $value['value'] = substr( $value['value'], 0, $length );
+                }
+                reset_mbstring_encoding();
+                if ( ! $needs_validation ) {
                     continue;
+                }
 …
             // utf8 can be handled by regex, which is a bunch faster than a DB lookup.
             if ( 'utf8' === $charset || 'utf8mb3' === $charset || 'utf8mb4' === $charset ) {
+            if ( ( 'utf8' === $charset || 'utf8mb3' === $charset || 'utf8mb4' === $charset ) && function_exists( 'mb_strlen' ) ) {
                 $regex = '/
+                    (
 …
                         |   [\xEE-\xEF][\x80-\xBF]{2}';
                 if ( 'utf8mb4' === $charset) {
+                if ( 'utf8mb4' === $charset ) {
                     $regex .= '
                         |    \xF0[\x90-\xBF][\x80-\xBF]{2} # four-byte sequences   11110xxx 10xxxxxx * 3
 …
                     /x';
                 $value['value'] = preg_replace( $regex, '$1', $value['value'] );
+                if ( false !== $length && mb_strlen( $value['value'], 'UTF-8' ) > $length ) {
+                    $value['value'] = mb_substr( $value['value'], 0, $length, 'UTF-8' );
+                }
                 continue;
+            }
 …
+                    }
+                    // Split the CONVERT() calls by charset, so we can make sure the connection is right
+                    $queries[ $value['charset'] ][ $col ] = $this->prepare( "CONVERT( %s USING {$value['charset']} )", $value['value'] );
+                    // We're going to need to truncate by characters or bytes, depending on the length value we have.
+                    if ( 'byte' === $value['length']['type'] ) {
+                        // Split the CONVERT() calls by charset, so we can make sure the connection is right
+                        $queries[ $value['charset'] ][ $col ] = $this->prepare( "CONVERT( LEFT( CONVERT( %s USING binary ), %d ) USING {$value['charset']} )", $value['value'], $value['length']['length'] );
+                    } else {
+                        $queries[ $value['charset'] ][ $col ] = $this->prepare( "LEFT( CONVERT( %s USING {$value['charset']} ), %d )", $value['value'], $value['length']['length'] );
+                    }
                     unset( $data[ $col ]['db'] );
+                }
 …
                 $this->check_current_query = false;
+                $row = $this->get_row( "SELECT " . implode( ', ', $query ), ARRAY_N );
+                $sql = array();
+                foreach ( $query as $column => $column_query ) {
+                    $sql[] = $column_query . " AS x_$column";
+                }
+                $row = $this->get_row( "SELECT " . implode( ', ', $sql ), ARRAY_A );
                 if ( ! $row ) {
                     $this->set_charset( $this->dbh, $connection_charset );
 …
+                }
+                $cols = array_keys( $query );
+                $col_count = count( $cols );
+                for ( $ii = 0; $ii < $col_count; $ii++ ) {
+                    $data[ $cols[ $ii ] ]['value'] = $row[ $ii ];
+                foreach ( array_keys( $query ) as $column ) {
+                    $data[ $column ]['value'] = $row["x_$column"];
+                }
+            }
 …
             'charset' => $charset,
             'ascii'   => false,
+            'length'  => false,
         );
 …
      */
     public function strip_invalid_text_for_column( $table, $column, $value ) {
         if ( ! is_string( $value ) || $this->check_ascii( $value ) ) {
+        if ( ! is_string( $value ) ) {
             return $value;
+        }
 …
                 'value'   => $value,
                 'charset' => $charset,
                 'ascii'   => false,
+                'length'  => $this->get_col_length( $table, $column ),
+            )
         );

branches/3.7/tests/phpunit/tests/comment.php

-                      r32318
+                      r32391
+        }
+        $post_id = $this->factory->post->create();
+        $u = $this->factory->user->create();
+        $post_id = $this->factory->post->create( array( 'post_author' => $u ) );
         $data = array(
 …
         );
+        add_filter( 'pre_option_moderation_notify', '__return_zero' );
         $id = wp_new_comment( $data );
+        remove_filter( 'pre_option_moderation_notify', '__return_zero' );
         $this->assertFalse( $id );
+        $this->assertEmpty( $id );
         // Cleanup.

branches/3.7/tests/phpunit/tests/compat.php

-                      r25002
+                      r32391
 /**
  * @group compat
+ * @group security-153
  */
 class Tests_Compat extends WP_UnitTestCase {
+    function test_mb_substr() {
+        $this->assertEquals('баб', _mb_substr('баба', 0, 3));
+        $this->assertEquals('баб', _mb_substr('баба', 0, -1));
+        $this->assertEquals('баб', _mb_substr('баба', 0, -1));
+        $this->assertEquals('I am your б', _mb_substr('I am your баба', 0, 11));
+    /**
+     * Data provider: UTF-8 strings paired with their expected length in
+     * characters and in bytes.
+     */
+    function utf8_string_lengths() {
+        return array(
+            //                     string, character_length, byte_length
+            array(                 'баба',                4,           8 ),
+            array(                  'баб',                3,           6 ),
+            array(          'I am your б',               11,          12 ),
+            array(           '1111111111',               10,          10 ),
+            array(           '²²²²²²²²²²',               10,          20 ),
+            array( '３３３３３３３３３３',               10,          30 ),
+            array(           '𝟜𝟜𝟜𝟜𝟜𝟜𝟜𝟜𝟜𝟜',               10,          40 ),
+            array(      '1²３𝟜1²３𝟜1²３𝟜',               12,          30 ),
+        );
+    }
+    /**
+     * Data provider: a string, a start offset and a length, followed by the
+     * substring expected when slicing by character and when slicing by byte.
+     */
+    function utf8_substrings() {
+        return array(
+            //               string, start, length, character_substring,   byte_substring
+            array(           'баба',     0,      3,               'баб',          "б\xD0" ),
+            array(           'баба',     0,     -1,               'баб',        "баб\xD0" ),
+            array(           'баба',     1,   null,               'аба',        "\xB1аба" ),
+            array(           'баба',    -3,   null,               'аба',          "\xB1а" ),
+            array(           'баба',    -3,      2,                'аб',       "\xB1\xD0" ),
+            array(           'баба',    -1,      2,                 'а',           "\xB0" ),
+            array( 'I am your баба',     0,     11,       'I am your б', "I am your \xD0" ),
+        );
+    }
+    /**
+     * _mb_strlen() with an explicit 'UTF-8' encoding returns the character count.
+     *
+     * @dataProvider utf8_string_lengths
+     */
+    function test_mb_strlen( $string, $expected_character_length ) {
+        $this->assertEquals( $expected_character_length, _mb_strlen( $string, 'UTF-8' ) );
+    }
+    /**
+     * Same expectation as test_mb_strlen(), but with _wp_can_use_pcre_u()
+     * forced to report false so _mb_strlen() takes its non-/u regex path.
+     *
+     * @dataProvider utf8_string_lengths
+     */
+    function test_mb_strlen_via_regex( $string, $expected_character_length ) {
+        // Force the "PCRE/u unavailable" answer for the duration of the assertion.
+        _wp_can_use_pcre_u( false );
+        $this->assertEquals( $expected_character_length, _mb_strlen( $string, 'UTF-8' ) );
+        // Restore the default probing behavior for later tests.
+        _wp_can_use_pcre_u( 'reset' );
+    }
+    /**
+     * _mb_strlen() with the '8bit' encoding returns the byte count.
+     *
+     * @dataProvider utf8_string_lengths
+     */
+    function test_8bit_mb_strlen( $string, $expected_character_length, $expected_byte_length ) {
+        $this->assertEquals( $expected_byte_length, _mb_strlen( $string, '8bit' ) );
+    }
+    /**
+     * _mb_substr() with an explicit 'UTF-8' encoding slices by character.
+     *
+     * @dataProvider utf8_substrings
+     */
+    function test_mb_substr( $string, $start, $length, $expected_character_substring ) {
+        $this->assertEquals( $expected_character_substring, _mb_substr( $string, $start, $length, 'UTF-8' ) );
+    }
+    /**
+     * Same expectation as test_mb_substr(), but with _wp_can_use_pcre_u()
+     * forced to report false so _mb_substr() takes its non-/u regex path.
+     *
+     * @dataProvider utf8_substrings
+     */
+    function test_mb_substr_via_regex( $string, $start, $length, $expected_character_substring ) {
+        // Force the "PCRE/u unavailable" answer for the duration of the assertion.
+        _wp_can_use_pcre_u( false );
+        $this->assertEquals( $expected_character_substring, _mb_substr( $string, $start, $length, 'UTF-8' ) );
+        // Restore the default probing behavior for later tests.
+        _wp_can_use_pcre_u( 'reset' );
+    }
+    /**
+     * _mb_substr() with the '8bit' encoding slices by byte.
+     *
+     * @dataProvider utf8_substrings
+     */
+    function test_8bit_mb_substr( $string, $start, $length, $expected_character_substring, $expected_byte_substring ) {
+        $this->assertEquals( $expected_byte_substring, _mb_substr( $string, $start, $length, '8bit' ) );
+    }
+    /**
+     * Replays cases taken from PHP core's own mb_substr() tests against _mb_substr().
+     *
+     * The first part checks ASCII and multibyte input under explicit encodings.
+     * The second part feeds a list of non-string and edge-case values as $str and
+     * compares the first five units of each with the entry at the same position
+     * in $outputs.
+     */
+    function test_mb_substr_phpcore(){
+        /* https://github.com/php/php-src/blob/php-5.6.8/ext/mbstring/tests/mb_substr_basic.phpt */
+        $string_ascii = 'ABCDEF';
+        $string_mb = base64_decode('5pel5pys6Kqe44OG44Kt44K544OI44Gn44GZ44CCMDEyMzTvvJXvvJbvvJfvvJjvvJnjgII=');
+        $this->assertEquals( 'DEF', _mb_substr($string_ascii, 3) );
+        $this->assertEquals( 'DEF', _mb_substr($string_ascii, 3, 5, 'ISO-8859-1') );
+        // specific latin-1 as that is the default the core php test operates under
+        $this->assertEquals( 'peacrOiqng==' , base64_encode( _mb_substr($string_mb, 2, 7, 'latin-1' ) ) );
+        $this->assertEquals( '6Kqe44OG44Kt44K544OI44Gn44GZ', base64_encode( _mb_substr($string_mb, 2, 7, 'utf-8') ) );
+        /* https://github.com/php/php-src/blob/php-5.6.8/ext/mbstring/tests/mb_substr_variation1.phpt */
+        $start = 0;
+        $length = 5;
+        $unset_var = 10;
+        unset ($unset_var);
+        $heredoc = <<<EOT
+hello world
+EOT;
+        // Each entry lines up positionally with the expected string in $outputs below.
+        $inputs = array(
+        /*1*/  0,
+               1,
+               12345,
+               -2345,
+               // float data
+        /*5*/  10.5,
+               -10.5,
+               12.3456789000e10,
+               12.3456789000E-10,
+               .5,
+               // null data
+        /*10*/ NULL,
+               null,
+               // boolean data
+        /*12*/ true,
+               false,
+               TRUE,
+               FALSE,
+               // empty data
+        /*16*/ "",
+               '',
+               // string data
+        /*18*/ "string",
+               'string',
+               $heredoc,
+               // object data
+        /*21*/ new classA(),
+               // undefined data
+        /*22*/ @$undefined_var,
+               // unset data
+        /*23*/ @$unset_var,
+        );
+        $outputs = array(
+            "0",
+            "1",
+            "12345",
+            "-2345",
+            "10.5",
+            "-10.5",
+            "12345",
+            "1.234",
+            "0.5",
+            "",
+            "",
+            "1",
+            "",
+            "1",
+            "",
+            "",
+            "",
+            "strin",
+            "strin",
+            "hello",
+            "Class",
+            "",
+            "",
+        );
+        $iterator = 0;
+        foreach($inputs as $input) {
+            $this->assertEquals( $outputs[$iterator] ,  _mb_substr($input, $start, $length) );
+            $iterator++;
+        }
+    }
 …
+    }
+}
+/*
+ * Used in test_mb_substr_phpcore as the "object data" input: an object whose
+ * string conversion is the fixed text returned by __toString().
+ */
+class classA {
+    public function __toString() {
+        return "Class A object";
+    }
+}

branches/3.7/tests/phpunit/tests/db.php

r32318	r32391
511	511	'format' => '%s',
512	512	'charset' => $expected_charset,
513		~~'ascii' => false,~~
514	513	'length' => $wpdb->get_col_length( $wpdb->posts, 'post_content' ),
515	514	)

branches/3.7/tests/phpunit/tests/db/charset.php

-                      r32275
+                      r32391
+ *
  * @group wpdb
+ * @group security-153
  */
 class Tests_DB_Charset extends WP_UnitTestCase {
 …
                 'charset'  => 'latin1',
                 'value'    => "\xf0\x9f\x8e\xb7",
+                'expected' => "\xf0\x9f\x8e\xb7"
+                'expected' => "\xf0\x9f\x8e\xb7",
+                'length'   => array( 'type' => 'char', 'length' => 100 ),
+            ),
+            'latin1_char_length' => array(
+                // latin1. latin1 never changes.
+                'charset'  => 'latin1',
+                'value'    => str_repeat( 'A', 11 ),
+                'expected' => str_repeat( 'A', 10 ),
+                'length'   => array( 'type' => 'char', 'length' => 10 ),
+            ),
+            'latin1_byte_length' => array(
+                // latin1. latin1 never changes.
+                'charset'  => 'latin1',
+                'value'    => str_repeat( 'A', 11 ),
+                'expected' => str_repeat( 'A', 10 ),
+                'length'   => array( 'type' => 'byte', 'length' => 10 ),
             ),
             'ascii' => array(
 …
                 'charset'  => 'ascii',
                 'value'    => 'Hello World',
+                'expected' => 'Hello World'
+                'expected' => 'Hello World',
+                'length'   => array( 'type' => 'char', 'length' => 100 ),
+            ),
+            'ascii_char_length' => array(
+                // ascii gets special treatment, make sure it's covered
+                'charset'  => 'ascii',
+                'value'    => str_repeat( 'A', 11 ),
+                'expected' => str_repeat( 'A', 10 ),
+                'length'   => array( 'type' => 'char', 'length' => 10 ),
+            ),
+            'ascii_byte_length' => array(
+                // ascii gets special treatment, make sure it's covered
+                'charset'  => 'ascii',
+                'value'    => str_repeat( 'A', 11 ),
+                'expected' => str_repeat( 'A', 10 ),
+                'length'   => array( 'type' => 'byte', 'length' => 10 ),
             ),
             'utf8' => array(
 …
                 'charset'  => 'utf8',
                 'value'    => "H€llo\xf0\x9f\x98\x88World¢",
+                'expected' => 'H€lloWorld¢'
+                'expected' => 'H€lloWorld¢',
+                'length'   => array( 'type' => 'char', 'length' => 100 ),
+            ),
+            'utf8_23char_length' => array(
+                // utf8 only allows <= 3-byte chars
+                'charset'  => 'utf8',
+                'value'    => str_repeat( "²３", 10 ),
+                'expected' => str_repeat( "²３", 5 ),
+                'length'   => array( 'type' => 'char', 'length' => 10 ),
+            ),
+            'utf8_23byte_length' => array(
+                // utf8 only allows <= 3-byte chars
+                'charset'  => 'utf8',
+                'value'    => str_repeat( "²３", 10 ),
+                'expected' => "²３²３",
+                'length'   => array( 'type' => 'byte', 'length' => 10 ),
+            ),
+            'utf8_3char_length' => array(
+                // utf8 only allows <= 3-byte chars
+                'charset'  => 'utf8',
+                'value'    => str_repeat( "３", 11 ),
+                'expected' => str_repeat( "３", 10 ),
+                'length'   => array( 'type' => 'char', 'length' => 10 ),
+            ),
+            'utf8_3byte_length' => array(
+                // utf8 only allows <= 3-byte chars
+                'charset'  => 'utf8',
+                'value'    => str_repeat( "３", 11 ),
+                'expected' => "３３３",
+                'length'   => array( 'type' => 'byte', 'length' => 10 ),
             ),
             'utf8mb3' => array(
 …
                 'charset'  => 'utf8mb3',
                 'value'    => "H€llo\xf0\x9f\x98\x88World¢",
+                'expected' => 'H€lloWorld¢'
+                'expected' => 'H€lloWorld¢',
+                'length'   => array( 'type' => 'char', 'length' => 100 ),
+            ),
+            'utf8mb3_23char_length' => array(
+                // utf8mb3 should behave the same an utf8
+                'charset'  => 'utf8mb3',
+                'value'    => str_repeat( "²３", 10 ),
+                'expected' => str_repeat( "²３", 5 ),
+                'length'   => array( 'type' => 'char', 'length' => 10 ),
+            ),
+            'utf8mb3_23byte_length' => array(
+                // utf8mb3 should behave the same an utf8
+                'charset'  => 'utf8mb3',
+                'value'    => str_repeat( "²３", 10 ),
+                'expected' => "²３²３",
+                'length'   => array( 'type' => 'byte', 'length' => 10 ),
+            ),
+            'utf8mb3_3char_length' => array(
+                // utf8mb3 should behave the same an utf8
+                'charset'  => 'utf8mb3',
+                'value'    => str_repeat( "３", 11 ),
+                'expected' => str_repeat( "３", 10 ),
+                'length'   => array( 'type' => 'char', 'length' => 10 ),
+            ),
+            'utf8mb3_3byte_length' => array(
+                // utf8mb3 should behave the same as utf8
+                'charset'  => 'utf8mb3',
+                'value'    => str_repeat( "３", 10 ),
+                'expected' => "３３３",
+                'length'   => array( 'type' => 'byte', 'length' => 10 ),
             ),
             'utf8mb4' => array(
 …
                 'charset'  => 'utf8mb4',
                 'value'    => "H€llo\xf0\x9f\x98\x88World¢",
+                'expected' => "H€llo\xf0\x9f\x98\x88World¢"
+                'expected' => "H€llo\xf0\x9f\x98\x88World¢",
+                'length'   => array( 'type' => 'char', 'length' => 100 ),
+            ),
+            'utf8mb4_234char_length' => array(
+                // utf8mb4 allows 4-byte characters, too
+                'charset'  => 'utf8mb4',
+                'value'    => str_repeat( "²３𝟜", 10 ),
+                'expected' => "²３𝟜²３𝟜²３𝟜²",
+                'length'   => array( 'type' => 'char', 'length' => 10 ),
+            ),
+            'utf8mb4_234byte_length' => array(
+                // utf8mb4 allows 4-byte characters, too
+                'charset'  => 'utf8mb4',
+                'value'    => str_repeat( "²３𝟜", 10 ),
+                'expected' => "²３𝟜",
+                'length'   => array( 'type' => 'byte', 'length' => 10 ),
+            ),
+            'utf8mb4_4char_length' => array(
+                // utf8mb4 allows 4-byte characters, too
+                'charset'  => 'utf8mb4',
+                'value'    => str_repeat( "𝟜", 11 ),
+                'expected' => str_repeat( "𝟜", 10 ),
+                'length'   => array( 'type' => 'char', 'length' => 10 ),
+            ),
+            'utf8mb4_4byte_length' => array(
+                // utf8mb4 allows 4-byte characters, too
+                'charset'  => 'utf8mb4',
+                'value'    => str_repeat( "𝟜", 10 ),
+                'expected' => "𝟜𝟜",
+                'length'   => array( 'type' => 'byte', 'length' => 10 ),
             ),
             'koi8r' => array(
 …
                 'value'    => "\xfdord\xf2ress",
                 'expected' => "\xfdord\xf2ress",
+                'length'   => array( 'type' => 'char', 'length' => 100 ),
+            ),
+            'koi8r_char_length' => array(
+                'charset'  => 'koi8r',
+                'value'    => str_repeat( "\xfd\xf2", 10 ),
+                'expected' => str_repeat( "\xfd\xf2", 5 ),
+                'length'   => array( 'type' => 'char', 'length' => 10 ),
+            ),
+            'koi8r_byte_length' => array(
+                'charset'  => 'koi8r',
+                'value'    => str_repeat( "\xfd\xf2", 10 ),
+                'expected' => str_repeat( "\xfd\xf2", 5 ),
+                'length'   => array( 'type' => 'byte', 'length' => 10 ),
             ),
             'hebrew' => array(
 …
                 'value'    => "\xf9ord\xf7ress",
                 'expected' => "\xf9ord\xf7ress",
+                'length'   => array( 'type' => 'char', 'length' => 100 ),
+            ),
+            'hebrew_char_length' => array(
+                'charset'  => 'hebrew',
+                'value'    => str_repeat( "\xf9\xf7", 10 ),
+                'expected' => str_repeat( "\xf9\xf7", 5 ),
+                'length'   => array( 'type' => 'char', 'length' => 10 ),
+            ),
+            'hebrew_byte_length' => array(
+                'charset'  => 'hebrew',
+                'value'    => str_repeat( "\xf9\xf7", 10 ),
+                'expected' => str_repeat( "\xf9\xf7", 5 ),
+                'length'   => array( 'type' => 'byte', 'length' => 10 ),
             ),
             'cp1251' => array(
 …
                 'value'    => "\xd8ord\xd0ress",
                 'expected' => "\xd8ord\xd0ress",
+                'length'   => array( 'type' => 'char', 'length' => 100 ),
+            ),
+            'cp1251_char_length' => array(
+                'charset'  => 'cp1251',
+                'value'    => str_repeat( "\xd8\xd0", 10 ),
+                'expected' => str_repeat( "\xd8\xd0", 5 ),
+                'length'   => array( 'type' => 'char', 'length' => 10 ),
+            ),
+            'cp1251_byte_length' => array(
+                'charset'  => 'cp1251',
+                'value'    => str_repeat( "\xd8\xd0", 10 ),
+                'expected' => str_repeat( "\xd8\xd0", 5 ),
+                'length'   => array( 'type' => 'byte', 'length' => 10 ),
             ),
             'tis620' => array(
 …
                 'value'    => "\xccord\xe3ress",
                 'expected' => "\xccord\xe3ress",
+                'length'   => array( 'type' => 'char', 'length' => 100 ),
+            ),
+            'tis620_char_length' => array(
+                'charset'  => 'tis620',
+                'value'    => str_repeat( "\xcc\xe3", 10 ),
+                'expected' => str_repeat( "\xcc\xe3", 5 ),
+                'length'   => array( 'type' => 'char', 'length' => 10 ),
+            ),
+            'tis620_byte_length' => array(
+                'charset'  => 'tis620',
+                'value'    => str_repeat( "\xcc\xe3", 10 ),
+                'expected' => str_repeat( "\xcc\xe3", 5 ),
+                'length'   => array( 'type' => 'byte', 'length' => 10 ),
             ),
             'false' => array(
 …
                 'charset'  => false,
                 'value'    => 100,
+                'expected' => 100
+                'expected' => 100,
+                'length'   => false,
             ),
         );
 …
                 'charset'  => 'big5',
                 'value'    => $big5,
+                'expected' => $big5
+                'expected' => $big5,
+                'length'   => array( 'type' => 'char', 'length' => 100 ),
+            );
+            $fields['big5_char_length'] = array(
+                'charset'  => 'big5',
+                'value'    => str_repeat( $big5, 10 ),
+                'expected' => str_repeat( $big5, 3 ) . 'a',
+                'length'   => array( 'type' => 'char', 'length' => 10 ),
+            );
+            $fields['big5_byte_length'] = array(
+                'charset'  => 'big5',
+                'value'    => str_repeat( $big5, 10 ),
+                'expected' => str_repeat( $big5, 2 ) . 'a',
+                'length'   => array( 'type' => 'byte', 'length' => 10 ),
             );
+        }
 …
         $all_ascii_fields = array(
             'post_content' => array( 'value' => 'foo foo foo!', 'format' => '%s', 'charset' => false ),
             'post_excerpt' => array( 'value' => 'bar bar bar!', 'format' => '%s', 'charset' => false ),
+            'post_content' => array( 'value' => 'foo foo foo!', 'format' => '%s', 'charset' => $charset ),
+            'post_excerpt' => array( 'value' => 'bar bar bar!', 'format' => '%s', 'charset' => $charset ),
         );
         // This is the same data used in process_field_charsets_for_nonexistent_table()
         $non_ascii_string_fields = array(
             'post_content' => array( 'value' => '¡foo foo foo!', 'format' => '%s', 'charset' => $charset, 'ascii' => false ),
             'post_excerpt' => array( 'value' => '¡bar bar bar!', 'format' => '%s', 'charset' => $charset, 'ascii' => false ),
+            'post_content' => array( 'value' => '¡foo foo foo!', 'format' => '%s', 'charset' => $charset ),
+            'post_excerpt' => array( 'value' => '¡bar bar bar!', 'format' => '%s', 'charset' => $charset ),
         );
 …
         self::$_wpdb->query( $drop );
+    }
+    function test_strip_invalid_test_for_column_bails_if_ascii_input_too_long() {
+        global $wpdb;
+        // TEXT column: 65536 ASCII characters go in, and the stripped result is expected to be 65535 bytes long.
+        $stripped = $wpdb->strip_invalid_text_for_column( $wpdb->comments, 'comment_content', str_repeat( 'A', 65536 ) );
+        $this->assertEquals( 65535, strlen( $stripped ) );
+        // VARCHAR column: 256 ASCII characters go in, and the stripped result is expected to be 255 bytes long.
+        $stripped = $wpdb->strip_invalid_text_for_column( $wpdb->comments, 'comment_agent', str_repeat( 'A', 256 ) );
+        $this->assertEquals( 255, strlen( $stripped ) );
+    }
+}

Note: See TracChangeset for help on using the changeset viewer.

Trac UI Preferences

Download in other formats: