Changeset 32163
- Timestamp: 04/20/2015 05:08:00 AM (10 years ago)
- Location: branches/4.1
- Files: 3 edited
Legend:
- Unmodified
- Added
- Removed
branches/4.1/src/wp-includes/wp-db.php
r30807 r32163 145 145 146 146 /** 147 * Cached column info, for sanity checking data before inserting 148 * 149 * @since 4.2.0 150 * @access protected 151 * @var array 152 */ 153 protected $col_meta = array(); 154 155 /** 156 * Calculated character sets on tables 157 * 158 * @since 4.2.0 159 * @access protected 160 * @var array 161 */ 162 protected $table_charset = array(); 163 164 /** 165 * Whether text fields in the current query need to be sanity checked. 166 * 167 * @since 4.2.0 168 * @access protected 169 * @var bool 170 */ 171 protected $check_current_query = true; 172 173 /** 174 * Flag to ensure we don't run into recursion problems when checking the collation. 175 * 176 * @since 4.2.0 177 * @access protected 178 * @see wpdb::check_collation() 179 * @var boolean 180 */ 181 protected $checking_collation = false; 182 183 /** 147 184 * Saved info on the table column 148 185 * … … 648 685 */ 649 686 public function __set( $name, $value ) { 687 $protected_members = array( 688 'col_meta', 689 'table_charset', 690 'check_current_query', 691 ); 692 if ( in_array( $name, $protected_members, true ) ) { 693 return; 694 } 650 695 $this->$name = $value; 651 696 } … … 717 762 if ( ! empty( $collate ) ) 718 763 $query .= $this->prepare( ' COLLATE %s', $collate ); 719 mysqli_query( $ query, $dbh);764 mysqli_query( $dbh, $query ); 720 765 } 721 766 } else { … … 1542 1587 public function query( $query ) { 1543 1588 if ( ! $this->ready ) { 1589 $this->check_current_query = true; 1544 1590 return false; 1545 1591 } … … 1562 1608 $this->func_call = "\$db->query(\"$query\")"; 1563 1609 1610 // If we're writing to the database, make sure the query will write safely. 1611 if ( $this->check_current_query && ! $this->check_ascii( $query ) ) { 1612 $stripped_query = $this->strip_invalid_text_from_query( $query ); 1613 // strip_invalid_text_from_query() can perform queries, so we need 1614 // to flush again, just to make sure everything is clear. 
1615 $this->flush(); 1616 if ( $stripped_query !== $query ) { 1617 $this->insert_id = 0; 1618 return false; 1619 } 1620 } 1621 1622 $this->check_current_query = true; 1564 1623 // Keep track of the last query for debug.. 1565 1624 $this->last_query = $query; … … 1731 1790 */ 1732 1791 function _insert_replace_helper( $table, $data, $format = null, $type = 'INSERT' ) { 1733 if ( ! in_array( strtoupper( $type ), array( 'REPLACE', 'INSERT' ) ) ) 1792 if ( ! in_array( strtoupper( $type ), array( 'REPLACE', 'INSERT' ) ) ) { 1734 1793 return false; 1794 } 1795 1796 $data = $this->process_fields( $table, $data, $format ); 1797 if ( false === $data ) { 1798 return false; 1799 } 1800 1801 $formats = $values = array(); 1802 foreach ( $data as $value ) { 1803 $formats[] = $value['format']; 1804 $values[] = $value['value']; 1805 } 1806 1807 $fields = '`' . implode( '`, `', array_keys( $data ) ) . '`'; 1808 $formats = implode( ', ', $formats ); 1809 1810 $sql = "$type INTO `$table` ($fields) VALUES ($formats)"; 1811 1735 1812 $this->insert_id = 0; 1736 $formats = $format = (array) $format; 1737 $fields = array_keys( $data ); 1738 $formatted_fields = array(); 1739 foreach ( $fields as $field ) { 1740 if ( !empty( $format ) ) 1741 $form = ( $form = array_shift( $formats ) ) ? $form : $format[0]; 1742 elseif ( isset( $this->field_types[$field] ) ) 1743 $form = $this->field_types[$field]; 1744 else 1745 $form = '%s'; 1746 $formatted_fields[] = $form; 1747 } 1748 $sql = "{$type} INTO `$table` (`" . implode( '`,`', $fields ) . "`) VALUES (" . implode( ",", $formatted_fields ) . ")"; 1749 return $this->query( $this->prepare( $sql, $data ) ); 1813 $this->check_current_query = false; 1814 return $this->query( $this->prepare( $sql, $values ) ); 1750 1815 } 1751 1816 … … 1770 1835 */ 1771 1836 public function update( $table, $data, $where, $format = null, $where_format = null ) { 1772 if ( ! is_array( $data ) || ! is_array( $where ) ) 1837 if ( ! is_array( $data ) || ! 
is_array( $where ) ) { 1773 1838 return false; 1774 1775 $formats = $format = (array) $format; 1776 $bits = $wheres = array(); 1777 foreach ( (array) array_keys( $data ) as $field ) { 1778 if ( !empty( $format ) ) 1779 $form = ( $form = array_shift( $formats ) ) ? $form : $format[0]; 1780 elseif ( isset($this->field_types[$field]) ) 1781 $form = $this->field_types[$field]; 1782 else 1783 $form = '%s'; 1784 $bits[] = "`$field` = {$form}"; 1785 } 1786 1787 $where_formats = $where_format = (array) $where_format; 1788 foreach ( (array) array_keys( $where ) as $field ) { 1789 if ( !empty( $where_format ) ) 1790 $form = ( $form = array_shift( $where_formats ) ) ? $form : $where_format[0]; 1791 elseif ( isset( $this->field_types[$field] ) ) 1792 $form = $this->field_types[$field]; 1793 else 1794 $form = '%s'; 1795 $wheres[] = "`$field` = {$form}"; 1796 } 1797 1798 $sql = "UPDATE `$table` SET " . implode( ', ', $bits ) . ' WHERE ' . implode( ' AND ', $wheres ); 1799 return $this->query( $this->prepare( $sql, array_merge( array_values( $data ), array_values( $where ) ) ) ); 1839 } 1840 1841 $data = $this->process_fields( $table, $data, $format ); 1842 if ( false === $data ) { 1843 return false; 1844 } 1845 $where = $this->process_fields( $table, $where, $where_format ); 1846 if ( false === $where ) { 1847 return false; 1848 } 1849 1850 $fields = $conditions = $values = array(); 1851 foreach ( $data as $field => $value ) { 1852 $fields[] = "`$field` = " . $value['format']; 1853 $values[] = $value['value']; 1854 } 1855 foreach ( $where as $field => $value ) { 1856 $conditions[] = "`$field` = " . 
$value['format']; 1857 $values[] = $value['value']; 1858 } 1859 1860 $fields = implode( ', ', $fields ); 1861 $conditions = implode( ' AND ', $conditions ); 1862 1863 $sql = "UPDATE `$table` SET $fields WHERE $conditions"; 1864 1865 $this->check_current_query = false; 1866 return $this->query( $this->prepare( $sql, $values ) ); 1800 1867 } 1801 1868 … … 1817 1884 */ 1818 1885 public function delete( $table, $where, $where_format = null ) { 1819 if ( ! is_array( $where ) ) 1886 if ( ! is_array( $where ) ) { 1820 1887 return false; 1821 1822 $wheres = array(); 1823 1824 $where_formats = $where_format = (array) $where_format; 1825 1826 foreach ( array_keys( $where ) as $field ) { 1827 if ( !empty( $where_format ) ) { 1828 $form = ( $form = array_shift( $where_formats ) ) ? $form : $where_format[0]; 1888 } 1889 1890 $where = $this->process_fields( $table, $where, $where_format ); 1891 if ( false === $where ) { 1892 return false; 1893 } 1894 1895 $conditions = $values = array(); 1896 foreach ( $where as $field => $value ) { 1897 $conditions[] = "`$field` = " . $value['format']; 1898 $values[] = $value['value']; 1899 } 1900 1901 $conditions = implode( ' AND ', $conditions ); 1902 1903 $sql = "DELETE FROM `$table` WHERE $conditions"; 1904 1905 $this->check_current_query = false; 1906 return $this->query( $this->prepare( $sql, $values ) ); 1907 } 1908 1909 /** 1910 * Processes arrays of field/value pairs and field formats. 1911 * 1912 * This is a helper method for wpdb's CRUD methods, which take field/value 1913 * pairs for inserts, updates, and where clauses. This method first pairs 1914 * each value with a format. Then it determines the charset of that field, 1915 * using that to determine if any invalid text would be stripped. If text is 1916 * stripped, then field processing is rejected and the query fails. 1917 * 1918 * @since 4.2.0 1919 * @access protected 1920 * 1921 * @param string $table Table name. 1922 * @param array $data Field/value pair. 
1923 * @param mixed $format Format for each field. 1924 * @return array|bool Returns an array of fields that contain paired values 1925 * and formats. Returns false for invalid values. 1926 */ 1927 protected function process_fields( $table, $data, $format ) { 1928 $data = $this->process_field_formats( $data, $format ); 1929 $data = $this->process_field_charsets( $data, $table ); 1930 if ( false === $data ) { 1931 return false; 1932 } 1933 1934 $converted_data = $this->strip_invalid_text( $data ); 1935 1936 if ( $data !== $converted_data ) { 1937 return false; 1938 } 1939 1940 return $data; 1941 } 1942 1943 /** 1944 * Prepares arrays of value/format pairs as passed to wpdb CRUD methods. 1945 * 1946 * @since 4.2.0 1947 * @access protected 1948 * 1949 * @param array $data Array of fields to values. 1950 * @param mixed $format Formats to be mapped to the values in $data. 1951 * @return array Array, keyed by field names with values being an array 1952 * of 'value' and 'format' keys. 1953 */ 1954 protected function process_field_formats( $data, $format ) { 1955 $formats = $original_formats = (array) $format; 1956 1957 foreach ( $data as $field => $value ) { 1958 $value = array( 1959 'value' => $value, 1960 'format' => '%s', 1961 ); 1962 1963 if ( ! empty( $format ) ) { 1964 $value['format'] = array_shift( $formats ); 1965 if ( ! $value['format'] ) { 1966 $value['format'] = reset( $original_formats ); 1967 } 1829 1968 } elseif ( isset( $this->field_types[ $field ] ) ) { 1830 $form = $this->field_types[ $field ]; 1969 $value['format'] = $this->field_types[ $field ]; 1970 } 1971 1972 $data[ $field ] = $value; 1973 } 1974 1975 return $data; 1976 } 1977 1978 /** 1979 * Adds field charsets to field/value/format arrays generated by 1980 * the {@see wpdb::process_field_formats()} method. 1981 * 1982 * @since 4.2.0 1983 * @access protected 1984 * 1985 * @param array $data As it comes from the {@see wpdb::process_field_formats()} method. 1986 * @param string $table Table name. 
1987 * @return The same array as $data with additional 'charset' keys. 1988 */ 1989 protected function process_field_charsets( $data, $table ) { 1990 foreach ( $data as $field => $value ) { 1991 if ( '%d' === $value['format'] || '%f' === $value['format'] ) { 1992 // We can skip this field if we know it isn't a string. 1993 // This checks %d/%f versus ! %s because it's sprintf() could take more. 1994 $value['charset'] = false; 1995 } elseif ( $this->check_ascii( $value['value'] ) ) { 1996 // If it's ASCII, then we don't need the charset. We can skip this field. 1997 $value['charset'] = false; 1831 1998 } else { 1832 $form = '%s'; 1833 } 1834 1835 $wheres[] = "$field = $form"; 1836 } 1837 1838 $sql = "DELETE FROM $table WHERE " . implode( ' AND ', $wheres ); 1839 return $this->query( $this->prepare( $sql, $where ) ); 1999 $value['charset'] = $this->get_col_charset( $table, $field ); 2000 if ( is_wp_error( $value['charset'] ) ) { 2001 return false; 2002 } 2003 2004 // This isn't ASCII. Don't have strip_invalid_text() re-check. 
2005 $value['ascii'] = false; 2006 } 2007 2008 $data[ $field ] = $value; 2009 } 2010 2011 return $data; 1840 2012 } 1841 2013 … … 1856 2028 public function get_var( $query = null, $x = 0, $y = 0 ) { 1857 2029 $this->func_call = "\$db->get_var(\"$query\", $x, $y)"; 2030 2031 if ( $this->check_collation( $query ) ) { 2032 $this->check_current_query = false; 2033 } 1858 2034 1859 2035 if ( $query ) { … … 1885 2061 public function get_row( $query = null, $output = OBJECT, $y = 0 ) { 1886 2062 $this->func_call = "\$db->get_row(\"$query\",$output,$y)"; 2063 2064 if ( $this->check_collation( $query ) ) { 2065 $this->check_current_query = false; 2066 } 2067 1887 2068 if ( $query ) { 1888 2069 $this->query( $query ); … … 1922 2103 */ 1923 2104 public function get_col( $query = null , $x = 0 ) { 2105 if ( $this->check_collation( $query ) ) { 2106 $this->check_current_query = false; 2107 } 2108 1924 2109 if ( $query ) { 1925 2110 $this->query( $query ); … … 1949 2134 public function get_results( $query = null, $output = OBJECT ) { 1950 2135 $this->func_call = "\$db->get_results(\"$query\", $output)"; 2136 2137 if ( $this->check_collation( $query ) ) { 2138 $this->check_current_query = false; 2139 } 1951 2140 1952 2141 if ( $query ) { … … 1992 2181 1993 2182 /** 2183 * Retrieves the character set for the given table. 2184 * 2185 * @since 4.2.0 2186 * @access protected 2187 * 2188 * @param string $table Table name. 2189 * @return string|WP_Error Table character set, {@see WP_Error} object if it couldn't be found. 2190 */ 2191 protected function get_table_charset( $table ) { 2192 $tablekey = strtolower( $table ); 2193 2194 /** 2195 * Filter the table charset value before the DB is checked. 2196 * 2197 * Passing a non-null value to the filter will effectively short-circuit 2198 * checking the DB for the charset, returning that value instead. 2199 * 2200 * @since 4.2.0 2201 * 2202 * @param string $charset The character set to use. Default null. 
2203 * @param string $table The name of the table being checked. 2204 */ 2205 $charset = apply_filters( 'pre_get_table_charset', null, $table ); 2206 if ( null !== $charset ) { 2207 return $charset; 2208 } 2209 2210 if ( isset( $this->table_charset[ $tablekey ] ) ) { 2211 return $this->table_charset[ $tablekey ]; 2212 } 2213 2214 $charsets = $columns = array(); 2215 $results = $this->get_results( "SHOW FULL COLUMNS FROM `$table`" ); 2216 if ( ! $results ) { 2217 return new WP_Error( 'wpdb_get_table_charset_failure' ); 2218 } 2219 2220 foreach ( $results as $column ) { 2221 $columns[ strtolower( $column->Field ) ] = $column; 2222 } 2223 2224 $this->col_meta[ $tablekey ] = $columns; 2225 2226 foreach ( $columns as $column ) { 2227 if ( ! empty( $column->Collation ) ) { 2228 list( $charset ) = explode( '_', $column->Collation ); 2229 $charsets[ strtolower( $charset ) ] = true; 2230 } 2231 2232 list( $type ) = explode( '(', $column->Type ); 2233 2234 // A binary/blob means the whole query gets treated like this. 2235 if ( in_array( strtoupper( $type ), array( 'BINARY', 'VARBINARY', 'TINYBLOB', 'MEDIUMBLOB', 'BLOB', 'LONGBLOB' ) ) ) { 2236 $this->table_charset[ $tablekey ] = 'binary'; 2237 return 'binary'; 2238 } 2239 } 2240 2241 // utf8mb3 is an alias for utf8. 2242 if ( isset( $charsets['utf8mb3'] ) ) { 2243 $charsets['utf8'] = true; 2244 unset( $charsets['utf8mb3'] ); 2245 } 2246 2247 // Check if we have more than one charset in play. 2248 $count = count( $charsets ); 2249 if ( 1 === $count ) { 2250 $charset = key( $charsets ); 2251 } elseif ( 0 === $count ) { 2252 // No charsets, assume this table can store whatever. 2253 $charset = false; 2254 } else { 2255 // More than one charset. Remove latin1 if present and recalculate. 2256 unset( $charsets['latin1'] ); 2257 $count = count( $charsets ); 2258 if ( 1 === $count ) { 2259 // Only one charset (besides latin1). 
2260 $charset = key( $charsets ); 2261 } elseif ( 2 === $count && isset( $charsets['utf8'], $charsets['utf8mb4'] ) ) { 2262 // Two charsets, but they're utf8 and utf8mb4, use utf8. 2263 $charset = 'utf8'; 2264 } else { 2265 // Two mixed character sets. ascii. 2266 $charset = 'ascii'; 2267 } 2268 } 2269 2270 $this->table_charset[ $tablekey ] = $charset; 2271 return $charset; 2272 } 2273 2274 /** 2275 * Retrieves the character set for the given column. 2276 * 2277 * @since 4.2.0 2278 * @access public 2279 * 2280 * @param string $table Table name. 2281 * @param string $column Column name. 2282 * @return mixed Column character set as a string. False if the column has no 2283 * character set. {@see WP_Error} object if there was an error. 2284 */ 2285 public function get_col_charset( $table, $column ) { 2286 $tablekey = strtolower( $table ); 2287 $columnkey = strtolower( $column ); 2288 2289 /** 2290 * Filter the column charset value before the DB is checked. 2291 * 2292 * Passing a non-null value to the filter will short-circuit 2293 * checking the DB for the charset, returning that value instead. 2294 * 2295 * @since 4.2.0 2296 * 2297 * @param string $charset The character set to use. Default null. 2298 * @param string $table The name of the table being checked. 2299 * @param string $column The name of the column being checked. 2300 */ 2301 $charset = apply_filters( 'pre_get_col_charset', null, $table, $column ); 2302 if ( null !== $charset ) { 2303 return $charset; 2304 } 2305 2306 // Skip this entirely if this isn't a MySQL database. 2307 if ( false === $this->is_mysql ) { 2308 return false; 2309 } 2310 2311 if ( empty( $this->table_charset[ $tablekey ] ) ) { 2312 // This primes column information for us. 2313 $table_charset = $this->get_table_charset( $table ); 2314 if ( is_wp_error( $table_charset ) ) { 2315 return $table_charset; 2316 } 2317 } 2318 2319 // If still no column information, return the table charset. 
2320 if ( empty( $this->col_meta[ $tablekey ] ) ) { 2321 return $this->table_charset[ $tablekey ]; 2322 } 2323 2324 // If this column doesn't exist, return the table charset. 2325 if ( empty( $this->col_meta[ $tablekey ][ $columnkey ] ) ) { 2326 return $this->table_charset[ $tablekey ]; 2327 } 2328 2329 // Return false when it's not a string column. 2330 if ( empty( $this->col_meta[ $tablekey ][ $columnkey ]->Collation ) ) { 2331 return false; 2332 } 2333 2334 list( $charset ) = explode( '_', $this->col_meta[ $tablekey ][ $columnkey ]->Collation ); 2335 return $charset; 2336 } 2337 2338 /** 2339 * Check if a string is ASCII. 2340 * 2341 * The negative regex is faster for non-ASCII strings, as it allows 2342 * the search to finish as soon as it encounters a non-ASCII character. 2343 * 2344 * @since 4.2.0 2345 * @access protected 2346 * 2347 * @param string $string String to check. 2348 * @return bool True if ASCII, false if not. 2349 */ 2350 protected function check_ascii( $string ) { 2351 if ( function_exists( 'mb_check_encoding' ) ) { 2352 if ( mb_check_encoding( $string, 'ASCII' ) ) { 2353 return true; 2354 } 2355 } elseif ( ! preg_match( '/[^\x00-\x7F]/', $string ) ) { 2356 return true; 2357 } 2358 2359 return false; 2360 } 2361 2362 /** 2363 * Check if the query is accessing a collation considered safe on the current version of MySQL. 2364 * 2365 * @since 4.2.0 2366 * @access protected 2367 * 2368 * @param string $query The query to check. 2369 * @return bool True if the collation is safe, false if it isn't. 2370 */ 2371 protected function check_collation( $query ) { 2372 if ( $this->checking_collation ) { 2373 return true; 2374 } 2375 $table = $this->get_table_from_query( $query ); 2376 if ( ! 
$table ) { 2377 return false; 2378 } 2379 2380 $this->checking_collation = true; 2381 $this->get_table_charset( $table ); 2382 $this->checking_collation = false; 2383 2384 $table = strtolower( $table ); 2385 if ( empty( $this->col_meta[ $table ] ) ) { 2386 return false; 2387 } 2388 2389 foreach( $this->col_meta[ $table ] as $col ) { 2390 if ( empty( $col->Collation ) ) { 2391 continue; 2392 } 2393 2394 if ( ! in_array( $col->Collation, array( 'utf8_general_ci', 'utf8_bin', 'utf8mb4_general_ci', 'utf8mb4_bin' ), true ) ) { 2395 return false; 2396 } 2397 } 2398 2399 return true; 2400 } 2401 2402 /** 2403 * Strips any invalid characters based on value/charset pairs. 2404 * 2405 * @since 4.2.0 2406 * @access protected 2407 * 2408 * @param array $data Array of value arrays. Each value array has the keys 2409 * 'value' and 'charset'. An optional 'ascii' key can be 2410 * set to false to avoid redundant ASCII checks. 2411 * @return array|WP_Error The $data parameter, with invalid characters removed from 2412 * each value. This works as a passthrough: any additional keys 2413 * such as 'field' are retained in each value array. If we cannot 2414 * remove invalid characters, a {@see WP_Error} object is returned. 2415 */ 2416 protected function strip_invalid_text( $data ) { 2417 // Some multibyte character sets that we can check in PHP. 2418 $mb_charsets = array( 2419 'ascii' => 'ASCII', 2420 'big5' => 'BIG-5', 2421 'eucjpms' => 'eucJP-win', 2422 'gb2312' => 'EUC-CN', 2423 'ujis' => 'EUC-JP', 2424 'utf32' => 'UTF-32', 2425 ); 2426 2427 $supported_charsets = array(); 2428 if ( function_exists( 'mb_list_encodings' ) ) { 2429 $supported_charsets = mb_list_encodings(); 2430 } 2431 2432 $db_check_string = false; 2433 2434 foreach ( $data as &$value ) { 2435 $charset = $value['charset']; 2436 2437 // Column isn't a string, or is latin1, which will will happily store anything. 2438 if ( false === $charset || 'latin1' === $charset ) { 2439 continue; 2440 } 2441 2442 if ( ! 
is_string( $value['value'] ) ) { 2443 continue; 2444 } 2445 2446 // ASCII is always OK. 2447 if ( ! isset( $value['ascii'] ) && $this->check_ascii( $value['value'] ) ) { 2448 continue; 2449 } 2450 2451 // Convert the text locally. 2452 if ( $supported_charsets ) { 2453 if ( isset( $mb_charsets[ $charset ] ) && in_array( $mb_charsets[ $charset ], $supported_charsets ) ) { 2454 $value['value'] = mb_convert_encoding( $value['value'], $mb_charsets[ $charset ], $mb_charsets[ $charset ] ); 2455 continue; 2456 } 2457 } 2458 2459 // utf8 can be handled by regex, which is a bunch faster than a DB lookup. 2460 if ( 'utf8' === $charset || 'utf8mb3' === $charset || 'utf8mb4' === $charset ) { 2461 $regex = '/ 2462 ( 2463 (?: [\x00-\x7F] # single-byte sequences 0xxxxxxx 2464 | [\xC2-\xDF][\x80-\xBF] # double-byte sequences 110xxxxx 10xxxxxx 2465 | \xE0[\xA0-\xBF][\x80-\xBF] # triple-byte sequences 1110xxxx 10xxxxxx * 2 2466 | [\xE1-\xEC][\x80-\xBF]{2} 2467 | \xED[\x80-\x9F][\x80-\xBF] 2468 | [\xEE-\xEF][\x80-\xBF]{2}'; 2469 2470 if ( 'utf8mb4' === $charset) { 2471 $regex .= ' 2472 | \xF0[\x90-\xBF][\x80-\xBF]{2} # four-byte sequences 11110xxx 10xxxxxx * 3 2473 | [\xF1-\xF3][\x80-\xBF]{3} 2474 | \xF4[\x80-\x8F][\x80-\xBF]{2} 2475 '; 2476 } 2477 2478 $regex .= '){1,50} # ...one or more times 2479 ) 2480 | . # anything else 2481 /x'; 2482 $value['value'] = preg_replace( $regex, '$1', $value['value'] ); 2483 continue; 2484 } 2485 2486 // We couldn't use any local conversions, send it to the DB. 2487 $value['db'] = $db_check_string = true; 2488 } 2489 unset( $value ); // Remove by reference. 2490 2491 if ( $db_check_string ) { 2492 $queries = array(); 2493 foreach ( $data as $col => $value ) { 2494 if ( ! empty( $value['db'] ) ) { 2495 if ( ! 
isset( $queries[ $value['charset'] ] ) ) { 2496 $queries[ $value['charset'] ] = array(); 2497 } 2498 2499 // Split the CONVERT() calls by charset, so we can make sure the connection is right 2500 $queries[ $value['charset'] ][ $col ] = $this->prepare( "CONVERT( %s USING {$value['charset']} )", $value['value'] ); 2501 } 2502 } 2503 2504 $connection_charset = $this->charset; 2505 foreach ( $queries as $charset => $query ) { 2506 if ( ! $query ) { 2507 continue; 2508 } 2509 2510 // Change the charset to match the string(s) we're converting 2511 if ( $charset !== $connection_charset ) { 2512 $connection_charset = $charset; 2513 $this->set_charset( $this->dbh, $charset ); 2514 } 2515 2516 $this->check_current_query = false; 2517 2518 $row = $this->get_row( "SELECT " . implode( ', ', $query ), ARRAY_N ); 2519 if ( ! $row ) { 2520 $this->set_charset( $this->dbh, $connection_charset ); 2521 return new WP_Error( 'wpdb_strip_invalid_text_failure' ); 2522 } 2523 2524 $cols = array_keys( $query ); 2525 $col_count = count( $cols ); 2526 for ( $ii = 0; $ii < $col_count; $ii++ ) { 2527 $data[ $cols[ $ii ] ]['value'] = $row[ $ii ]; 2528 } 2529 } 2530 2531 // Don't forget to change the charset back! 2532 if ( $connection_charset !== $this->charset ) { 2533 $this->set_charset( $this->dbh ); 2534 } 2535 } 2536 2537 return $data; 2538 } 2539 2540 /** 2541 * Strips any invalid characters from the query. 2542 * 2543 * @since 4.2.0 2544 * @access protected 2545 * 2546 * @param string $query Query to convert. 2547 * @return string|WP_Error The converted query, or a {@see WP_Error} object if the conversion fails. 
2548 */ 2549 protected function strip_invalid_text_from_query( $query ) { 2550 $table = $this->get_table_from_query( $query ); 2551 if ( $table ) { 2552 $charset = $this->get_table_charset( $table ); 2553 if ( is_wp_error( $charset ) ) { 2554 return $charset; 2555 } 2556 2557 // We can't reliably strip text from tables containing binary/blob columns 2558 if ( 'binary' === $charset ) { 2559 return $query; 2560 } 2561 } else { 2562 $charset = $this->charset; 2563 } 2564 2565 $data = array( 2566 'value' => $query, 2567 'charset' => $charset, 2568 'ascii' => false, 2569 ); 2570 2571 $data = $this->strip_invalid_text( array( $data ) ); 2572 if ( is_wp_error( $data ) ) { 2573 return $data; 2574 } 2575 2576 return $data[0]['value']; 2577 } 2578 2579 /** 2580 * Strips any invalid characters from the string for a given table and column. 2581 * 2582 * @since 4.2.0 2583 * @access public 2584 * 2585 * @param string $table Table name. 2586 * @param string $column Column name. 2587 * @param string $value The text to check. 2588 * @return string|WP_Error The converted string, or a `WP_Error` object if the conversion fails. 2589 */ 2590 public function strip_invalid_text_for_column( $table, $column, $value ) { 2591 if ( ! is_string( $value ) || $this->check_ascii( $value ) ) { 2592 return $value; 2593 } 2594 2595 $charset = $this->get_col_charset( $table, $column ); 2596 if ( ! $charset ) { 2597 // Not a string column. 2598 return $value; 2599 } elseif ( is_wp_error( $charset ) ) { 2600 // Bail on real errors. 2601 return $charset; 2602 } 2603 2604 $data = array( 2605 $column => array( 2606 'value' => $value, 2607 'charset' => $charset, 2608 'ascii' => false, 2609 ) 2610 ); 2611 2612 $data = $this->strip_invalid_text( $data ); 2613 if ( is_wp_error( $data ) ) { 2614 return $data; 2615 } 2616 2617 return $data[ $column ]['value']; 2618 } 2619 2620 /** 2621 * Find the first table name referenced in a query. 
2622 * 2623 * @since 4.2.0 2624 * @access protected 2625 * 2626 * @param string $query The query to search. 2627 * @return string|false $table The table name found, or false if a table couldn't be found. 2628 */ 2629 protected function get_table_from_query( $query ) { 2630 // Remove characters that can legally trail the table name. 2631 $query = rtrim( $query, ';/-#' ); 2632 2633 // Allow (select...) union [...] style queries. Use the first query's table name. 2634 $query = ltrim( $query, "\r\n\t (" ); 2635 2636 /* 2637 * Strip everything between parentheses except nested selects and use only 1,000 2638 * chars of the query. 2639 */ 2640 $query = preg_replace( '/\((?!\s*select)[^(]*?\)/is', '()', substr( $query, 0, 1000 ) ); 2641 2642 // Quickly match most common queries. 2643 if ( preg_match( '/^\s*(?:' 2644 . 'SELECT.*?\s+FROM' 2645 . '|INSERT(?:\s+LOW_PRIORITY|\s+DELAYED|\s+HIGH_PRIORITY)?(?:\s+IGNORE)?(?:\s+INTO)?' 2646 . '|REPLACE(?:\s+LOW_PRIORITY|\s+DELAYED)?(?:\s+INTO)?' 2647 . '|UPDATE(?:\s+LOW_PRIORITY)?(?:\s+IGNORE)?' 2648 . '|DELETE(?:\s+LOW_PRIORITY|\s+QUICK|\s+IGNORE)*(?:\s+FROM)?' 2649 . ')\s+`?([\w-]+)`?/is', $query, $maybe ) ) { 2650 return $maybe[1]; 2651 } 2652 2653 // SHOW TABLE STATUS and SHOW TABLES 2654 if ( preg_match( '/^\s*(?:' 2655 . 'SHOW\s+TABLE\s+STATUS.+(?:LIKE\s+|WHERE\s+Name\s*=\s*)' 2656 . '|SHOW\s+(?:FULL\s+)?TABLES.+(?:LIKE\s+|WHERE\s+Name\s*=\s*)' 2657 . ')\W([\w-]+)\W/is', $query, $maybe ) ) { 2658 return $maybe[1]; 2659 } 2660 2661 // Big pattern for the rest of the table-related queries. 2662 if ( preg_match( '/^\s*(?:' 2663 . '(?:EXPLAIN\s+(?:EXTENDED\s+)?)?SELECT.*?\s+FROM' 2664 . '|DESCRIBE|DESC|EXPLAIN|HANDLER' 2665 . '|(?:LOCK|UNLOCK)\s+TABLE(?:S)?' 2666 . '|(?:RENAME|OPTIMIZE|BACKUP|RESTORE|CHECK|CHECKSUM|ANALYZE|REPAIR).*\s+TABLE' 2667 . '|TRUNCATE(?:\s+TABLE)?' 2668 . '|CREATE(?:\s+TEMPORARY)?\s+TABLE(?:\s+IF\s+NOT\s+EXISTS)?' 2669 . '|ALTER(?:\s+IGNORE)?\s+TABLE' 2670 . '|DROP\s+TABLE(?:\s+IF\s+EXISTS)?' 2671 . 
'|CREATE(?:\s+\w+)?\s+INDEX.*\s+ON' 2672 . '|DROP\s+INDEX.*\s+ON' 2673 . '|LOAD\s+DATA.*INFILE.*INTO\s+TABLE' 2674 . '|(?:GRANT|REVOKE).*ON\s+TABLE' 2675 . '|SHOW\s+(?:.*FROM|.*TABLE)' 2676 . ')\s+\(*\s*`?([\w-]+)`?\s*\)*/is', $query, $maybe ) ) { 2677 return $maybe[1]; 2678 } 2679 2680 return false; 2681 } 2682 2683 /** 1994 2684 * Load the column metadata from the last query. 1995 2685 * -
branches/4.1/tests/phpunit/tests/db.php
r30807 r32163 233 233 $this->assertNotEmpty( $wpdb->dbh ); 234 234 } 235 236 /** 237 * @ticket 21212 238 */ 239 function test_wpdb_actually_protected_properties() { 240 global $wpdb; 241 242 $new_meta = "HAHA I HOPE THIS DOESN'T WORK"; 243 244 $col_meta = $wpdb->col_meta; 245 $wpdb->col_meta = $new_meta; 246 247 $this->assertNotEquals( $col_meta, $new_meta ); 248 $this->assertEquals( $col_meta, $wpdb->col_meta ); 249 } 250 235 251 /** 236 252 * @ticket 18510 … … 505 521 $wpdb->suppress_errors( $suppress ); 506 522 } 523 524 /** 525 * @ticket 21212 526 */ 527 function data_get_table_from_query() { 528 $table = 'a_test_table_name'; 529 530 $queries = array( 531 // Basic 532 "SELECT * FROM $table", 533 "SELECT * FROM `$table`", 534 535 "INSERT $table", 536 "INSERT IGNORE $table", 537 "INSERT IGNORE INTO $table", 538 "INSERT INTO $table", 539 "INSERT LOW_PRIORITY $table", 540 "INSERT DELAYED $table", 541 "INSERT HIGH_PRIORITY $table", 542 "INSERT LOW_PRIORITY IGNORE $table", 543 "INSERT LOW_PRIORITY INTO $table", 544 "INSERT LOW_PRIORITY IGNORE INTO $table", 545 546 "REPLACE $table", 547 "REPLACE INTO $table", 548 "REPLACE LOW_PRIORITY $table", 549 "REPLACE DELAYED $table", 550 "REPLACE LOW_PRIORITY INTO $table", 551 552 "UPDATE LOW_PRIORITY $table", 553 "UPDATE LOW_PRIORITY IGNORE $table", 554 555 "DELETE $table", 556 "DELETE IGNORE $table", 557 "DELETE IGNORE FROM $table", 558 "DELETE FROM $table", 559 "DELETE LOW_PRIORITY $table", 560 "DELETE QUICK $table", 561 "DELETE IGNORE $table", 562 "DELETE LOW_PRIORITY FROM $table", 563 564 // STATUS 565 "SHOW TABLE STATUS LIKE '$table'", 566 "SHOW TABLE STATUS WHERE NAME='$table'", 567 568 "SHOW TABLES LIKE '$table'", 569 "SHOW FULL TABLES LIKE '$table'", 570 "SHOW TABLES WHERE NAME='$table'", 571 572 // Extended 573 "EXPLAIN SELECT * FROM $table", 574 "EXPLAIN EXTENDED SELECT * FROM $table", 575 "EXPLAIN EXTENDED SELECT * FROM `$table`", 576 577 "DESCRIBE $table", 578 "DESC $table", 579 "EXPLAIN $table", 580 "HANDLER 
$table", 581 582 "LOCK TABLE $table", 583 "LOCK TABLES $table", 584 "UNLOCK TABLE $table", 585 586 "RENAME TABLE $table", 587 "OPTIMIZE TABLE $table", 588 "BACKUP TABLE $table", 589 "RESTORE TABLE $table", 590 "CHECK TABLE $table", 591 "CHECKSUM TABLE $table", 592 "ANALYZE TABLE $table", 593 "REPAIR TABLE $table", 594 595 "TRUNCATE $table", 596 "TRUNCATE TABLE $table", 597 598 "CREATE TABLE $table", 599 "CREATE TEMPORARY TABLE $table", 600 "CREATE TABLE IF NOT EXISTS $table", 601 602 "ALTER TABLE $table", 603 "ALTER IGNORE TABLE $table", 604 605 "DROP TABLE $table", 606 "DROP TABLE IF EXISTS $table", 607 608 "CREATE INDEX foo(bar(20)) ON $table", 609 "CREATE UNIQUE INDEX foo(bar(20)) ON $table", 610 "CREATE FULLTEXT INDEX foo(bar(20)) ON $table", 611 "CREATE SPATIAL INDEX foo(bar(20)) ON $table", 612 613 "DROP INDEX foo ON $table", 614 615 "LOAD DATA INFILE 'wp.txt' INTO TABLE $table", 616 "LOAD DATA LOW_PRIORITY INFILE 'wp.txt' INTO TABLE $table", 617 "LOAD DATA CONCURRENT INFILE 'wp.txt' INTO TABLE $table", 618 "LOAD DATA LOW_PRIORITY LOCAL INFILE 'wp.txt' INTO TABLE $table", 619 "LOAD DATA INFILE 'wp.txt' REPLACE INTO TABLE $table", 620 "LOAD DATA INFILE 'wp.txt' IGNORE INTO TABLE $table", 621 622 "GRANT ALL ON TABLE $table", 623 "REVOKE ALL ON TABLE $table", 624 625 "SHOW COLUMNS FROM $table", 626 "SHOW FULL COLUMNS FROM $table", 627 "SHOW CREATE TABLE $table", 628 "SHOW INDEX FROM $table", 629 ); 630 631 foreach ( $queries as &$query ) { 632 $query = array( $query, $table ); 633 } 634 return $queries; 635 } 636 637 /** 638 * @dataProvider data_get_table_from_query 639 * @ticket 21212 640 */ 641 function test_get_table_from_query( $query, $table ) { 642 $this->assertEquals( $table, self::$_wpdb->get_table_from_query( $query ) ); 643 } 644 645 function data_get_table_from_query_false() { 646 $table = 'a_test_table_name'; 647 return array( 648 array( "LOL THIS ISN'T EVEN A QUERY $table" ), 649 ); 650 } 651 652 /** 653 * @dataProvider 
data_get_table_from_query_false
	 * @ticket 21212
	 */
	function test_get_table_from_query_false( $query ) {
		$this->assertFalse( self::$_wpdb->get_table_from_query( $query ) );
	}

	/**
	 * Data provider for test_process_field_formats().
	 *
	 * Every local variable defined before the get_defined_vars() call becomes
	 * one data set: field data, the format list (or null), the expected
	 * value/format pairs and, appended last, the variable's own name, which
	 * the test uses as its assertion message. Do not add helper variables
	 * above that call, or they will be turned into data sets as well.
	 *
	 * @ticket 21212
	 */
	function data_process_field_formats() {
		$core_db_fields_no_format_specified = array(
			array( 'post_content' => 'foo', 'post_parent' => 0 ),
			null,
			array(
				'post_content' => array( 'value' => 'foo', 'format' => '%s' ),
				'post_parent' => array( 'value' => 0, 'format' => '%d' ),
			)
		);

		$core_db_fields_formats_specified = array(
			array( 'post_content' => 'foo', 'post_parent' => 0 ),
			array( '%d', '%s' ), // These override core field_types
			array(
				'post_content' => array( 'value' => 'foo', 'format' => '%d' ),
				'post_parent' => array( 'value' => 0, 'format' => '%s' ),
			)
		);

		$misc_fields_no_format_specified = array(
			array( 'this_is_not_a_core_field' => 'foo', 'this_is_not_either' => 0 ),
			null,
			array(
				'this_is_not_a_core_field' => array( 'value' => 'foo', 'format' => '%s' ),
				'this_is_not_either' => array( 'value' => 0, 'format' => '%s' ),
			)
		);

		$misc_fields_formats_specified = array(
			array( 'this_is_not_a_core_field' => 0, 'this_is_not_either' => 1.2 ),
			array( '%d', '%f' ),
			array(
				'this_is_not_a_core_field' => array( 'value' => 0, 'format' => '%d' ),
				'this_is_not_either' => array( 'value' => 1.2, 'format' => '%f' ),
			)
		);

		$misc_fields_insufficient_formats_specified = array(
			array( 'this_is_not_a_core_field' => 0, 'this_is_not_either' => 's', 'nor_this' => 1 ),
			array( '%d', '%s' ), // The first format is used for the third
			array(
				'this_is_not_a_core_field' => array( 'value' => 0, 'format' => '%d' ),
				'this_is_not_either' => array( 'value' => 's', 'format' => '%s' ),
				'nor_this' => array( 'value' => 1, 'format' => '%d' ),
			)
		);

		$vars = get_defined_vars();
		// Push the variable name onto the end for assertSame $message
		foreach ( $vars as $var_name => $var ) {
			$vars[ $var_name ][] = $var_name;
		}
		return array_values( $vars );
	}

	/**
	 * @dataProvider data_process_field_formats
	 * @ticket 21212
	 */
	function test_process_field_formats( $data, $format, $expected, $message ) {
		$actual = self::$_wpdb->process_field_formats( $data, $format );
		$this->assertSame( $expected, $actual, $message );
	}

	/**
	 * Checks the value/format/charset/ascii structure that process_fields()
	 * builds for a non-ASCII post_content value.
	 *
	 * @ticket 21212
	 */
	function test_process_fields() {
		global $wpdb;

		if ( $wpdb->charset ) {
			$expected_charset = $wpdb->charset;
		} else {
			$expected_charset = $wpdb->get_col_charset( $wpdb->posts, 'post_content' );
		}

		// Strict comparison: the looked-up charset is not guaranteed to be a
		// string, and a loose match must never let the test run by accident.
		if ( ! in_array( $expected_charset, array( 'utf8', 'utf8mb4', 'latin1' ), true ) ) {
			$this->markTestSkipped( "This test only works with utf8, utf8mb4 or latin1 character sets" );
		}

		$data = array( 'post_content' => '¡foo foo foo!' );
		$expected = array(
			'post_content' => array(
				'value' => '¡foo foo foo!',
				'format' => '%s',
				'charset' => $expected_charset,
				'ascii' => false,
			)
		);

		$this->assertSame( $expected, self::$_wpdb->process_fields( $wpdb->posts, $data, null ) );
	}

	/**
	 * Asserts that process_fields() returns false for a table named
	 * 'nonexistent_table'.
	 *
	 * @ticket 21212
	 * @depends test_process_fields
	 *
	 * @param mixed $data Value handed over by the test this one depends on. Not used.
	 */
	function test_process_fields_on_nonexistent_table( $data ) {
		$fields = array( 'post_content' => '¡foo foo foo!' );

		// Put error reporting back the way it was *before* asserting, so a
		// failed assertion cannot leave errors suppressed for later tests.
		$suppress = self::$_wpdb->suppress_errors( true );
		$actual   = self::$_wpdb->process_fields( 'nonexistent_table', $fields, null );
		self::$_wpdb->suppress_errors( $suppress );

		$this->assertFalse( $actual );
	}

	/**
	 * The 'pre_get_table_charset' filter can supply the table charset.
	 *
	 * @ticket 21212
	 */
	function test_pre_get_table_charset_filter() {
		add_filter( 'pre_get_table_charset', array( $this, 'filter_pre_get_table_charset' ), 10, 2 );
		$charset = self::$_wpdb->get_table_charset( 'some_table' );
		remove_filter( 'pre_get_table_charset', array( $this, 'filter_pre_get_table_charset' ), 10 );

		// Expected value goes first so failure messages read the right way round.
		$this->assertEquals( 'fake_charset', $charset );
	}

	/**
	 * Callback for the 'pre_get_table_charset' filter: always answers with a
	 * fixed, made-up charset name.
	 */
	function filter_pre_get_table_charset( $charset, $table ) {
		return 'fake_charset';
	}

	/**
	 * The 'pre_get_col_charset' filter can supply the column charset.
	 *
	 * @ticket 21212
	 */
	function test_pre_get_col_charset_filter() {
		add_filter( 'pre_get_col_charset', array( $this, 'filter_pre_get_col_charset' ), 10, 3 );
		$charset = self::$_wpdb->get_col_charset( 'some_table', 'some_col' );
		remove_filter( 'pre_get_col_charset', array( $this, 'filter_pre_get_col_charset' ), 10 );

		// Expected value goes first so failure messages read the right way round.
		$this->assertEquals( 'fake_col_charset', $charset );
	}

	/**
	 * Callback for the 'pre_get_col_charset' filter: always answers with a
	 * fixed, made-up charset name.
	 */
	function filter_pre_get_col_charset( $charset, $table, $column ) {
		return 'fake_col_charset';
	}
}
branches/4.1/tests/phpunit/tests/db/charset.php
<?php

require_once dirname( dirname( __FILE__ ) ) . '/db.php';

/**
 * Test WPDB methods
 *
 * @group wpdb
 */
class Tests_DB_Charset extends WP_UnitTestCase {

	/**
	 * Our special WPDB
	 * @var wpdb_exposed_methods_for_testing
	 */
	protected static $_wpdb;

	public static function setUpBeforeClass() {
		self::$_wpdb = new wpdb_exposed_methods_for_testing();
	}

	/**
	 * Data provider for test_strip_invalid_text().
	 *
	 * Produces one data set per charset listed in $fields, plus a final data
	 * set that passes all of the fields at once.
	 *
	 * @ticket 21212
	 */
	function data_strip_invalid_text() {
		$fields = array(
			'latin1' => array(
				// latin1. latin1 never changes.
				'charset'  => 'latin1',
				'value'    => "\xf0\x9f\x8e\xb7",
				'expected' => "\xf0\x9f\x8e\xb7"
			),
			'ascii' => array(
				// ascii gets special treatment, make sure it's covered
				'charset'  => 'ascii',
				'value'    => 'Hello World',
				'expected' => 'Hello World'
			),
			'utf8' => array(
				// utf8 only allows <= 3-byte chars
				'charset'  => 'utf8',
				'value'    => "H€llo\xf0\x9f\x98\x88World¢",
				'expected' => 'H€lloWorld¢'
			),
			'utf8mb3' => array(
				// utf8mb3 should behave the same as utf8
				'charset'  => 'utf8mb3',
				'value'    => "H€llo\xf0\x9f\x98\x88World¢",
				'expected' => 'H€lloWorld¢'
			),
			'utf8mb4' => array(
				// utf8mb4 allows 4-byte characters, too
				'charset'  => 'utf8mb4',
				'value'    => "H€llo\xf0\x9f\x98\x88World¢",
				'expected' => "H€llo\xf0\x9f\x98\x88World¢"
			),
			'koi8r' => array(
				// koi8r is a character set that needs to be checked in MySQL
				'charset'  => 'koi8r',
				'value'    => "\xfdord\xf2ress",
				'expected' => "\xfdord\xf2ress",
				'db'       => true
			),
			'hebrew' => array(
				// hebrew needs to be checked in MySQL, too
				'charset'  => 'hebrew',
				'value'    => "\xf9ord\xf7ress",
				'expected' => "\xf9ord\xf7ress",
				'db'       => true
			),
			'false' => array(
				// false is a column with no character set (ie, a number column)
				'charset'  => false,
				'value'    => 100,
				'expected' => 100
			),
		);

		if ( function_exists( 'mb_convert_encoding' ) ) {
			// big5 is a non-Unicode multibyte charset
			$utf8 = "a\xe5\x85\xb1b"; // UTF-8 Character 20849
			$big5 = mb_convert_encoding( $utf8, 'BIG-5', 'UTF-8' );
			$conv_utf8 = mb_convert_encoding( $big5, 'UTF-8', 'BIG-5' );
			// Make sure PHP's multibyte conversions are working correctly
			$this->assertNotEquals( $utf8, $big5 );
			$this->assertEquals( $utf8, $conv_utf8 );

			$fields['big5'] = array(
				'charset'  => 'big5',
				'value'    => $big5,
				'expected' => $big5
			);
		}

		// The data above is easy to edit. Now, prepare it for the data provider.
		$data_provider = $multiple = $multiple_expected = array();
		foreach ( $fields as $test_case => $field ) {
			$expected = $field;
			$expected['value'] = $expected['expected'];
			unset( $expected['expected'], $field['expected'] );

			// We're keeping track of these for our multiple-field test.
			$multiple[] = $field;
			$multiple_expected[] = $expected;

			// strip_invalid_text() expects an array of fields. We're testing one field at a time.
			$data = array( $field );
			$expected = array( $expected );

			// First argument is field data. Second is expected. Third is the message.
			$data_provider[] = array( $data, $expected, $test_case );
		}

		// Time for our test of multiple fields at once.
		$data_provider[] = array( $multiple, $multiple_expected, 'multiple fields/charsets' );

		return $data_provider;
	}

	/**
	 * @dataProvider data_strip_invalid_text
	 * @ticket 21212
	 */
	function test_strip_invalid_text( $data, $expected, $message ) {
		$actual = self::$_wpdb->strip_invalid_text( $data );
		$this->assertSame( $expected, $actual, $message );
	}

	/**
	 * @ticket 21212
	 */
	function test_process_fields_failure() {
		global $wpdb;

		$charset = $wpdb->get_col_charset( $wpdb->posts, 'post_content' );
		if ( 'utf8' !== $charset && 'utf8mb4' !== $charset ) {
			$this->markTestSkipped( 'This test requires a utf8 character set' );
		}

		// \xf0\xff\xff\xff is invalid in utf8 and utf8mb4.
		$data = array( 'post_content' => "H€llo\xf0\xff\xff\xffWorld¢" );
		$this->assertFalse( self::$_wpdb->process_fields( $wpdb->posts, $data, null ) );
	}

	/**
	 * Data provider for test_process_field_charsets().
	 *
	 * Every local variable defined before the get_defined_vars() call (apart
	 * from $charset) becomes one data set: input data, expected result and the
	 * variable's name as the assertion message. Do not add helper variables
	 * above that call, or they will be turned into data sets as well.
	 *
	 * @ticket 21212
	 */
	function data_process_field_charsets() {
		if ( $GLOBALS['wpdb']->charset ) {
			$charset = $GLOBALS['wpdb']->charset;
		} else {
			$charset = $GLOBALS['wpdb']->get_col_charset( $GLOBALS['wpdb']->posts, 'post_content' );
		}

		// 'value' and 'format' are $data, 'charset' ends up as part of $expected

		$no_string_fields = array(
			'post_parent' => array( 'value' => 10, 'format' => '%d', 'charset' => false ),
			'comment_count' => array( 'value' => 0, 'format' => '%d', 'charset' => false ),
		);

		$all_ascii_fields = array(
			'post_content' => array( 'value' => 'foo foo foo!', 'format' => '%s', 'charset' => false ),
			'post_excerpt' => array( 'value' => 'bar bar bar!', 'format' => '%s', 'charset' => false ),
		);

		// This is the same data used in test_process_field_charsets_on_nonexistent_table()
		$non_ascii_string_fields = array(
			'post_content' => array( 'value' => '¡foo foo foo!', 'format' => '%s', 'charset' => $charset, 'ascii' => false ),
			'post_excerpt' => array( 'value' => '¡bar bar bar!', 'format' => '%s', 'charset' => $charset, 'ascii' => false ),
		);

		$vars = get_defined_vars();
		unset( $vars['charset'] );
		foreach ( $vars as $var_name => $var ) {
			$data = $expected = $var;
			foreach ( $data as &$datum ) {
				// 'charset' and 'ascii' are part of the expected return only.
				unset( $datum['charset'], $datum['ascii'] );
			}
			// Break the reference so the last element of $data is not left
			// aliased to $datum when $data is stored in the data set below.
			unset( $datum );

			$vars[ $var_name ] = array( $data, $expected, $var_name );
		}

		return array_values( $vars );
	}

	/**
	 * @dataProvider data_process_field_charsets
	 * @ticket 21212
	 */
	function test_process_field_charsets( $data, $expected, $message ) {
		$actual = self::$_wpdb->process_field_charsets( $data, $GLOBALS['wpdb']->posts );
		$this->assertSame( $expected, $actual, $message );
	}

	/**
	 * The test this test depends on first verifies that this
	 * would normally work against the posts table.
	 *
	 * @ticket 21212
	 * @depends test_process_field_charsets
	 */
	function test_process_field_charsets_on_nonexistent_table() {
		$data = array( 'post_content' => array( 'value' => '¡foo foo foo!', 'format' => '%s' ) );

		// Put error reporting back the way it was *before* asserting, so a
		// failed assertion cannot leave errors suppressed for later tests.
		$suppress = self::$_wpdb->suppress_errors( true );
		$actual   = self::$_wpdb->process_field_charsets( $data, 'nonexistent_table' );
		self::$_wpdb->suppress_errors( $suppress );

		$this->assertFalse( $actual );
	}

	/**
	 * @ticket 21212
	 */
	function test_check_ascii() {
		$ascii = "\0\t\n\r '" . '!"#$%&()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~';
		$this->assertTrue( self::$_wpdb->check_ascii( $ascii ) );
	}

	/**
	 * @ticket 21212
	 */
	function test_check_ascii_false() {
		$this->assertFalse( self::$_wpdb->check_ascii( 'ABCDEFGHIJKLMNOPQRSTUVWXYZ¡©«' ) );
	}

	/**
	 * @ticket 21212
	 */
	function test_strip_invalid_text_for_column() {
		global $wpdb;

		$charset = $wpdb->get_col_charset( $wpdb->posts, 'post_content' );
		if ( 'utf8' !== $charset && 'utf8mb4' !== $charset ) {
			$this->markTestSkipped( 'This test requires a utf8 character set' );
		}

		// Invalid 3-byte and 4-byte sequences
		$value = "H€llo\xe0\x80\x80World\xf0\xff\xff\xff¢";
		$expected = "H€lloWorld¢";
		$actual = $wpdb->strip_invalid_text_for_column( $wpdb->posts, 'post_content', $value );
		$this->assertEquals( $expected, $actual );
	}

	/**
	 * Set of table definitions for testing wpdb::get_table_charset and wpdb::get_col_charset
	 * @var array
	 */
	protected $table_and_column_defs = array(
		array(
			'definition'      => '( a INT, b FLOAT )',
			'table_expected'  => false,
			'column_expected' => array( 'a' => false, 'b' => false )
		),
		array(
			'definition'      => '( a VARCHAR(50) CHARACTER SET big5, b TEXT CHARACTER SET big5 )',
			'table_expected'  => 'big5',
			'column_expected' => array( 'a' => 'big5', 'b' => 'big5' )
		),
		array(
			'definition'      => '( a VARCHAR(50) CHARACTER SET big5, b BINARY )',
			'table_expected'  => 'binary',
			'column_expected' => array( 'a' => 'big5', 'b' => false )
		),
		array(
			'definition'      => '( a VARCHAR(50) CHARACTER SET latin1, b BLOB )',
			'table_expected'  => 'binary',
			'column_expected' => array( 'a' => 'latin1', 'b' => false )
		),
		array(
			'definition'      => '( a VARCHAR(50) CHARACTER SET latin1, b TEXT CHARACTER SET koi8r )',
			'table_expected'  => 'koi8r',
			'column_expected' => array( 'a' => 'latin1', 'b' => 'koi8r' )
		),
		array(
			'definition'      => '( a VARCHAR(50) CHARACTER SET utf8mb3, b TEXT CHARACTER SET utf8mb3 )',
			'table_expected'  => 'utf8',
			'column_expected' => array( 'a' => 'utf8', 'b' => 'utf8' )
		),
		array(
			'definition'      => '( a VARCHAR(50) CHARACTER SET utf8, b TEXT CHARACTER SET utf8mb4 )',
			'table_expected'  => 'utf8',
			'column_expected' => array( 'a' => 'utf8', 'b' => 'utf8mb4' )
		),
		array(
			'definition'      => '( a VARCHAR(50) CHARACTER SET big5, b TEXT CHARACTER SET koi8r )',
			'table_expected'  => 'ascii',
			'column_expected' => array( 'a' => 'big5', 'b' => 'koi8r' )
		),
	);

	/**
	 * Data provider for test_get_table_charset(): DROP statement, CREATE
	 * statement, table name and expected table charset for each definition.
	 *
	 * @ticket 21212
	 */
	function data_test_get_table_charset() {
		$table_name = 'test_get_table_charset';

		$vars = array();
		foreach( $this->table_and_column_defs as $value ) {
			$this_table_name = $table_name . '_' . rand_str( 5 );
			$drop = "DROP TABLE IF EXISTS $this_table_name";
			$create = "CREATE TABLE $this_table_name {$value['definition']}";
			$vars[] = array( $drop, $create, $this_table_name, $value['table_expected'] );
		}

		return $vars;
	}

	/**
	 * @dataProvider data_test_get_table_charset
	 * @ticket 21212
	 */
	function test_get_table_charset( $drop, $create, $table, $expected_charset ) {
		self::$_wpdb->query( $drop );

		if ( ! self::$_wpdb->has_cap( 'utf8mb4' ) && preg_match( '/utf8mb[34]/i', $create ) ) {
			$this->markTestSkipped( "This version of MySQL doesn't support utf8mb4." );
		}

		self::$_wpdb->query( $create );

		// Expected value goes first so failure messages read the right way round.
		$charset = self::$_wpdb->get_table_charset( $table );
		$this->assertEquals( $expected_charset, $charset );

		// The lookup is also exercised with an upper-cased table name.
		$charset = self::$_wpdb->get_table_charset( strtoupper( $table ) );
		$this->assertEquals( $expected_charset, $charset );

		self::$_wpdb->query( $drop );
	}

	/**
	 * Data provider for the column charset tests: DROP statement, CREATE
	 * statement, table name and a column => expected charset map.
	 *
	 * @ticket 21212
	 */
	function data_test_get_column_charset() {
		$table_name = 'test_get_column_charset';

		$vars = array();
		foreach( $this->table_and_column_defs as $value ) {
			$this_table_name = $table_name . '_' . rand_str( 5 );
			$drop = "DROP TABLE IF EXISTS $this_table_name";
			$create = "CREATE TABLE $this_table_name {$value['definition']}";
			$vars[] = array( $drop, $create, $this_table_name, $value['column_expected'] );
		}

		return $vars;
	}

	/**
	 * @dataProvider data_test_get_column_charset
	 * @ticket 21212
	 */
	function test_get_column_charset( $drop, $create, $table, $expected_charset ) {
		self::$_wpdb->query( $drop );

		if ( ! self::$_wpdb->has_cap( 'utf8mb4' ) && preg_match( '/utf8mb[34]/i', $create ) ) {
			$this->markTestSkipped( "This version of MySQL doesn't support utf8mb4." );
		}

		self::$_wpdb->query( $create );

		foreach ( $expected_charset as $column => $charset ) {
			$this->assertEquals( $charset, self::$_wpdb->get_col_charset( $table, $column ) );
			$this->assertEquals( $charset, self::$_wpdb->get_col_charset( strtoupper( $table ), strtoupper( $column ) ) );
		}

		self::$_wpdb->query( $drop );
	}

	/**
	 * With is_mysql switched off, get_col_charset() is expected to return
	 * false for every column.
	 *
	 * @dataProvider data_test_get_column_charset
	 * @ticket 21212
	 */
	function test_get_column_charset_non_mysql( $drop, $create, $table, $columns ) {
		self::$_wpdb->query( $drop );

		if ( ! self::$_wpdb->has_cap( 'utf8mb4' ) && preg_match( '/utf8mb[34]/i', $create ) ) {
			$this->markTestSkipped( "This version of MySQL doesn't support utf8mb4." );
		}

		$was_mysql = self::$_wpdb->is_mysql;
		self::$_wpdb->is_mysql = false;

		self::$_wpdb->query( $create );

		// $columns maps column name => charset. Only the names are needed,
		// and they (not numeric indexes) are what must be looked up.
		$actual = array();
		foreach ( array_keys( $columns ) as $column ) {
			$actual[ $column ] = self::$_wpdb->get_col_charset( $table, $column );
		}

		self::$_wpdb->query( $drop );

		// Restore the shared object before asserting, so a failed assertion
		// cannot leave is_mysql switched off for the tests that follow.
		self::$_wpdb->is_mysql = $was_mysql;

		foreach ( $actual as $column => $charset ) {
			$this->assertEquals( false, $charset, $column );
		}
	}

	/**
	 * Data provider for test_strip_invalid_text_from_query(): CREATE
	 * statement, query to strip, expected stripped query and DROP statement.
	 *
	 * @ticket 21212
	 */
	function data_strip_invalid_text_from_query() {
		$table_name = 'strip_invalid_text_from_query_table';
		$data = array(
			array(
				// binary tables don't get stripped
				"( a VARCHAR(50) CHARACTER SET utf8, b BINARY )", // create
				"('foo\xf0\x9f\x98\x88bar', 'foo')",              // query
				"('foo\xf0\x9f\x98\x88bar', 'foo')"               // expected result
			),
			array(
				// utf8/utf8mb4 tables default to utf8
				"( a VARCHAR(50) CHARACTER SET utf8, b VARCHAR(50) CHARACTER SET utf8mb4 )",
				"('foo\xf0\x9f\x98\x88bar', 'foo')",
				"('foobar', 'foo')"
			),
		);

		foreach( $data as &$value ) {
			$this_table_name = $table_name . '_' . rand_str( 5 );

			$value[0] = "CREATE TABLE $this_table_name {$value[0]}";
			$value[1] = "INSERT INTO $this_table_name VALUES {$value[1]}";
			$value[2] = "INSERT INTO $this_table_name VALUES {$value[2]}";
			$value[3] = "DROP TABLE IF EXISTS $this_table_name";
		}
		unset( $value );

		return $data;
	}

	/**
	 * @dataProvider data_strip_invalid_text_from_query
	 * @ticket 21212
	 */
	function test_strip_invalid_text_from_query( $create, $query, $expected, $drop ) {
		self::$_wpdb->query( $drop );

		if ( ! self::$_wpdb->has_cap( 'utf8mb4' ) && preg_match( '/utf8mb[34]/i', $create ) ) {
			$this->markTestSkipped( "This version of MySQL doesn't support utf8mb4." );
		}

		self::$_wpdb->query( $create );

		$return = self::$_wpdb->strip_invalid_text_from_query( $query );
		$this->assertEquals( $expected, $return );

		self::$_wpdb->query( $drop );
	}

	/**
	 * @ticket 21212
	 */
	function test_invalid_characters_in_query() {
		global $wpdb;

		$charset = $wpdb->get_col_charset( $wpdb->posts, 'post_content' );
		if ( 'utf8' !== $charset && 'utf8mb4' !== $charset ) {
			$this->markTestSkipped( 'This test requires a utf8 character set' );
		}

		$this->assertFalse( $wpdb->query( "INSERT INTO {$wpdb->posts} (post_content) VALUES ('foo\xf0\xff\xff\xffbar')" ) );
	}

	/**
	 * Data provider for test_table_collation_check(): CREATE statement,
	 * expected check_collation() result, query to check and DROP statement.
	 *
	 * @ticket 21212
	 */
	function data_table_collation_check() {
		$table_name = 'table_collation_check';
		$data = array(
			array(
				// utf8_bin tables don't need extra sanity checking.
				"( a VARCHAR(50) COLLATE utf8_bin )", // create
				true                                  // expected result
			),
			array(
				// Neither do utf8_general_ci tables.
				"( a VARCHAR(50) COLLATE utf8_general_ci )",
				true
			),
			array(
				// utf8_unicode_ci tables do.
				"( a VARCHAR(50) COLLATE utf8_unicode_ci )",
				false
			),
			array(
				// utf8_bin tables don't need extra sanity checking,
				// except for when they're not just utf8_bin.
				"( a VARCHAR(50) COLLATE utf8_bin, b VARCHAR(50) COLLATE big5_chinese_ci )",
				false
			),
			array(
				// utf8_bin tables don't need extra sanity checking
				// when the other columns aren't strings.
				"( a VARCHAR(50) COLLATE utf8_bin, b INT )",
				true
			),
		);

		foreach( $data as &$value ) {
			$this_table_name = $table_name . '_' . rand_str( 5 );

			$value[0] = "CREATE TABLE $this_table_name {$value[0]}";
			$value[2] = "SELECT * FROM $this_table_name";
			$value[3] = "DROP TABLE IF EXISTS $this_table_name";
		}
		unset( $value );

		return $data;
	}


	/**
	 * @dataProvider data_table_collation_check
	 * @ticket 21212
	 */
	function test_table_collation_check( $create, $expected, $query, $drop ) {
		self::$_wpdb->query( $drop );

		self::$_wpdb->query( $create );

		$return = self::$_wpdb->check_collation( $query );
		$this->assertEquals( $expected, $return );

		self::$_wpdb->query( $drop );
	}
}
Note: See TracChangeset
for help on using the changeset viewer.