Make WordPress Core

Changeset 58763 for trunk


Ignore:
Timestamp:
07/18/2024 06:02:17 PM (2 months ago)
Author:
jorbin
Message:

General: Provide _is_utf8_charset() in compat.php for early use

#61182 introduced is_utf8_charset() as a way of standardizing checks for charset slugs referring to UTF-8. It is called by _mb_strlen() inside compat.php, but is_utf8_charset() itself is defined in functions.php, which is not loaded until later in the bootstrap. As a result, on hosts without the multibyte extension, code that calls mb_strlen() before functions.php has loaded may crash.

Props dmsnell, jonsurrell, joemcgill, jorbin.
Fixes #61681.

Location:
trunk/src/wp-includes
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/wp-includes/compat.php

    r58147 r58763  
    3939
    4040    return $utf8_pcre;
     41}
     42
     43/**
     44 * Indicates if a given slug for a character set represents the UTF-8 text encoding.
     45 *
     46 * A charset is considered to represent UTF-8 if it is a case-insensitive match
     47 * of "UTF-8" with or without the hyphen.
     48 *
     49 * Example:
     50 *
     51 *     true  === _is_utf8_charset( 'UTF-8' );
     52 *     true  === _is_utf8_charset( 'utf8' );
     53 *     false === _is_utf8_charset( 'latin1' );
     54 *     false === _is_utf8_charset( 'UTF 8' );
     55 *
     56 *     // Only strings match.
     57 *     false === _is_utf8_charset( [ 'charset' => 'utf-8' ] );
     58 *
     59 * `is_utf8_charset` should be used outside of this file.
     60 *
     61 * @ignore
     62 * @since 6.6.1
     63 *
     64 * @param string $charset_slug Slug representing a text character encoding, or "charset".
     65 *                             E.g. "UTF-8", "Windows-1252", "ISO-8859-1", "SJIS".
     66 *
     67 * @return bool Whether the slug represents the UTF-8 encoding.
     68 */
     69function _is_utf8_charset( $charset_slug ) {
     70    if ( ! is_string( $charset_slug ) ) {
     71        return false;
     72    }
     73
     74    return (
     75        0 === strcasecmp( 'UTF-8', $charset_slug ) ||
     76        0 === strcasecmp( 'UTF8', $charset_slug )
     77    );
    4178}
    4279
     
    92129     * charset just use built-in substr().
    93130     */
    94     if ( ! is_utf8_charset( $encoding ) ) {
     131    if ( ! _is_utf8_charset( $encoding ) ) {
    95132        return is_null( $length ) ? substr( $str, $start ) : substr( $str, $start, $length );
    96133    }
     
    177214     * just use built-in strlen().
    178215     */
    179     if ( ! is_utf8_charset( $encoding ) ) {
     216    if ( ! _is_utf8_charset( $encoding ) ) {
    180217        return strlen( $str );
    181218    }
  • trunk/src/wp-includes/functions.php

    r58742 r58763  
    74977497 *
    74987498 * @since 6.6.0
     7499 * @since 6.6.1 A wrapper for _is_utf8_charset
     7500 *
     7501 * @see _is_utf8_charset
    74997502 *
    75007503 * @param string|null $blog_charset Optional. Slug representing a text character encoding, or "charset".
     
    75047507 */
    75057508function is_utf8_charset( $blog_charset = null ) {
    7506     $charset_to_examine = $blog_charset ?? get_option( 'blog_charset' );
    7507 
    7508     /*
    7509      * Only valid string values count: the absence of a charset
    7510      * does not imply any charset, let alone UTF-8.
    7511      */
    7512     if ( ! is_string( $charset_to_examine ) ) {
    7513         return false;
    7514     }
    7515 
    7516     return (
    7517         0 === strcasecmp( 'UTF-8', $charset_to_examine ) ||
    7518         0 === strcasecmp( 'UTF8', $charset_to_examine )
    7519     );
     7509    return _is_utf8_charset( $blog_charset ?? get_option( 'blog_charset' ) );
    75207510}
    75217511
Note: See TracChangeset for help on using the changeset viewer.