Make WordPress Core

Changeset 58764


Ignore:
Timestamp:
07/18/2024 06:20:39 PM (5 months ago)
Author:
jorbin
Message:

General: Provide _is_utf8_charset() in compat.php for early use

#61182 introduced is_utf8_charset() as a way of standardizing checks for charset slugs referring to UTF-8. It is called by the multibyte polyfills in compat.php, such as _mb_strlen(), but is_utf8_charset() is defined in functions.php, which isn't loaded early on. On hosts without the multibyte extension, code that calls mb_strlen() before functions.php has loaded may therefore crash. This change defines _is_utf8_charset() in compat.php for those early callers and turns is_utf8_charset() into a wrapper around it.

Reviewed by hellofromTonya.
Merges [58763] to the 6.6 branch.

Props dmsnell, jonsurrell, joemcgill, jorbin.
Fixes #61680.

Location:
branches/6.6
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • branches/6.6

  • branches/6.6/src/wp-includes/compat.php

    r58147 r58764  
    3939
    4040    return $utf8_pcre;
     41}
     42
     43/**
     44 * Indicates if a given slug for a character set represents the UTF-8 text encoding.
     45 *
     46 * A charset is considered to represent UTF-8 if it is a case-insensitive match
     47 * of "UTF-8" with or without the hyphen.
     48 *
     49 * Example:
     50 *
     51 *     true  === _is_utf8_charset( 'UTF-8' );
     52 *     true  === _is_utf8_charset( 'utf8' );
     53 *     false === _is_utf8_charset( 'latin1' );
     54 *     false === _is_utf8_charset( 'UTF 8' );
     55 *
     56 *     // Only strings match.
     57 *     false === _is_utf8_charset( [ 'charset' => 'utf-8' ] );
     58 *
     59 * `is_utf8_charset` should be used outside of this file.
     60 *
     61 * @ignore
     62 * @since 6.6.1
     63 *
     64 * @param string $charset_slug Slug representing a text character encoding, or "charset".
     65 *                             E.g. "UTF-8", "Windows-1252", "ISO-8859-1", "SJIS".
     66 *
     67 * @return bool Whether the slug represents the UTF-8 encoding.
     68 */
     69function _is_utf8_charset( $charset_slug ) {
     70    if ( ! is_string( $charset_slug ) ) {
     71        return false;
     72    }
     73
     74    return (
     75        0 === strcasecmp( 'UTF-8', $charset_slug ) ||
     76        0 === strcasecmp( 'UTF8', $charset_slug )
     77    );
    4178}
    4279
     
    92129     * charset just use built-in substr().
    93130     */
    94     if ( ! is_utf8_charset( $encoding ) ) {
     131    if ( ! _is_utf8_charset( $encoding ) ) {
    95132        return is_null( $length ) ? substr( $str, $start ) : substr( $str, $start, $length );
    96133    }
     
    177214     * just use built-in strlen().
    178215     */
    179     if ( ! is_utf8_charset( $encoding ) ) {
     216    if ( ! _is_utf8_charset( $encoding ) ) {
    180217        return strlen( $str );
    181218    }
  • branches/6.6/src/wp-includes/functions.php

    r58570 r58764  
    74977497 *
    74987498 * @since 6.6.0
    7499  *
    7500  * @param ?string $blog_charset Slug representing a text character encoding, or "charset".
    7501  *                              E.g. "UTF-8", "Windows-1252", "ISO-8859-1", "SJIS".
     7499 * @since 6.6.1 A wrapper for _is_utf8_charset
     7500 *
     7501 * @see _is_utf8_charset
     7502 *
     7503 * @param string|null $blog_charset Optional. Slug representing a text character encoding, or "charset".
     7504 *                                  E.g. "UTF-8", "Windows-1252", "ISO-8859-1", "SJIS".
     7505 *                                  Default value is to infer from "blog_charset" option.
    75027506 * @return bool Whether the slug represents the UTF-8 encoding.
    75037507 */
    75047508function is_utf8_charset( $blog_charset = null ) {
    7505     $charset_to_examine = $blog_charset ?? get_option( 'blog_charset' );
    7506 
    7507     /*
    7508      * Only valid string values count: the absence of a charset
    7509      * does not imply any charset, let alone UTF-8.
    7510      */
    7511     if ( ! is_string( $charset_to_examine ) ) {
    7512         return false;
    7513     }
    7514 
    7515     return (
    7516         0 === strcasecmp( 'UTF-8', $charset_to_examine ) ||
    7517         0 === strcasecmp( 'UTF8', $charset_to_examine )
    7518     );
     7509    return _is_utf8_charset( $blog_charset ?? get_option( 'blog_charset' ) );
    75197510}
    75207511
Note: See TracChangeset for help on using the changeset viewer.