WordPress.org

Make WordPress Core


Ignore:
Timestamp:
06/01/2016 02:37:20 AM (4 years ago)
Author:
pento
Message:

Database: Split the logic of wpdb::init_charset() into a separate method.

The logic for determining the appropriate character set and collation to use is becoming more complex, particularly with the recent additions of [37522] and [37523]. Because init_charset() has side effects and relies on constants instead of parameters, it's not possible to unit test this logic.

This commit splits the logic part of init_charset() out into a new method, wpdb::determine_charset(), and adds appropriate unit tests for it.

See #32105, #37522.

Fixes #36917.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/wp-includes/wp-db.php

    r37585 r37601  
    736736    public function init_charset() {
    737737        if ( function_exists('is_multisite') && is_multisite() ) {
    738             $this->charset = 'utf8';
     738            $charset = 'utf8';
    739739            if ( defined( 'DB_COLLATE' ) && DB_COLLATE ) {
    740                 $this->collate = DB_COLLATE;
     740                $collate = DB_COLLATE;
    741741            } else {
    742                 $this->collate = 'utf8_general_ci';
     742                $collate = 'utf8_general_ci';
    743743            }
    744744        } elseif ( defined( 'DB_COLLATE' ) ) {
    745             $this->collate = DB_COLLATE;
     745            $collate = DB_COLLATE;
    746746        }
    747747
    748748        if ( defined( 'DB_CHARSET' ) ) {
    749             $this->charset = DB_CHARSET;
    750         }
    751 
     749            $charset = DB_CHARSET;
     750        }
     751
     752        $charset_collate = $this->determine_charset( $charset, $collate );
     753
     754        $this->charset = $charset_collate['charset'];
     755        $this->collate = $charset_collate['collate'];
     756    }
     757
     758    /**
     759     * Given a charset and collation, determine the best charset and collation to use.
     760     *
     761     * For example, when able, utf8mb4 should be used instead of utf8.
     762     *
     763     * @since 4.6.0
     764     *
     765     * @param  string $charset The character set to check.
     766     * @param  string $collate The collation to check.
     767     *
     768     * @return array The most appropriate character set and collation to use.
     769     */
     770    public function determine_charset( $charset, $collate ) {
    752771        if ( ( $this->use_mysqli && ! ( $this->dbh instanceof mysqli ) ) || empty( $this->dbh ) ) {
    753             return;
    754         }
    755 
    756         if ( 'utf8' === $this->charset && $this->has_cap( 'utf8mb4' ) ) {
    757             $this->charset = 'utf8mb4';
    758         }
    759 
    760         if ( 'utf8mb4' === $this->charset ) {
     772            return compact( 'charset', 'collate' );
     773        }
     774
     775        if ( 'utf8' === $charset && $this->has_cap( 'utf8mb4' ) ) {
     776            $charset = 'utf8mb4';
     777        }
     778
     779        if ( 'utf8mb4' === $charset ) {
    761780            // _general_ is outdated, so we can upgrade it to _unicode_, instead.
    762             if ( ! $this->collate || 'utf8_general_ci' === $this->collate ) {
    763                 $this->collate = 'utf8mb4_unicode_ci';
     781            if ( ! $collate || 'utf8_general_ci' === $collate ) {
     782                $collate = 'utf8mb4_unicode_ci';
    764783            } else {
    765                 $this->collate = str_replace( 'utf8_', 'utf8mb4_', $this->collate );
     784                $collate = str_replace( 'utf8_', 'utf8mb4_', $collate );
    766785            }
    767786        }
    768787
    769788        // _unicode_520_ is a better collation, we should use that when it's available.
    770         if ( $this->has_cap( 'utf8mb4_520' ) && 'utf8mb4_unicode_ci' === $this->collate ) {
    771             $this->collate = 'utf8mb4_unicode_520_ci';
    772         }
     789        if ( $this->has_cap( 'utf8mb4_520' ) && 'utf8mb4_unicode_ci' === $collate ) {
     790            $collate = 'utf8mb4_unicode_520_ci';
     791        }
     792
     793        return compact( 'charset', 'collate' );
    773794    }
    774795
Note: See TracChangeset for help on using the changeset viewer.