WordPress.org

Make WordPress Core

Changeset 37601


Ignore:
Timestamp:
06/01/2016 02:37:20 AM (3 years ago)
Author:
pento
Message:

Database: Split the logic of wpdb::init_charset() into a separate method.

The logic for determining the appropriate character set and collation to use is becoming more complex, particularly with the recent additions of [37522] and [37523]. Because init_charset() has side effects and reads constants instead of taking parameters, it's not possible to unit test this logic.

This commit splits the logic part of init_charset() out into a new method, wpdb::determine_charset(), and adds appropriate unit tests for it.

See #32105, [37522].

Fixes #36917.

Location:
trunk
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/wp-includes/wp-db.php

    r37585 r37601  
    736736    public function init_charset() {
    737737        if ( function_exists('is_multisite') && is_multisite() ) {
    738             $this->charset = 'utf8';
     738            $charset = 'utf8';
    739739            if ( defined( 'DB_COLLATE' ) && DB_COLLATE ) {
    740                 $this->collate = DB_COLLATE;
     740                $collate = DB_COLLATE;
    741741            } else {
    742                 $this->collate = 'utf8_general_ci';
     742                $collate = 'utf8_general_ci';
    743743            }
    744744        } elseif ( defined( 'DB_COLLATE' ) ) {
    745             $this->collate = DB_COLLATE;
     745            $collate = DB_COLLATE;
    746746        }
    747747
    748748        if ( defined( 'DB_CHARSET' ) ) {
    749             $this->charset = DB_CHARSET;
    750         }
    751 
     749            $charset = DB_CHARSET;
     750        }
     751
     752        $charset_collate = $this->determine_charset( $charset, $collate );
     753
     754        $this->charset = $charset_collate['charset'];
     755        $this->collate = $charset_collate['collate'];
     756    }
     757
     758    /**
     759     * Given a charset and collation, determine the best charset and collation to use.
     760     *
     761     * For example, when able, utf8mb4 should be used instead of utf8.
     762     *
     763     * @since 4.6.0
     764     *
     765     * @param  string $charset The character set to check.
     766     * @param  string $collate The collation to check.
     767     *
     768     * @return array The most appropriate character set and collation to use.
     769     */
     770    public function determine_charset( $charset, $collate ) {
    752771        if ( ( $this->use_mysqli && ! ( $this->dbh instanceof mysqli ) ) || empty( $this->dbh ) ) {
    753             return;
    754         }
    755 
    756         if ( 'utf8' === $this->charset && $this->has_cap( 'utf8mb4' ) ) {
    757             $this->charset = 'utf8mb4';
    758         }
    759 
    760         if ( 'utf8mb4' === $this->charset ) {
     772            return compact( 'charset', 'collate' );
     773        }
     774
     775        if ( 'utf8' === $charset && $this->has_cap( 'utf8mb4' ) ) {
     776            $charset = 'utf8mb4';
     777        }
     778
     779        if ( 'utf8mb4' === $charset ) {
    761780            // _general_ is outdated, so we can upgrade it to _unicode_, instead.
    762             if ( ! $this->collate || 'utf8_general_ci' === $this->collate ) {
    763                 $this->collate = 'utf8mb4_unicode_ci';
     781            if ( ! $collate || 'utf8_general_ci' === $collate ) {
     782                $collate = 'utf8mb4_unicode_ci';
    764783            } else {
    765                 $this->collate = str_replace( 'utf8_', 'utf8mb4_', $this->collate );
     784                $collate = str_replace( 'utf8_', 'utf8mb4_', $collate );
    766785            }
    767786        }
    768787
    769788        // _unicode_520_ is a better collation, we should use that when it's available.
    770         if ( $this->has_cap( 'utf8mb4_520' ) && 'utf8mb4_unicode_ci' === $this->collate ) {
    771             $this->collate = 'utf8mb4_unicode_520_ci';
    772         }
     789        if ( $this->has_cap( 'utf8mb4_520' ) && 'utf8mb4_unicode_ci' === $collate ) {
     790            $collate = 'utf8mb4_unicode_520_ci';
     791        }
     792
     793        return compact( 'charset', 'collate' );
    773794    }
    774795
  • trunk/tests/phpunit/tests/db.php

    r37522 r37601  
    956956        $wpdb->check_connection();
    957957    }
     958
     959    /**
     960     * @ticket 36917
     961     */
     962    function test_charset_not_determined_when_disconnected() {
     963        global $wpdb;
     964
     965        $charset = 'utf8';
     966        $collate = 'this_isnt_a_collation';
     967
     968        $wpdb->close();
     969
     970        $result = $wpdb->determine_charset( $charset, $collate );
     971
     972        $this->assertSame( compact( 'charset', 'collate' ), $result );
     973
     974        $wpdb->check_connection();
     975    }
     976
     977    /**
     978     * @ticket 36917
     979     */
     980    function test_charset_switched_to_utf8mb4() {
     981        global $wpdb;
     982
     983        if ( ! $wpdb->has_cap( 'utf8mb4' ) ) {
     984            $this->markTestSkipped( 'This test requires utf8mb4 support.' );
     985        }
     986
     987        $charset = 'utf8';
     988        $collate = 'utf8_general_ci';
     989
     990        $result = $wpdb->determine_charset( $charset, $collate );
     991
     992        $this->assertSame( 'utf8mb4', $result['charset'] );
     993    }
     994
     995    /**
     996     * @ticket 32105
     997     * @ticket 36917
     998     */
     999    function test_collate_switched_to_utf8mb4_520() {
     1000        global $wpdb;
     1001
     1002        if ( ! $wpdb->has_cap( 'utf8mb4_520' ) ) {
     1003            $this->markTestSkipped( 'This test requires utf8mb4_520 support.' );
     1004        }
     1005
     1006        $charset = 'utf8';
     1007        $collate = 'utf8_general_ci';
     1008
     1009        $result = $wpdb->determine_charset( $charset, $collate );
     1010
     1011        $this->assertSame( 'utf8mb4_unicode_520_ci', $result['collate'] );
     1012    }
     1013
     1014    /**
     1015     * @ticket 36917
     1016     * @ticket 37522
     1017     */
     1018    function test_non_unicode_collations() {
     1019        global $wpdb;
     1020
     1021        if ( ! $wpdb->has_cap( 'utf8mb4' ) ) {
     1022            $this->markTestSkipped( 'This test requires utf8mb4 support.' );
     1023        }
     1024
     1025        $charset = 'utf8';
     1026        $collate = 'utf8_swedish_ci';
     1027
     1028        $result = $wpdb->determine_charset( $charset, $collate );
     1029
     1030        $this->assertSame( 'utf8mb4_swedish_ci', $result['collate'] );
     1031    }
    9581032}
Note: See TracChangeset for help on using the changeset viewer.