WordPress.org

Make WordPress Core

Changeset 38726


Ignore:
Timestamp:
10/04/2016 08:32:40 PM (3 years ago)
Author:
peterwilsoncc
Message:

HTTP API: Simplify wp_parse_url() to ensure consistent results.

[38694] revealed some URL formats were being parsed incorrectly, including those used by Google Fonts. This change simplifies the function to use placeholder values which cause PHP's parsing to behave consistently.

Props jrf, peterwilsoncc.
Fixes #36356.

Location:
trunk
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/wp-includes/http.php

    r38694 r38726  
    624624
    625625/**
    626  * A wrapper for PHP's parse_url() function that handles edgecases in < PHP 5.4.7
     626 * A wrapper for PHP's parse_url() function that handles consistency in the return
     627 * values across PHP versions.
    627628 *
    628629 * PHP 5.4.7 expanded parse_url()'s ability to handle non-absolute url's, including
    629  * schemeless and relative url's with :// in the path, this works around those
    630  * limitations providing a standard output on PHP 5.2~5.4+.
     630 * schemeless and relative url's with :// in the path. This function works around
     631 * those limitations providing a standard output on PHP 5.2~5.4+.
     632 *
     633 * Secondly, across various PHP versions, schemeless URLs containing a ":"
     634 * in the query are being handled inconsistently. This function works around those
     635 * differences as well.
    631636 *
    632637 * Error suppression is used as prior to PHP 5.3.3, an E_WARNING would be generated
     
    641646 *                          Defaults to -1 (= return all parts as an array).
    642647 *                          @see http://php.net/manual/en/function.parse-url.php
    643  * @return mixed False on failure; Array of URL components on success;
    644  *               When a specific component has been requested: null if the component doesn't
    645  *               exist in the given URL; a sting or - in the case of PHP_URL_PORT - integer
    646  *               when it does; See parse_url()'s return values.
     648 * @return mixed False on parse failure; Array of URL components on success;
     649 *               When a specific component has been requested: null if the component
     650 *               doesn't exist in the given URL; a string or - in the case of
     651 *               PHP_URL_PORT - integer when it does. See parse_url()'s return values.
    647652 */
    648653function wp_parse_url( $url, $component = -1 ) {
    649     $parts = @parse_url( $url, $component );
    650 
    651     if ( version_compare( PHP_VERSION, '5.4.7', '>=' ) ) {
     654    $to_unset = array();
     655    $url = strval( $url );
     656
     657    if ( '//' === substr( $url, 0, 2 ) ) {
     658        $to_unset[] = 'scheme';
     659        $url = 'placeholder:' . $url;
     660    } elseif ( '/' === substr( $url, 0, 1 ) ) {
     661        $to_unset[] = 'scheme';
     662        $to_unset[] = 'host';
     663        $url = 'placeholder://placeholder' . $url;
     664    }
     665
     666    $parts = @parse_url( $url );
     667
     668    if ( false === $parts ) {
     669        // Parsing failure.
    652670        return $parts;
    653671    }
    654672
    655     if ( false === $parts ) {
    656         // < PHP 5.4.7 compat, trouble with relative paths including a scheme break in the path.
    657         if ( '/' == $url[0] && false !== strpos( $url, '://' ) ) {
    658             if ( in_array( $component, array( PHP_URL_SCHEME, PHP_URL_HOST ), true ) ) {
    659                 return null;
    660             }
    661             // Since we know it's a relative path, prefix with a scheme/host placeholder and try again.
    662             if ( ! $parts = @parse_url( 'placeholder://placeholder' . $url, $component ) ) {
    663                 return $parts;
    664             }
    665             // Remove the placeholder values.
    666             if ( -1 === $component ) {
    667                 unset( $parts['scheme'], $parts['host'] );
    668             }
    669         } else {
    670             return $parts;
    671         }
    672     }
    673 
    674     // < PHP 5.4.7 compat, doesn't detect a schemeless URL's host field.
    675     if ( '//' == substr( $url, 0, 2 ) ) {
    676         if ( -1 === $component && ! isset( $parts['host'] ) ) {
    677             $path_parts = explode( '/', substr( $parts['path'], 2 ), 2 );
    678             $parts['host'] = $path_parts[0];
    679             if ( isset( $path_parts[1] ) ) {
    680                 $parts['path'] = '/' . $path_parts[1];
    681             } else {
    682                 unset( $parts['path'] );
    683             }
    684         } elseif ( PHP_URL_HOST === $component || PHP_URL_PATH === $component ) {
    685             $all_parts = @parse_url( $url );
    686             if ( ! isset( $all_parts['host'] ) ) {
    687                 $path_parts = explode( '/', substr( $all_parts['path'], 2 ), 2 );
    688                 if ( PHP_URL_PATH === $component ) {
    689                     if ( isset( $path_parts[1] ) ) {
    690                         $parts = '/' . $path_parts[1];
    691                     } else {
    692                         $parts = null;
    693                     }
    694                 } elseif ( PHP_URL_HOST === $component ) {
    695                     $parts = $path_parts[0];
    696                 }
    697             }
    698         }
    699     }
    700 
    701     return $parts;
    702 }
     673    // Remove the placeholder values.
     674    foreach ( $to_unset as $key ) {
     675        unset( $parts[ $key ] );
     676    }
     677
     678    return _get_component_from_parsed_url_array( $parts, $component );
     679}
     680
     681/**
     682 * Retrieve a specific component from a parsed URL array.
     683 *
     684 * @internal
     685 *
     686 * @since 4.7.0
     687 *
     688 * @param array|false $url_parts The parsed URL. Can be false if the URL failed to parse.
     689 * @param int    $component The specific component to retrieve. Use one of the PHP
     690 *                          predefined constants to specify which one.
     691 *                          Defaults to -1 (= return all parts as an array).
     692 *                          @see http://php.net/manual/en/function.parse-url.php
     693 * @return mixed False on parse failure; Array of URL components on success;
     694 *               When a specific component has been requested: null if the component
     695 *               doesn't exist in the given URL; a string or - in the case of
     696 *               PHP_URL_PORT - integer when it does. See parse_url()'s return values.
     697 */
     698function _get_component_from_parsed_url_array( $url_parts, $component = -1 ) {
     699    if ( -1 === $component ) {
     700        return $url_parts;
     701    }
     702
     703    $key = _wp_translate_php_url_constant_to_key( $component );
     704    if ( false !== $key && is_array( $url_parts ) && isset( $url_parts[ $key ] ) ) {
     705        return $url_parts[ $key ];
     706    } else {
     707        return null;
     708    }
     709}
     710
     711/**
     712 * Translate a PHP_URL_* constant to the named array keys PHP uses.
     713 *
     714 * @internal
     715 *
     716 * @since 4.7.0
     717 *
     718 * @see   http://php.net/manual/en/url.constants.php
     719 *
     720 * @param int $constant PHP_URL_* constant.
     721 * @return string|bool The named key or false.
     722 */
     723function _wp_translate_php_url_constant_to_key( $constant ) {
     724    $translation = array(
     725        PHP_URL_SCHEME   => 'scheme',
     726        PHP_URL_HOST     => 'host',
     727        PHP_URL_PORT     => 'port',
     728        PHP_URL_USER     => 'user',
     729        PHP_URL_PASS     => 'pass',
     730        PHP_URL_PATH     => 'path',
     731        PHP_URL_QUERY    => 'query',
     732        PHP_URL_FRAGMENT => 'fragment',
     733    );
     734
     735    if ( isset( $translation[ $constant ] ) ) {
     736        return $translation[ $constant ];
     737    } else {
     738        return false;
     739    }
     740}
  • trunk/tests/phpunit/tests/http/http.php

    r38694 r38726  
    108108            array( '/://example.com/', array( 'path' => '/://example.com/' ) ),
    109109
     110            // Schemeless URLs containing colons cause parse errors in PHP 7+.
     111            array(
     112                '//fonts.googleapis.com/css?family=Open+Sans:400&subset=latin',
     113                array(
     114                    'host'  => 'fonts.googleapis.com',
     115                    'path'  => '/css',
     116                    'query' => 'family=Open+Sans:400&subset=latin',
     117                ),
     118            ),
     119            array(
     120                '//fonts.googleapis.com/css?family=Open+Sans:400',
     121                array(
     122                    'host'  => 'fonts.googleapis.com',
     123                    'path'  => '/css',
     124                    'query' => 'family=Open+Sans:400',
     125                ),
     126            ),
     127
     128            array( 'filenamefound', array( 'path' => 'filenamefound' ) ),
     129
     130            // Empty string or non-string passed in.
     131            array( '', array( 'path' => '' ) ),
     132            array( 123, array( 'path' => '123' ) ),
    110133        );
    111134        /*
     
    118141    /**
    119142     * @ticket 36356
    120     */
     143    */
    121144    function test_wp_parse_url_with_default_component() {
    122145        $actual = wp_parse_url( self::FULL_TEST_URL, -1 );
     
    176199            array( '/://example.com/', PHP_URL_PATH, '/://example.com/' ),
    177200
     201            // Schemeless URLs containing colons cause parse errors in PHP 7+.
     202            array( '//fonts.googleapis.com/css?family=Open+Sans:400&subset=latin', PHP_URL_HOST, 'fonts.googleapis.com' ),
     203            array( '//fonts.googleapis.com/css?family=Open+Sans:400&subset=latin', PHP_URL_PORT, null ),
     204            array( '//fonts.googleapis.com/css?family=Open+Sans:400&subset=latin', PHP_URL_PATH, '/css' ),
     205            array( '//fonts.googleapis.com/css?family=Open+Sans:400&subset=latin', PHP_URL_QUERY, 'family=Open+Sans:400&subset=latin' ),
     206            array( '//fonts.googleapis.com/css?family=Open+Sans:400', PHP_URL_HOST, 'fonts.googleapis.com' ), // 25
     207            array( '//fonts.googleapis.com/css?family=Open+Sans:400', PHP_URL_PORT, null ),
     208            array( '//fonts.googleapis.com/css?family=Open+Sans:400', PHP_URL_PATH, '/css' ), //27
     209            array( '//fonts.googleapis.com/css?family=Open+Sans:400', PHP_URL_QUERY, 'family=Open+Sans:400' ), //28
     210
     211            // Empty string or non-string passed in.
     212            array( '', PHP_URL_PATH, '' ),
     213            array( '', PHP_URL_QUERY, null ),
     214            array( 123, PHP_URL_PORT, null ),
     215            array( 123, PHP_URL_PATH, '123' ),
    178216        );
    179217    }
     
    225263        }
    226264    }
     265
     266    /**
     267     * @ticket 36356
     268     *
     269     * @dataProvider get_component_from_parsed_url_array_testcases
     270     */
     271    function test_get_component_from_parsed_url_array( $url, $component, $expected ) {
     272        $parts  = wp_parse_url( $url );
     273        $actual = _get_component_from_parsed_url_array( $parts, $component );
     274        $this->assertSame( $expected, $actual );
     275    }
     276
     277    function get_component_from_parsed_url_array_testcases() {
     278        // 0: A URL, 1: PHP URL constant, 2: The expected result.
     279        return array(
     280            array( 'http://example.com/', -1, array( 'scheme' => 'http', 'host' => 'example.com', 'path' => '/' ) ),
     281            array( 'http://example.com/', -1, array( 'scheme' => 'http', 'host' => 'example.com', 'path' => '/' ) ),
     282            array( 'http://example.com/', PHP_URL_HOST, 'example.com' ),
     283            array( 'http://example.com/', PHP_URL_USER, null ),
     284            array( 'http:///example.com', -1, false ), // Malformed.
     285            array( 'http:///example.com', PHP_URL_HOST, null ), // Malformed.
     286        );
     287    }
     288
     289    /**
     290     * @ticket 36356
     291     *
     292     * @dataProvider wp_translate_php_url_constant_to_key_testcases
     293     */
     294    function test_wp_translate_php_url_constant_to_key( $input, $expected ) {
     295        $actual = _wp_translate_php_url_constant_to_key( $input );
     296        $this->assertSame( $expected, $actual );
     297    }
     298
     299    function wp_translate_php_url_constant_to_key_testcases() {
     300        // 0: PHP URL constant, 1: The expected result.
     301        return array(
     302            array( PHP_URL_SCHEME, 'scheme' ),
     303            array( PHP_URL_HOST, 'host' ),
     304            array( PHP_URL_PORT, 'port' ),
     305            array( PHP_URL_USER, 'user' ),
     306            array( PHP_URL_PASS, 'pass' ),
     307            array( PHP_URL_PATH, 'path' ),
     308            array( PHP_URL_QUERY, 'query' ),
     309            array( PHP_URL_FRAGMENT, 'fragment' ),
     310
     311            // Test with non-PHP_URL_CONSTANT parameter.
     312            array( 'something', false ),
     313            array( ABSPATH, false ),
     314        );
     315    }
     316
    227317}
Note: See TracChangeset for help on using the changeset viewer.