Make WordPress Core


Ignore:
Timestamp:
10/04/2016 08:32:40 PM (8 years ago)
Author:
peterwilsoncc
Message:

HTTP API: Simplify wp_parse_url() to ensure consistent results.

[38694] revealed some URL formats were being parsed incorrectly, including those used by Google Fonts. This change simplifies the function to use placeholder values which cause PHP's parsing to behave consistently.

Props jrf, peterwilsoncc.
Fixes #36356.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/wp-includes/http.php

    r38694 r38726  
    624624
    625625/**
    626  * A wrapper for PHP's parse_url() function that handles edgecases in < PHP 5.4.7
     626 * A wrapper for PHP's parse_url() function that handles consistency in the return
     627 * values across PHP versions.
    627628 *
    628629 * PHP 5.4.7 expanded parse_url()'s ability to handle non-absolute url's, including
    629  * schemeless and relative url's with :// in the path, this works around those
    630  * limitations providing a standard output on PHP 5.2~5.4+.
     630 * schemeless and relative url's with :// in the path. This function works around
     631 * those limitations providing a standard output on PHP 5.2~5.4+.
     632 *
     633 * Secondly, across various PHP versions, schemeless URLs containing a ":"
     634 * in the query are being handled inconsistently. This function works around those
     635 * differences as well.
    631636 *
    632637 * Error suppression is used as prior to PHP 5.3.3, an E_WARNING would be generated
     
    641646 *                          Defaults to -1 (= return all parts as an array).
    642647 *                          @see http://php.net/manual/en/function.parse-url.php
    643  * @return mixed False on failure; Array of URL components on success;
    644  *               When a specific component has been requested: null if the component doesn't
    645  *               exist in the given URL; a sting or - in the case of PHP_URL_PORT - integer
    646  *               when it does; See parse_url()'s return values.
     648 * @return mixed False on parse failure; Array of URL components on success;
     649 *               When a specific component has been requested: null if the component
     650 *               doesn't exist in the given URL; a string or - in the case of
     651 *               PHP_URL_PORT - integer when it does. See parse_url()'s return values.
    647652 */
    648653function wp_parse_url( $url, $component = -1 ) {
    649     $parts = @parse_url( $url, $component );
    650 
    651     if ( version_compare( PHP_VERSION, '5.4.7', '>=' ) ) {
     654    $to_unset = array();
     655    $url = strval( $url );
     656
     657    if ( '//' === substr( $url, 0, 2 ) ) {
     658        $to_unset[] = 'scheme';
     659        $url = 'placeholder:' . $url;
     660    } elseif ( '/' === substr( $url, 0, 1 ) ) {
     661        $to_unset[] = 'scheme';
     662        $to_unset[] = 'host';
     663        $url = 'placeholder://placeholder' . $url;
     664    }
     665
     666    $parts = @parse_url( $url );
     667
     668    if ( false === $parts ) {
     669        // Parsing failure.
    652670        return $parts;
    653671    }
    654672
    655     if ( false === $parts ) {
    656         // < PHP 5.4.7 compat, trouble with relative paths including a scheme break in the path.
    657         if ( '/' == $url[0] && false !== strpos( $url, '://' ) ) {
    658             if ( in_array( $component, array( PHP_URL_SCHEME, PHP_URL_HOST ), true ) ) {
    659                 return null;
    660             }
    661             // Since we know it's a relative path, prefix with a scheme/host placeholder and try again.
    662             if ( ! $parts = @parse_url( 'placeholder://placeholder' . $url, $component ) ) {
    663                 return $parts;
    664             }
    665             // Remove the placeholder values.
    666             if ( -1 === $component ) {
    667                 unset( $parts['scheme'], $parts['host'] );
    668             }
    669         } else {
    670             return $parts;
    671         }
    672     }
    673 
    674     // < PHP 5.4.7 compat, doesn't detect a schemeless URL's host field.
    675     if ( '//' == substr( $url, 0, 2 ) ) {
    676         if ( -1 === $component && ! isset( $parts['host'] ) ) {
    677             $path_parts = explode( '/', substr( $parts['path'], 2 ), 2 );
    678             $parts['host'] = $path_parts[0];
    679             if ( isset( $path_parts[1] ) ) {
    680                 $parts['path'] = '/' . $path_parts[1];
    681             } else {
    682                 unset( $parts['path'] );
    683             }
    684         } elseif ( PHP_URL_HOST === $component || PHP_URL_PATH === $component ) {
    685             $all_parts = @parse_url( $url );
    686             if ( ! isset( $all_parts['host'] ) ) {
    687                 $path_parts = explode( '/', substr( $all_parts['path'], 2 ), 2 );
    688                 if ( PHP_URL_PATH === $component ) {
    689                     if ( isset( $path_parts[1] ) ) {
    690                         $parts = '/' . $path_parts[1];
    691                     } else {
    692                         $parts = null;
    693                     }
    694                 } elseif ( PHP_URL_HOST === $component ) {
    695                     $parts = $path_parts[0];
    696                 }
    697             }
    698         }
    699     }
    700 
    701     return $parts;
    702 }
     673    // Remove the placeholder values.
     674    foreach ( $to_unset as $key ) {
     675        unset( $parts[ $key ] );
     676    }
     677
     678    return _get_component_from_parsed_url_array( $parts, $component );
     679}
     680
     681/**
     682 * Retrieve a specific component from a parsed URL array.
     683 *
     684 * @internal
     685 *
     686 * @since 4.7.0
     687 *
     688 * @param array|false $url_parts The parsed URL. Can be false if the URL failed to parse.
     689 * @param int    $component The specific component to retrieve. Use one of the PHP
     690 *                          predefined constants to specify which one.
     691 *                          Defaults to -1 (= return all parts as an array).
     692 *                          @see http://php.net/manual/en/function.parse-url.php
     693 * @return mixed False on parse failure; Array of URL components on success;
     694 *               When a specific component has been requested: null if the component
     695 *               doesn't exist in the given URL; a string or - in the case of
     696 *               PHP_URL_PORT - integer when it does. See parse_url()'s return values.
     697 */
     698function _get_component_from_parsed_url_array( $url_parts, $component = -1 ) {
     699    if ( -1 === $component ) {
     700        return $url_parts;
     701    }
     702
     703    $key = _wp_translate_php_url_constant_to_key( $component );
     704    if ( false !== $key && is_array( $url_parts ) && isset( $url_parts[ $key ] ) ) {
     705        return $url_parts[ $key ];
     706    } else {
     707        return null;
     708    }
     709}
     710
     711/**
     712 * Translate a PHP_URL_* constant to the named array keys PHP uses.
     713 *
     714 * @internal
     715 *
     716 * @since 4.7.0
     717 *
     718 * @see   http://php.net/manual/en/url.constants.php
     719 *
     720 * @param int $constant PHP_URL_* constant.
     721 * @return string|false The named key or false.
     722 */
     723function _wp_translate_php_url_constant_to_key( $constant ) {
     724    $translation = array(
     725        PHP_URL_SCHEME   => 'scheme',
     726        PHP_URL_HOST     => 'host',
     727        PHP_URL_PORT     => 'port',
     728        PHP_URL_USER     => 'user',
     729        PHP_URL_PASS     => 'pass',
     730        PHP_URL_PATH     => 'path',
     731        PHP_URL_QUERY    => 'query',
     732        PHP_URL_FRAGMENT => 'fragment',
     733    );
     734
     735    if ( isset( $translation[ $constant ] ) ) {
     736        return $translation[ $constant ];
     737    } else {
     738        return false;
     739    }
     740}
Note: See TracChangeset for help on using the changeset viewer.