Ticket #36356: 36356-wp_parse_url-additional-edge-cases.patch
File 36356-wp_parse_url-additional-edge-cases.patch, 11.7 KB (added by , 8 years ago) |
---|
-
src/wp-includes/http.php
From 5ef2a1ac3b4274279ef6ce431b8b4dcc9bbcf383 Mon Sep 17 00:00:00 2001 From: jrfnl <github_nospam@adviesenzo.nl> Date: Sat, 1 Oct 2016 10:41:06 +0200 Subject: [PATCH] Fix additional edge cases. --- src/wp-includes/http.php | 173 ++++++++++++++++++++++++++++---------- tests/phpunit/tests/http/http.php | 42 ++++++++- 2 files changed, 170 insertions(+), 45 deletions(-) diff --git a/src/wp-includes/http.php b/src/wp-includes/http.php index 3738b79..8c26d57 100644
a b function ms_allowed_http_request_hosts( $is_external, $host ) { 623 623 } 624 624 625 625 /** 626 * A wrapper for PHP's parse_url() function that handles edgecases in < PHP 5.4.7 626 * A wrapper for PHP's parse_url() function that handles consistency in the return 627 * values across PHP versions. 627 628 * 628 629 * PHP 5.4.7 expanded parse_url()'s ability to handle non-absolute url's, including 629 * schemeless and relative url's with :// in the path, this works around those 630 * limitations providing a standard output on PHP 5.2~5.4+. 630 * schemeless and relative url's with :// in the path. This function works around 631 * those limitations providing a standard output on PHP 5.2~5.4+. 632 * 633 * Secondly, across various PHP versions, schemeless URLs containing a ":" 634 * in the query are being handled inconsistently. This function works around those 635 * differences as well. 636 * 637 * Lastly, PHP does not recognize query parameters which start with an ampersand (&) 638 * instead of a question mark (?) as query parameters. That edge case is also 639 * handled by this function. 631 640 * 632 641 * Error suppression is used as prior to PHP 5.3.3, an E_WARNING would be generated 633 642 * when URL parsing failed. 634 643 * 635 644 * @since 4.4.0 636 645 * @since 4.7.0 The $component parameter was added for parity with PHP's parse_url(). 646 * Handling of query parameters starting with & was added. 637 647 * 638 648 * @param string $url The URL to parse. 639 649 * @param int $component The specific component to retrieve. Use one of the PHP 640 650 * predefined constants to specify which one. 641 651 * Defaults to -1 (= return all parts as an array). 
642 652 * @see http://php.net/manual/en/function.parse-url.php 643 * @return mixed False on failure; Array of URL components on success;644 * When a specific component has been requested: null if the component doesn't645 * exist in the given URL; a sting or - in the case of PHP_URL_PORT - integer646 * when it does;See parse_url()'s return values.653 * @return mixed False on parse failure; Array of URL components on success; 654 * When a specific component has been requested: null if the component 655 * doesn't exist in the given URL; a sting or - in the case of 656 * PHP_URL_PORT - integer when it does. See parse_url()'s return values. 647 657 */ 648 658 function wp_parse_url( $url, $component = -1 ) { 649 $parts = @parse_url( $url , $component);659 $parts = @parse_url( $url ); 650 660 651 if ( version_compare( PHP_VERSION, '5.4.7', '>=' ) ) { 652 return $parts; 653 } 661 $schemeless_with_colon = ( '//' === substr( $url, 0, 2 ) && false !== strpos( $url, ':' ) ); 654 662 655 663 if ( false === $parts ) { 656 664 // < PHP 5.4.7 compat, trouble with relative paths including a scheme break in the path. 657 if ( '/' == $url[0] && false !== strpos( $url, '://' ) ) { 658 if ( in_array( $component, array( PHP_URL_SCHEME, PHP_URL_HOST ), true ) ) { 659 return null; 665 if ( '/' === $url[0] && ( false !== strpos( $url, '://' ) || true === $schemeless_with_colon ) ) { 666 if ( true === $schemeless_with_colon ) { 667 if ( PHP_URL_SCHEME === $component ) { 668 return null; 669 } else { 670 // Since we know it's a schemeless path, prefix with a scheme placeholder. 671 $url = 'placeholder:' . $url; 672 $to_unset = array( 'scheme' ); 673 } 674 } else { 675 if ( in_array( $component, array( PHP_URL_SCHEME, PHP_URL_HOST ), true ) ) { 676 return null; 677 } else { 678 // Since we know it's a relative path, prefix with a scheme/host placeholder. 679 $url = 'placeholder://placeholder' . 
$url; 680 $to_unset = array( 'scheme', 'host' ); 681 } 660 682 } 661 // Since we know it's a relative path, prefix with a scheme/host placeholder and try again. 662 if ( ! $parts = @parse_url( 'placeholder://placeholder' . $url, $component ) ) { 663 return $parts; 683 684 $parts = @parse_url( $url ); 685 if ( false === $parts ) { 686 return _get_component_from_parsed_url_array( $parts, $component ); 664 687 } 688 665 689 // Remove the placeholder values. 666 if ( -1 === $component) {667 unset( $parts[ 'scheme'], $parts['host'] );690 foreach ( $to_unset as $key ) { 691 unset( $parts[ $key ] ); 668 692 } 669 693 } else { 670 return $parts;694 return _get_component_from_parsed_url_array( $parts, $component ); 671 695 } 672 696 } 673 697 674 698 // < PHP 5.4.7 compat, doesn't detect a schemeless URL's host field. 675 if ( '//' == substr( $url, 0, 2 ) ) { 676 if ( -1 === $component && ! isset( $parts['host'] ) ) { 677 $path_parts = explode( '/', substr( $parts['path'], 2 ), 2 ); 678 $parts['host'] = $path_parts[0]; 679 if ( isset( $path_parts[1] ) ) { 680 $parts['path'] = '/' . $path_parts[1]; 681 } else { 682 unset( $parts['path'] ); 683 } 684 } elseif ( PHP_URL_HOST === $component || PHP_URL_PATH === $component ) { 685 $all_parts = @parse_url( $url ); 686 if ( ! isset( $all_parts['host'] ) ) { 687 $path_parts = explode( '/', substr( $all_parts['path'], 2 ), 2 ); 688 if ( PHP_URL_PATH === $component ) { 689 if ( isset( $path_parts[1] ) ) { 690 $parts = '/' . $path_parts[1]; 691 } else { 692 $parts = null; 693 } 694 } elseif ( PHP_URL_HOST === $component ) { 695 $parts = $path_parts[0]; 696 } 697 } 699 if ( '//' == substr( $url, 0, 2 ) && ! isset( $parts['host'] ) && isset( $parts['path'] ) ) { 700 $path_parts = explode( '/', substr( $parts['path'], 2 ), 2 ); 701 $parts['host'] = $path_parts[0]; 702 if ( isset( $path_parts[1] ) ) { 703 $parts['path'] = '/' . 
$path_parts[1]; 704 } else { 705 unset( $parts['path'] ); 706 } 707 } 708 709 // Deal with query parameters starting with & instead of ? 710 if ( isset( $parts['path'] ) && ! isset( $parts['query'] ) ) { 711 $ampersant_in_path = strpos( $parts['path'], '&', strrpos( $parts['path'], '/' ) ); 712 if ( false !== $ampersant_in_path ) { 713 $parts['query'] = substr( $parts['path'], ( $ampersant_in_path + 1 ) ); 714 $parts['path'] = str_replace( '&' . $parts['query'], '', $parts['path'] ); 698 715 } 699 716 } 700 717 701 return $parts; 718 // HHVM mistakenly interprets a ':400' in the query string as the port. 719 if ( isset( $parts['port'], $parts['query'] ) && false !== strpos( $parts['query'], ':' . $parts['port'] ) ) { 720 if ( 1 === substr_count( $url, ':' . $parts['port'] ) ) { 721 unset( $parts['port'] ); 722 } 723 } 724 725 return _get_component_from_parsed_url_array( $parts, $component ); 726 } 727 728 /** 729 * Retrieve a specific component from a parsed URL array. 730 * 731 * @internal 732 * 733 * @since 4.7.0 734 * 735 * @param array|false $url_parts The parsed URL. Can be false if the URL failed to parse. 736 * @param int $component The specific component to retrieve. Use one of the PHP 737 * predefined constants to specify which one. 738 * Defaults to -1 (= return all parts as an array). 739 * @see http://php.net/manual/en/function.parse-url.php 740 * @return mixed False on parse failure; Array of URL components on success; 741 * When a specific component has been requested: null if the component 742 * doesn't exist in the given URL; a sting or - in the case of 743 * PHP_URL_PORT - integer when it does. See parse_url()'s return values. 
744 */ 745 function _get_component_from_parsed_url_array( $url_parts, $component = -1 ) { 746 if ( -1 === $component ) { 747 return $url_parts; 748 } 749 750 $key = _wp_translate_php_url_constant_to_key( $component ); 751 if ( false !== $key && is_array( $url_parts ) && isset( $url_parts[ $key ] ) ) { 752 return $url_parts[ $key ]; 753 } else { 754 return null; 755 } 756 } 757 758 /** 759 * Translate a PHP_URL_* constant to the named array keys PHP uses. 760 * 761 * @internal 762 * 763 * @since 4.7.0 764 * 765 * @see http://php.net/manual/en/url.constants.php 766 * 767 * @param int $constant PHP_URL_* constant. 768 * @return string|bool The named key or false. 769 */ 770 function _wp_translate_php_url_constant_to_key( $constant ) { 771 $translation = array( 772 PHP_URL_SCHEME => 'scheme', 773 PHP_URL_HOST => 'host', 774 PHP_URL_PORT => 'port', 775 PHP_URL_USER => 'user', 776 PHP_URL_PASS => 'pass', 777 PHP_URL_PATH => 'path', 778 PHP_URL_QUERY => 'query', 779 PHP_URL_FRAGMENT => 'fragment', 780 ); 781 782 if ( isset( $translation[ $constant ] ) ) { 783 return $translation[ $constant ]; 784 } else { 785 return false; 786 } 702 787 } -
tests/phpunit/tests/http/http.php
diff --git a/tests/phpunit/tests/http/http.php b/tests/phpunit/tests/http/http.php index 607eb47..dc49b24 100644
a b class Tests_HTTP_HTTP extends WP_UnitTestCase { 107 107 // PHP's parse_url() calls this an invalid url, we handle it as a path 108 108 array( '/://example.com/', array( 'path' => '/://example.com/' ) ), 109 109 110 // Schemeless URL containing colons cause parse errors in PHP 7+. 111 array( '//fonts.googleapis.com/css?family=Open+Sans:400&subset=latin', array( 112 'host' => 'fonts.googleapis.com', 113 'path' => '/css', 114 'query' => 'family=Open+Sans:400&subset=latin', 115 ) ), 116 array( '//fonts.googleapis.com/css?family=Open+Sans:400', array( 117 'host' => 'fonts.googleapis.com', 118 'path' => '/css', 119 'query' => 'family=Open+Sans:400', 120 ) ), 121 122 // Query parameter starting with & instead of ? 123 array( 'http://www.test.com/path1/path2/&q=a', array( 124 'scheme' => 'http', 125 'host' => 'www.test.com', 126 'path' => '/path1/path2/', 127 'query' => 'q=a', 128 ) ), 129 array( 'http://www.test.com/path1/path2/file.php&q=a', array( 130 'scheme' => 'http', 131 'host' => 'www.test.com', 132 'path' => '/path1/path2/file.php', 133 'query' => 'q=a', 134 ) ), 110 135 ); 111 136 /* 112 137 Untestable edge cases in various PHP: … … class Tests_HTTP_HTTP extends WP_UnitTestCase { 117 142 118 143 /** 119 144 * @ticket 36356 120 145 */ 121 146 function test_wp_parse_url_with_default_component() { 122 147 $actual = wp_parse_url( self::FULL_TEST_URL, -1 ); 123 148 $this->assertEquals( array( … … class Tests_HTTP_HTTP extends WP_UnitTestCase { 175 200 // PHP's parse_url() calls this an invalid URL, we handle it as a path. 176 201 array( '/://example.com/', PHP_URL_PATH, '/://example.com/' ), 177 202 203 // Schemeless URL containing colons cause parse errors in PHP 7+. 
204 array( '//fonts.googleapis.com/css?family=Open+Sans:400&subset=latin', PHP_URL_HOST, 'fonts.googleapis.com' ), 205 array( '//fonts.googleapis.com/css?family=Open+Sans:400&subset=latin', PHP_URL_PORT, null ), 206 array( '//fonts.googleapis.com/css?family=Open+Sans:400&subset=latin', PHP_URL_PATH, '/css' ), 207 array( '//fonts.googleapis.com/css?family=Open+Sans:400&subset=latin', PHP_URL_QUERY, 'family=Open+Sans:400&subset=latin' ), 208 array( '//fonts.googleapis.com/css?family=Open+Sans:400', PHP_URL_HOST, 'fonts.googleapis.com' ), // 25 209 array( '//fonts.googleapis.com/css?family=Open+Sans:400', PHP_URL_PORT, null ), 210 array( '//fonts.googleapis.com/css?family=Open+Sans:400', PHP_URL_PATH, '/css' ), //27 211 array( '//fonts.googleapis.com/css?family=Open+Sans:400', PHP_URL_QUERY, 'family=Open+Sans:400' ), //28 212 213 // Query parameter starting with & instead of ? 214 array( 'http://www.test.com/path1/path2/&q=a', PHP_URL_PATH, '/path1/path2/' ), 215 array( 'http://www.test.com/path1/path2/&q=a', PHP_URL_QUERY, 'q=a' ), 216 array( 'http://www.test.com/path1/path2/file.php&q=a', PHP_URL_PATH, '/path1/path2/file.php' ), 217 array( 'http://www.test.com/path1/path2/file.php&q=a', PHP_URL_QUERY, 'q=a' ), 178 218 ); 179 219 } 180 220