Make WordPress Core

Ticket #14292: 14292.3.patch

File 14292.3.patch, 4.3 KB (added by hakre, 15 years ago)

Introducing url_normalize() and url_compare()

  • wp-includes/canonical.php

     
    4747                $requested_url .= $_SERVER['REQUEST_URI'];
    4848        }
    4949
     50        $requested_url = url_normalize( $requested_url );
     51
    5052        $original = @parse_url($requested_url);
    5153        if ( false === $original )
    5254                return;
     
    343345
    344346        if ( !$redirect_url || $redirect_url == $requested_url )
    345347                return false;
    346                
    347         // Hex encoded octets are case-insensitive.
    348         if ( false !== strpos($requested_url, '%') ) {
    349                 if ( !function_exists('lowercase_octets') ) {
    350                         function lowercase_octets($matches) {
    351                                 return strtolower( $matches[0] );
    352                         }
    353                 }
    354                 $requested_url = preg_replace_callback('|%[a-fA-F0-9][a-fA-F0-9]|', 'lowercase_octets', $requested_url);
    355         }
    356348
    357349        // Note that you can use the "redirect_canonical" filter to cancel a canonical redirect for whatever reason by returning FALSE
    358350        $redirect_url = apply_filters('redirect_canonical', $redirect_url, $requested_url);
     
    408400        return get_permalink($post_id);
    409401}
    410402
     403/**
     404 * compare two or more URLs with each other
     405 *
     406 * @since 3.0.1
     407 * @see RFC 2612 section 3.2.3 {@link http://www.ietf.org/rfc/rfc2616.txt}
     408 *
     409 * @param  string $url1 first URL
     410 * @param  string $url2 other URL
     411 * @return int number of different URLs (0 for no difference)
     412 */
     413function url_compare( $url1, $url2 ) {
     414        $urls = func_get_args();
     415        $urls = array_map( 'url_normalize', $urls );
     416        $urls = array_unique( $urls );
     417        return count( $urls ) - 1;
     418}
     419
     420/**
     421 * normalize a URL
     422 *
     423 * some basic protocols are supported like HTTP, HTTPS, FTP
     424 *
     425 * @since 3.0.1
     426 * @param string $url URL to normalize
     427 * @param array $default_ports (optional) sheme-keyed ([a-z]+) array of default ports (integer)
     428 * @param string normalized URL, empty string if URL was invalid
     429 */
     430function url_normalize( $url , $default_ports = array() ) {     
     431        // most popular default ports e.g. file, ftp, gopher, http, mailto, nntp, news, telnet or further ldap, irc, phone, fax, tv [POWELL 1998 HTML])
     432        $default_ports = array_merge( array( 'ftp' => 21, 'http' => 80, 'https' => 443 ), $default_ports );
     433       
     434        // check for invalid characters, an invalid URL is "normalized" as empty string
     435        $nurl   = (string) $url;
     436        $result = preg_match('([\x00-\x20\x7F])', $nurl);
     437        if ( $result )
     438                return '';
     439        $result = null;
     440
     441        // normalize triplets
     442        if ( false !== strpos( $nurl, '%' ) ) {
     443                // normalize triplets case to lowercase
     444                $nurl = preg_replace_callback( '(%[a-f0-9]{2})i', 'url_normalize_triplets', $nurl );
     445               
     446                // normalize mark triplets which might but are not to be encoded in a normlaized URL
     447                $unreserved = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_.!~*'()";
     448                $i = 0;
     449                $m = strlen( $unreserved );
     450                while ( $i < $m )
     451                        $nurl = str_replace( '%' . dechex( ord( $c = $unreserved[$i++] ) ), $c, $nurl );
     452                $i = $m = $c = null;
     453        }
     454
     455        // malformed invalid URL is "normalized" as emtpy string
     456        $parts = @parse_url( $nurl );
     457        if ( false === $parts )
     458                return '';
     459
     460        // normalize sheme, host, port and (abs_)path
     461        $count = extract( $parts );
     462
     463        // normalize host
     464        isset( $host ) && $host = strtolower( $host );
     465
     466        // normalize scheme and it's according default port
     467        isset( $scheme ) && ( $scheme = strtolower( $scheme ) )
     468        && isset( $port ) && isset( $default_ports[$scheme] ) && $default_ports[$scheme] == $port && ( $port = null );
     469
     470        // normalize (abs_)path
     471        isset( $path ) && $path == '/' && $path = '';
     472       
     473        // build normalized URL
     474        $nurl = '';
     475        isset( $scheme ) && $nurl = $scheme . '://';
     476       
     477        if ( isset( $user ) || isset( $pass) ) {
     478                isset( $user ) && $nurl .= $user;
     479                isset( $pass ) && $nurl .= ':' . $pass;
     480                $nurl .= '@';
     481        }
     482       
     483        isset ( $host )     && $nurl .= $host;
     484        isset ( $path )     && $nurl .= $path;
     485        isset ( $query )    && $nurl .= '?' . $query;
     486        isset ( $fragment ) && $nurl .= '#' . $fragment;
     487
     488        return $nurl;
     489}
     490
     491/**
     492 * callback to lowercase first match
     493 *
     494 * @since 3.0.1
     495 * @note only to be used by url_normalize
     496 * @param array $matches matches
     497 * @return string lowercase first match
     498 */
     499function url_normalize_triplets( $matches ) {#
     500        return strtolower( $matches[0] );
     501}
     502
     503
    411504add_action('template_redirect', 'redirect_canonical');
    412505
    413 ?>
     506?>
     507 No newline at end of file