/**
 * Compare two or more URLs with each other.
 *
 * Every argument is passed through url_normalize() and the number of
 * distinct normalized forms is counted. More than two URLs may be given:
 * all arguments of the call are taken into account, not only the two
 * named parameters.
 *
 * @since 3.0.1
 * @see RFC 2616 section 3.2.3 {@link http://www.ietf.org/rfc/rfc2616.txt}
 *
 * @param string $url1 first URL
 * @param string $url2 other URL
 * @return int number of different URLs (0 for no difference)
 */
function url_compare( $url1, $url2 ) {
	// fetch the full argument list so additional URLs are compared as well
	$candidates = func_get_args();

	// use the normalized form as array key, so equal forms collapse into one entry
	$distinct = array();
	foreach ( $candidates as $candidate ) {
		$distinct[ url_normalize( $candidate ) ] = true;
	}

	// one distinct form means "no difference", hence the offset of one
	return count( $distinct ) - 1;
}
| 419 | |
/**
 * Normalize a URL so that equivalent URLs result in the same string.
 *
 * Some basic protocols are supported like HTTP, HTTPS, FTP.
 *
 * Normalization steps:
 *  - URLs containing control characters, spaces or non-ASCII bytes are invalid
 *  - percent-encoded triplets are lowercased, triplets of unreserved
 *    characters are decoded
 *  - scheme and host are lowercased
 *  - a port equal to the default port of the scheme is removed, any other
 *    port is kept
 *  - the path "/" is reduced to an empty path
 *  - query entries (separated by "&") are sorted, empty entries are removed
 *
 * The URL is always rebuilt in the "scheme://" form.
 *
 * @since 3.0.1
 * @param string $url           URL to normalize
 * @param array  $default_ports (optional) scheme-keyed ([a-z]+) array of default ports (integer)
 * @return string normalized URL, empty string if URL was invalid
 */
function url_normalize( $url , $default_ports = array() ) {
	// triplet => character map for unreserved characters, built once per request
	static $decode_map = null;

	// default ports of the supported schemes, caller supplied entries take precedence
	$default_ports = array_merge( array( 'ftp' => 21, 'http' => 80, 'https' => 443 ), $default_ports );

	// check for invalid characters, an invalid URL is "normalized" as empty string
	$nurl = (string) $url;
	if ( preg_match( '([\x00-\x20\x7F-\xFF])', $nurl ) )
		return '';

	// normalize triplets
	if ( false !== strpos( $nurl, '%' ) ) {
		// normalize triplets case to lowercase
		$nurl = preg_replace_callback( '(%[a-f0-9]{2})i', 'url_normalize_triplets', $nurl );

		if ( null === $decode_map ) {
			$decode_map = array();
			$unreserved = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_.!~*'()";
			$length     = strlen( $unreserved );
			for ( $i = 0; $i < $length; $i++ )
				$decode_map[ '%' . dechex( ord( $unreserved[$i] ) ) ] = $unreserved[$i];
		}

		// decode triplets of unreserved characters; strtr() works in a single
		// pass, so text produced by one replacement is never decoded again
		$nurl = strtr( $nurl, $decode_map );
	}

	// malformed invalid URL is "normalized" as empty string
	// (error suppression kept for PHP versions in which parse_url() emits a warning)
	$parts = @parse_url( $nurl );
	if ( ! is_array( $parts ) )
		return '';

	// normalize scheme and host, both are case-insensitive
	$scheme = isset( $parts['scheme'] ) ? strtolower( $parts['scheme'] ) : null;
	$host   = isset( $parts['host'] ) ? strtolower( $parts['host'] ) : null;

	// normalize port: the default port of the scheme is redundant
	$port = isset( $parts['port'] ) ? (int) $parts['port'] : null;
	if ( null !== $port && null !== $scheme && isset( $default_ports[$scheme] ) && (int) $default_ports[$scheme] === $port )
		$port = null;

	// normalize (abs_)path
	$path = isset( $parts['path'] ) ? $parts['path'] : null;
	if ( '/' === $path )
		$path = '';

	// normalize query (sort and filter out empty entries)
	$query = null;
	if ( isset( $parts['query'] ) ) {
		// 'strlen' only removes empty entries, an entry like "0" is kept
		$entries = array_filter( explode( '&', $parts['query'] ), 'strlen' );
		// plain string comparison, so the order never depends on numeric looking entries
		sort( $entries, SORT_STRING );
		// a query without any entry is dropped altogether
		if ( $entries )
			$query = implode( '&', $entries );
	}

	// build normalized URL
	$nurl = '';
	if ( null !== $scheme )
		$nurl = $scheme . '://';

	if ( isset( $parts['user'] ) || isset( $parts['pass'] ) ) {
		if ( isset( $parts['user'] ) )
			$nurl .= $parts['user'];
		if ( isset( $parts['pass'] ) )
			$nurl .= ':' . $parts['pass'];
		$nurl .= '@';
	}

	if ( null !== $host )
		$nurl .= $host;
	// a non-default port is part of the identity of the URL and has to be kept
	if ( null !== $port )
		$nurl .= ':' . $port;
	if ( null !== $path )
		$nurl .= $path;
	if ( null !== $query )
		$nurl .= '?' . $query;
	if ( isset( $parts['fragment'] ) )
		$nurl .= '#' . $parts['fragment'];

	return $nurl;
}
| 494 | |
/**
 * Callback for preg_replace_callback() that lowercases the whole match.
 *
 * @since 3.0.1
 * @note only to be used by url_normalize
 * @param array $matches matches as passed by preg_replace_callback(), index 0 is the full match
 * @return string lowercase full match
 */
function url_normalize_triplets( $matches ) {
	$triplet = $matches[0];

	return strtolower( $triplet );
}
| 506 | |
| 507 | |