Changeset 59141 for trunk/src/wp-includes/SimplePie/src/IRI.php
- Timestamp:
- 09/30/2024 10:48:16 PM (8 months ago)
- Location:
- trunk/src/wp-includes/SimplePie/src
- Files:
- 1 added
- 1 moved
Legend:
- Unmodified
- Added
- Removed
trunk/src/wp-includes/SimplePie/src/IRI.php
r59140 r59141 1 1 <?php 2 2 3 /** 3 4 * SimplePie … … 6 7 * Takes the hard work out of managing a complete RSS/Atom solution. 7 8 * 8 * Copyright (c) 2004-2016, Ryan Parman, Sam Sneddon, Ryan McCue, and contributors 9 * Copyright (c) 2004-2022, Ryan Parman, Sam Sneddon, Ryan McCue, and contributors 9 10 * All rights reserved. 10 11 * … … 42 43 */ 43 44 45 namespace SimplePie; 46 44 47 /** 45 48 * IRI parser/serialiser/normaliser … … 53 56 * @license http://www.opensource.org/licenses/bsd-license.php 54 57 */ 55 class SimplePie_IRI 58 class IRI 56 59 { 57 /** 58 * Scheme 59 * 60 * @var string 61 */ 62 protected $scheme = null; 63 64 /** 65 * User Information 66 * 67 * @var string 68 */ 69 protected $iuserinfo = null; 70 71 /** 72 * ihost 73 * 74 * @var string 75 */ 76 protected $ihost = null; 77 78 /** 79 * Port 80 * 81 * @var string 82 */ 83 protected $port = null; 84 85 /** 86 * ipath 87 * 88 * @var string 89 */ 90 protected $ipath = ''; 91 92 /** 93 * iquery 94 * 95 * @var string 96 */ 97 protected $iquery = null; 98 99 /** 100 * ifragment 101 * 102 * @var string 103 */ 104 protected $ifragment = null; 105 106 /** 107 * Normalization database 108 * 109 * Each key is the scheme, each value is an array with each key as the IRI 110 * part and value as the default value for that part. 
111 */ 112 protected $normalization = array( 113 'acap' => array( 114 'port' => 674 115 ), 116 'dict' => array( 117 'port' => 2628 118 ), 119 'file' => array( 120 'ihost' => 'localhost' 121 ), 122 'http' => array( 123 'port' => 80, 124 'ipath' => '/' 125 ), 126 'https' => array( 127 'port' => 443, 128 'ipath' => '/' 129 ), 130 ); 131 132 /** 133 * Return the entire IRI when you try and read the object as a string 134 * 135 * @return string 136 */ 137 public function __toString() 138 { 139 return $this->get_iri(); 140 } 141 142 /** 143 * Overload __set() to provide access via properties 144 * 145 * @param string $name Property name 146 * @param mixed $value Property value 147 */ 148 public function __set($name, $value) 149 { 150 if (method_exists($this, 'set_' . $name)) 151 { 152 call_user_func(array($this, 'set_' . $name), $value); 153 } 154 elseif ( 155 $name === 'iauthority' 156 || $name === 'iuserinfo' 157 || $name === 'ihost' 158 || $name === 'ipath' 159 || $name === 'iquery' 160 || $name === 'ifragment' 161 ) 162 { 163 call_user_func(array($this, 'set_' . substr($name, 1)), $value); 164 } 165 } 166 167 /** 168 * Overload __get() to provide access via properties 169 * 170 * @param string $name Property name 171 * @return mixed 172 */ 173 public function __get($name) 174 { 175 // isset() returns false for null, we don't want to do that 176 // Also why we use array_key_exists below instead of isset() 177 $props = get_object_vars($this); 178 179 if ( 180 $name === 'iri' || 181 $name === 'uri' || 182 $name === 'iauthority' || 183 $name === 'authority' 184 ) 185 { 186 $return = $this->{"get_$name"}(); 187 } 188 elseif (array_key_exists($name, $props)) 189 { 190 $return = $this->$name; 191 } 192 // host -> ihost 193 elseif (($prop = 'i' . 
$name) && array_key_exists($prop, $props)) 194 { 195 $name = $prop; 196 $return = $this->$prop; 197 } 198 // ischeme -> scheme 199 elseif (($prop = substr($name, 1)) && array_key_exists($prop, $props)) 200 { 201 $name = $prop; 202 $return = $this->$prop; 203 } 204 else 205 { 206 trigger_error('Undefined property: ' . get_class($this) . '::' . $name, E_USER_NOTICE); 207 $return = null; 208 } 209 210 if ($return === null && isset($this->normalization[$this->scheme][$name])) 211 { 212 return $this->normalization[$this->scheme][$name]; 213 } 214 215 return $return; 216 } 217 218 /** 219 * Overload __isset() to provide access via properties 220 * 221 * @param string $name Property name 222 * @return bool 223 */ 224 public function __isset($name) 225 { 226 return method_exists($this, 'get_' . $name) || isset($this->$name); 227 } 228 229 /** 230 * Overload __unset() to provide access via properties 231 * 232 * @param string $name Property name 233 */ 234 public function __unset($name) 235 { 236 if (method_exists($this, 'set_' . $name)) 237 { 238 call_user_func(array($this, 'set_' . $name), ''); 239 } 240 } 241 242 /** 243 * Create a new IRI object, from a specified string 244 * 245 * @param string $iri 246 */ 247 public function __construct($iri = null) 248 { 249 $this->set_iri($iri); 250 } 251 252 /** 253 * Clean up 254 */ 255 public function __destruct() { 256 $this->set_iri(null, true); 257 $this->set_path(null, true); 258 $this->set_authority(null, true); 259 } 260 261 /** 262 * Create a new IRI object by resolving a relative IRI 263 * 264 * Returns false if $base is not absolute, otherwise an IRI. 
265 * 266 * @param IRI|string $base (Absolute) Base IRI 267 * @param IRI|string $relative Relative IRI 268 * @return IRI|false 269 */ 270 public static function absolutize($base, $relative) 271 { 272 if (!($relative instanceof SimplePie_IRI)) 273 { 274 $relative = new SimplePie_IRI($relative); 275 } 276 if (!$relative->is_valid()) 277 { 278 return false; 279 } 280 elseif ($relative->scheme !== null) 281 { 282 return clone $relative; 283 } 284 else 285 { 286 if (!($base instanceof SimplePie_IRI)) 287 { 288 $base = new SimplePie_IRI($base); 289 } 290 if ($base->scheme !== null && $base->is_valid()) 291 { 292 if ($relative->get_iri() !== '') 293 { 294 if ($relative->iuserinfo !== null || $relative->ihost !== null || $relative->port !== null) 295 { 296 $target = clone $relative; 297 $target->scheme = $base->scheme; 298 } 299 else 300 { 301 $target = new SimplePie_IRI; 302 $target->scheme = $base->scheme; 303 $target->iuserinfo = $base->iuserinfo; 304 $target->ihost = $base->ihost; 305 $target->port = $base->port; 306 if ($relative->ipath !== '') 307 { 308 if ($relative->ipath[0] === '/') 309 { 310 $target->ipath = $relative->ipath; 311 } 312 elseif (($base->iuserinfo !== null || $base->ihost !== null || $base->port !== null) && $base->ipath === '') 313 { 314 $target->ipath = '/' . $relative->ipath; 315 } 316 elseif (($last_segment = strrpos($base->ipath, '/')) !== false) 317 { 318 $target->ipath = substr($base->ipath, 0, $last_segment + 1) . 
$relative->ipath; 319 } 320 else 321 { 322 $target->ipath = $relative->ipath; 323 } 324 $target->ipath = $target->remove_dot_segments($target->ipath); 325 $target->iquery = $relative->iquery; 326 } 327 else 328 { 329 $target->ipath = $base->ipath; 330 if ($relative->iquery !== null) 331 { 332 $target->iquery = $relative->iquery; 333 } 334 elseif ($base->iquery !== null) 335 { 336 $target->iquery = $base->iquery; 337 } 338 } 339 $target->ifragment = $relative->ifragment; 340 } 341 } 342 else 343 { 344 $target = clone $base; 345 $target->ifragment = null; 346 } 347 $target->scheme_normalization(); 348 return $target; 349 } 350 351 return false; 352 } 353 } 354 355 /** 356 * Parse an IRI into scheme/authority/path/query/fragment segments 357 * 358 * @param string $iri 359 * @return array 360 */ 361 protected function parse_iri($iri) 362 { 363 $iri = trim($iri, "\x20\x09\x0A\x0C\x0D"); 364 if (preg_match('/^((?P<scheme>[^:\/?#]+):)?(\/\/(?P<authority>[^\/?#]*))?(?P<path>[^?#]*)(\?(?P<query>[^#]*))?(#(?P<fragment>.*))?$/', $iri, $match)) 365 { 366 if ($match[1] === '') 367 { 368 $match['scheme'] = null; 369 } 370 if (!isset($match[3]) || $match[3] === '') 371 { 372 $match['authority'] = null; 373 } 374 if (!isset($match[5])) 375 { 376 $match['path'] = ''; 377 } 378 if (!isset($match[6]) || $match[6] === '') 379 { 380 $match['query'] = null; 381 } 382 if (!isset($match[8]) || $match[8] === '') 383 { 384 $match['fragment'] = null; 385 } 386 return $match; 387 } 388 389 // This can occur when a paragraph is accidentally parsed as a URI 390 return false; 391 } 392 393 /** 394 * Remove dot segments from a path 395 * 396 * @param string $input 397 * @return string 398 */ 399 protected function remove_dot_segments($input) 400 { 401 $output = ''; 402 while (strpos($input, './') !== false || strpos($input, '/.') !== false || $input === '.' 
|| $input === '..') 403 { 404 // A: If the input buffer begins with a prefix of "../" or "./", then remove that prefix from the input buffer; otherwise, 405 if (strpos($input, '../') === 0) 406 { 407 $input = substr($input, 3); 408 } 409 elseif (strpos($input, './') === 0) 410 { 411 $input = substr($input, 2); 412 } 413 // B: if the input buffer begins with a prefix of "/./" or "/.", where "." is a complete path segment, then replace that prefix with "/" in the input buffer; otherwise, 414 elseif (strpos($input, '/./') === 0) 415 { 416 $input = substr($input, 2); 417 } 418 elseif ($input === '/.') 419 { 420 $input = '/'; 421 } 422 // C: if the input buffer begins with a prefix of "/../" or "/..", where ".." is a complete path segment, then replace that prefix with "/" in the input buffer and remove the last segment and its preceding "/" (if any) from the output buffer; otherwise, 423 elseif (strpos($input, '/../') === 0) 424 { 425 $input = substr($input, 3); 426 $output = substr_replace($output, '', strrpos($output, '/')); 427 } 428 elseif ($input === '/..') 429 { 430 $input = '/'; 431 $output = substr_replace($output, '', strrpos($output, '/')); 432 } 433 // D: if the input buffer consists only of "." or "..", then remove that from the input buffer; otherwise, 434 elseif ($input === '.' || $input === '..') 435 { 436 $input = ''; 437 } 438 // E: move the first path segment in the input buffer to the end of the output buffer, including the initial "/" character (if any) and any subsequent characters up to, but not including, the next "/" character or the end of the input buffer 439 elseif (($pos = strpos($input, '/', 1)) !== false) 440 { 441 $output .= substr($input, 0, $pos); 442 $input = substr_replace($input, '', 0, $pos); 443 } 444 else 445 { 446 $output .= $input; 447 $input = ''; 448 } 449 } 450 return $output . 
$input; 451 } 452 453 /** 454 * Replace invalid character with percent encoding 455 * 456 * @param string $string Input string 457 * @param string $extra_chars Valid characters not in iunreserved or 458 * iprivate (this is ASCII-only) 459 * @param bool $iprivate Allow iprivate 460 * @return string 461 */ 462 protected function replace_invalid_with_pct_encoding($string, $extra_chars, $iprivate = false) 463 { 464 // Normalize as many pct-encoded sections as possible 465 $string = preg_replace_callback('/(?:%[A-Fa-f0-9]{2})+/', array($this, 'remove_iunreserved_percent_encoded'), $string); 466 467 // Replace invalid percent characters 468 $string = preg_replace('/%(?![A-Fa-f0-9]{2})/', '%25', $string); 469 470 // Add unreserved and % to $extra_chars (the latter is safe because all 471 // pct-encoded sections are now valid). 472 $extra_chars .= 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~%'; 473 474 // Now replace any bytes that aren't allowed with their pct-encoded versions 475 $position = 0; 476 $strlen = strlen($string); 477 while (($position += strspn($string, $extra_chars, $position)) < $strlen) 478 { 479 $value = ord($string[$position]); 480 481 // Start position 482 $start = $position; 483 484 // By default we are valid 485 $valid = true; 486 487 // No one byte sequences are valid due to the while. 
488 // Two byte sequence: 489 if (($value & 0xE0) === 0xC0) 490 { 491 $character = ($value & 0x1F) << 6; 492 $length = 2; 493 $remaining = 1; 494 } 495 // Three byte sequence: 496 elseif (($value & 0xF0) === 0xE0) 497 { 498 $character = ($value & 0x0F) << 12; 499 $length = 3; 500 $remaining = 2; 501 } 502 // Four byte sequence: 503 elseif (($value & 0xF8) === 0xF0) 504 { 505 $character = ($value & 0x07) << 18; 506 $length = 4; 507 $remaining = 3; 508 } 509 // Invalid byte: 510 else 511 { 512 $valid = false; 513 $length = 1; 514 $remaining = 0; 515 } 516 517 if ($remaining) 518 { 519 if ($position + $length <= $strlen) 520 { 521 for ($position++; $remaining; $position++) 522 { 523 $value = ord($string[$position]); 524 525 // Check that the byte is valid, then add it to the character: 526 if (($value & 0xC0) === 0x80) 527 { 528 $character |= ($value & 0x3F) << (--$remaining * 6); 529 } 530 // If it is invalid, count the sequence as invalid and reprocess the current byte: 531 else 532 { 533 $valid = false; 534 $position--; 535 break; 536 } 537 } 538 } 539 else 540 { 541 $position = $strlen - 1; 542 $valid = false; 543 } 544 } 545 546 // Percent encode anything invalid or not in ucschar 547 if ( 548 // Invalid sequences 549 !$valid 550 // Non-shortest form sequences are invalid 551 || $length > 1 && $character <= 0x7F 552 || $length > 2 && $character <= 0x7FF 553 || $length > 3 && $character <= 0xFFFF 554 // Outside of range of ucschar codepoints 555 // Noncharacters 556 || ($character & 0xFFFE) === 0xFFFE 557 || $character >= 0xFDD0 && $character <= 0xFDEF 558 || ( 559 // Everything else not in ucschar 560 $character > 0xD7FF && $character < 0xF900 561 || $character < 0xA0 562 || $character > 0xEFFFD 563 ) 564 && ( 565 // Everything not in iprivate, if it applies 566 !$iprivate 567 || $character < 0xE000 568 || $character > 0x10FFFD 569 ) 570 ) 571 { 572 // If we were a character, pretend we weren't, but rather an error. 
573 if ($valid) 574 $position--; 575 576 for ($j = $start; $j <= $position; $j++) 577 { 578 $string = substr_replace($string, sprintf('%%%02X', ord($string[$j])), $j, 1); 579 $j += 2; 580 $position += 2; 581 $strlen += 2; 582 } 583 } 584 } 585 586 return $string; 587 } 588 589 /** 590 * Callback function for preg_replace_callback. 591 * 592 * Removes sequences of percent encoded bytes that represent UTF-8 593 * encoded characters in iunreserved 594 * 595 * @param array $match PCRE match 596 * @return string Replacement 597 */ 598 protected function remove_iunreserved_percent_encoded($match) 599 { 600 // As we just have valid percent encoded sequences we can just explode 601 // and ignore the first member of the returned array (an empty string). 602 $bytes = explode('%', $match[0]); 603 604 // Initialize the new string (this is what will be returned) and that 605 // there are no bytes remaining in the current sequence (unsurprising 606 // at the first byte!). 607 $string = ''; 608 $remaining = 0; 609 610 // Loop over each and every byte, and set $value to its value 611 for ($i = 1, $len = count($bytes); $i < $len; $i++) 612 { 613 $value = hexdec($bytes[$i]); 614 615 // If we're the first byte of sequence: 616 if (!$remaining) 617 { 618 // Start position 619 $start = $i; 620 621 // By default we are valid 622 $valid = true; 623 624 // One byte sequence: 625 if ($value <= 0x7F) 626 { 627 $character = $value; 628 $length = 1; 629 } 630 // Two byte sequence: 631 elseif (($value & 0xE0) === 0xC0) 632 { 633 $character = ($value & 0x1F) << 6; 634 $length = 2; 635 $remaining = 1; 636 } 637 // Three byte sequence: 638 elseif (($value & 0xF0) === 0xE0) 639 { 640 $character = ($value & 0x0F) << 12; 641 $length = 3; 642 $remaining = 2; 643 } 644 // Four byte sequence: 645 elseif (($value & 0xF8) === 0xF0) 646 { 647 $character = ($value & 0x07) << 18; 648 $length = 4; 649 $remaining = 3; 650 } 651 // Invalid byte: 652 else 653 { 654 $valid = false; 655 $remaining = 0; 656 } 657 
} 658 // Continuation byte: 659 else 660 { 661 // Check that the byte is valid, then add it to the character: 662 if (($value & 0xC0) === 0x80) 663 { 664 $remaining--; 665 $character |= ($value & 0x3F) << ($remaining * 6); 666 } 667 // If it is invalid, count the sequence as invalid and reprocess the current byte as the start of a sequence: 668 else 669 { 670 $valid = false; 671 $remaining = 0; 672 $i--; 673 } 674 } 675 676 // If we've reached the end of the current byte sequence, append it to Unicode::$data 677 if (!$remaining) 678 { 679 // Percent encode anything invalid or not in iunreserved 680 if ( 681 // Invalid sequences 682 !$valid 683 // Non-shortest form sequences are invalid 684 || $length > 1 && $character <= 0x7F 685 || $length > 2 && $character <= 0x7FF 686 || $length > 3 && $character <= 0xFFFF 687 // Outside of range of iunreserved codepoints 688 || $character < 0x2D 689 || $character > 0xEFFFD 690 // Noncharacters 691 || ($character & 0xFFFE) === 0xFFFE 692 || $character >= 0xFDD0 && $character <= 0xFDEF 693 // Everything else not in iunreserved (this is all BMP) 694 || $character === 0x2F 695 || $character > 0x39 && $character < 0x41 696 || $character > 0x5A && $character < 0x61 697 || $character > 0x7A && $character < 0x7E 698 || $character > 0x7E && $character < 0xA0 699 || $character > 0xD7FF && $character < 0xF900 700 ) 701 { 702 for ($j = $start; $j <= $i; $j++) 703 { 704 $string .= '%' . strtoupper($bytes[$j]); 705 } 706 } 707 else 708 { 709 for ($j = $start; $j <= $i; $j++) 710 { 711 $string .= chr(hexdec($bytes[$j])); 712 } 713 } 714 } 715 } 716 717 // If we have any bytes left over they are invalid (i.e., we are 718 // mid-way through a multi-byte sequence) 719 if ($remaining) 720 { 721 for ($j = $start; $j < $len; $j++) 722 { 723 $string .= '%' . 
strtoupper($bytes[$j]); 724 } 725 } 726 727 return $string; 728 } 729 730 protected function scheme_normalization() 731 { 732 if (isset($this->normalization[$this->scheme]['iuserinfo']) && $this->iuserinfo === $this->normalization[$this->scheme]['iuserinfo']) 733 { 734 $this->iuserinfo = null; 735 } 736 if (isset($this->normalization[$this->scheme]['ihost']) && $this->ihost === $this->normalization[$this->scheme]['ihost']) 737 { 738 $this->ihost = null; 739 } 740 if (isset($this->normalization[$this->scheme]['port']) && $this->port === $this->normalization[$this->scheme]['port']) 741 { 742 $this->port = null; 743 } 744 if (isset($this->normalization[$this->scheme]['ipath']) && $this->ipath === $this->normalization[$this->scheme]['ipath']) 745 { 746 $this->ipath = ''; 747 } 748 if (isset($this->normalization[$this->scheme]['iquery']) && $this->iquery === $this->normalization[$this->scheme]['iquery']) 749 { 750 $this->iquery = null; 751 } 752 if (isset($this->normalization[$this->scheme]['ifragment']) && $this->ifragment === $this->normalization[$this->scheme]['ifragment']) 753 { 754 $this->ifragment = null; 755 } 756 } 757 758 /** 759 * Check if the object represents a valid IRI. This needs to be done on each 760 * call as some things change depending on another part of the IRI. 761 * 762 * @return bool 763 */ 764 public function is_valid() 765 { 766 if ($this->ipath === '') return true; 767 768 $isauthority = $this->iuserinfo !== null || $this->ihost !== null || 769 $this->port !== null; 770 if ($isauthority && $this->ipath[0] === '/') return true; 771 772 if (!$isauthority && (substr($this->ipath, 0, 2) === '//')) return false; 773 774 // Relative urls cannot have a colon in the first path segment (and the 775 // slashes themselves are not included so skip the first character). 
776 if (!$this->scheme && !$isauthority && 777 strpos($this->ipath, ':') !== false && 778 strpos($this->ipath, '/', 1) !== false && 779 strpos($this->ipath, ':') < strpos($this->ipath, '/', 1)) return false; 780 781 return true; 782 } 783 784 /** 785 * Set the entire IRI. Returns true on success, false on failure (if there 786 * are any invalid characters). 787 * 788 * @param string $iri 789 * @return bool 790 */ 791 public function set_iri($iri, $clear_cache = false) 792 { 793 static $cache; 794 if ($clear_cache) 795 { 796 $cache = null; 797 return; 798 } 799 if (!$cache) 800 { 801 $cache = array(); 802 } 803 804 if ($iri === null) 805 { 806 return true; 807 } 808 elseif (isset($cache[$iri])) 809 { 810 list($this->scheme, 811 $this->iuserinfo, 812 $this->ihost, 813 $this->port, 814 $this->ipath, 815 $this->iquery, 816 $this->ifragment, 817 $return) = $cache[$iri]; 818 return $return; 819 } 820 821 $parsed = $this->parse_iri((string) $iri); 822 if (!$parsed) 823 { 824 return false; 825 } 826 827 $return = $this->set_scheme($parsed['scheme']) 828 && $this->set_authority($parsed['authority']) 829 && $this->set_path($parsed['path']) 830 && $this->set_query($parsed['query']) 831 && $this->set_fragment($parsed['fragment']); 832 833 $cache[$iri] = array($this->scheme, 834 $this->iuserinfo, 835 $this->ihost, 836 $this->port, 837 $this->ipath, 838 $this->iquery, 839 $this->ifragment, 840 $return); 841 return $return; 842 } 843 844 /** 845 * Set the scheme. Returns true on success, false on failure (if there are 846 * any invalid characters). 847 * 848 * @param string $scheme 849 * @return bool 850 */ 851 public function set_scheme($scheme) 852 { 853 if ($scheme === null) 854 { 855 $this->scheme = null; 856 } 857 elseif (!preg_match('/^[A-Za-z][0-9A-Za-z+\-.]*$/', $scheme)) 858 { 859 $this->scheme = null; 860 return false; 861 } 862 else 863 { 864 $this->scheme = strtolower($scheme); 865 } 866 return true; 867 } 868 869 /** 870 * Set the authority. 
Returns true on success, false on failure (if there are 871 * any invalid characters). 872 * 873 * @param string $authority 874 * @return bool 875 */ 876 public function set_authority($authority, $clear_cache = false) 877 { 878 static $cache; 879 if ($clear_cache) 880 { 881 $cache = null; 882 return; 883 } 884 if (!$cache) 885 $cache = array(); 886 887 if ($authority === null) 888 { 889 $this->iuserinfo = null; 890 $this->ihost = null; 891 $this->port = null; 892 return true; 893 } 894 elseif (isset($cache[$authority])) 895 { 896 list($this->iuserinfo, 897 $this->ihost, 898 $this->port, 899 $return) = $cache[$authority]; 900 901 return $return; 902 } 903 904 $remaining = $authority; 905 if (($iuserinfo_end = strrpos($remaining, '@')) !== false) 906 { 907 $iuserinfo = substr($remaining, 0, $iuserinfo_end); 908 $remaining = substr($remaining, $iuserinfo_end + 1); 909 } 910 else 911 { 912 $iuserinfo = null; 913 } 914 if (($port_start = strpos($remaining, ':', strpos($remaining, ']'))) !== false) 915 { 916 if (($port = substr($remaining, $port_start + 1)) === false) 917 { 918 $port = null; 919 } 920 $remaining = substr($remaining, 0, $port_start); 921 } 922 else 923 { 924 $port = null; 925 } 926 927 $return = $this->set_userinfo($iuserinfo) && 928 $this->set_host($remaining) && 929 $this->set_port($port); 930 931 $cache[$authority] = array($this->iuserinfo, 932 $this->ihost, 933 $this->port, 934 $return); 935 936 return $return; 937 } 938 939 /** 940 * Set the iuserinfo. 941 * 942 * @param string $iuserinfo 943 * @return bool 944 */ 945 public function set_userinfo($iuserinfo) 946 { 947 if ($iuserinfo === null) 948 { 949 $this->iuserinfo = null; 950 } 951 else 952 { 953 $this->iuserinfo = $this->replace_invalid_with_pct_encoding($iuserinfo, '!$&\'()*+,;=:'); 954 $this->scheme_normalization(); 955 } 956 957 return true; 958 } 959 960 /** 961 * Set the ihost. Returns true on success, false on failure (if there are 962 * any invalid characters). 
963 * 964 * @param string $ihost 965 * @return bool 966 */ 967 public function set_host($ihost) 968 { 969 if ($ihost === null) 970 { 971 $this->ihost = null; 972 return true; 973 } 974 elseif (substr($ihost, 0, 1) === '[' && substr($ihost, -1) === ']') 975 { 976 if (SimplePie_Net_IPv6::check_ipv6(substr($ihost, 1, -1))) 977 { 978 $this->ihost = '[' . SimplePie_Net_IPv6::compress(substr($ihost, 1, -1)) . ']'; 979 } 980 else 981 { 982 $this->ihost = null; 983 return false; 984 } 985 } 986 else 987 { 988 $ihost = $this->replace_invalid_with_pct_encoding($ihost, '!$&\'()*+,;='); 989 990 // Lowercase, but ignore pct-encoded sections (as they should 991 // remain uppercase). This must be done after the previous step 992 // as that can add unescaped characters. 993 $position = 0; 994 $strlen = strlen($ihost); 995 while (($position += strcspn($ihost, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ%', $position)) < $strlen) 996 { 997 if ($ihost[$position] === '%') 998 { 999 $position += 3; 1000 } 1001 else 1002 { 1003 $ihost[$position] = strtolower($ihost[$position]); 1004 $position++; 1005 } 1006 } 1007 1008 $this->ihost = $ihost; 1009 } 1010 1011 $this->scheme_normalization(); 1012 1013 return true; 1014 } 1015 1016 /** 1017 * Set the port. Returns true on success, false on failure (if there are 1018 * any invalid characters). 1019 * 1020 * @param string $port 1021 * @return bool 1022 */ 1023 public function set_port($port) 1024 { 1025 if ($port === null) 1026 { 1027 $this->port = null; 1028 return true; 1029 } 1030 elseif (strspn($port, '0123456789') === strlen($port)) 1031 { 1032 $this->port = (int) $port; 1033 $this->scheme_normalization(); 1034 return true; 1035 } 1036 1037 $this->port = null; 1038 return false; 1039 } 1040 1041 /** 1042 * Set the ipath. 
1043 * 1044 * @param string $ipath 1045 * @return bool 1046 */ 1047 public function set_path($ipath, $clear_cache = false) 1048 { 1049 static $cache; 1050 if ($clear_cache) 1051 { 1052 $cache = null; 1053 return; 1054 } 1055 if (!$cache) 1056 { 1057 $cache = array(); 1058 } 1059 1060 $ipath = (string) $ipath; 1061 1062 if (isset($cache[$ipath])) 1063 { 1064 $this->ipath = $cache[$ipath][(int) ($this->scheme !== null)]; 1065 } 1066 else 1067 { 1068 $valid = $this->replace_invalid_with_pct_encoding($ipath, '!$&\'()*+,;=@:/'); 1069 $removed = $this->remove_dot_segments($valid); 1070 1071 $cache[$ipath] = array($valid, $removed); 1072 $this->ipath = ($this->scheme !== null) ? $removed : $valid; 1073 } 1074 1075 $this->scheme_normalization(); 1076 return true; 1077 } 1078 1079 /** 1080 * Set the iquery. 1081 * 1082 * @param string $iquery 1083 * @return bool 1084 */ 1085 public function set_query($iquery) 1086 { 1087 if ($iquery === null) 1088 { 1089 $this->iquery = null; 1090 } 1091 else 1092 { 1093 $this->iquery = $this->replace_invalid_with_pct_encoding($iquery, '!$&\'()*+,;=:@/?', true); 1094 $this->scheme_normalization(); 1095 } 1096 return true; 1097 } 1098 1099 /** 1100 * Set the ifragment. 
1101 * 1102 * @param string $ifragment 1103 * @return bool 1104 */ 1105 public function set_fragment($ifragment) 1106 { 1107 if ($ifragment === null) 1108 { 1109 $this->ifragment = null; 1110 } 1111 else 1112 { 1113 $this->ifragment = $this->replace_invalid_with_pct_encoding($ifragment, '!$&\'()*+,;=:@/?'); 1114 $this->scheme_normalization(); 1115 } 1116 return true; 1117 } 1118 1119 /** 1120 * Convert an IRI to a URI (or parts thereof) 1121 * 1122 * @return string 1123 */ 1124 public function to_uri($string) 1125 { 1126 static $non_ascii; 1127 if (!$non_ascii) 1128 { 1129 $non_ascii = implode('', range("\x80", "\xFF")); 1130 } 1131 1132 $position = 0; 1133 $strlen = strlen($string); 1134 while (($position += strcspn($string, $non_ascii, $position)) < $strlen) 1135 { 1136 $string = substr_replace($string, sprintf('%%%02X', ord($string[$position])), $position, 1); 1137 $position += 3; 1138 $strlen += 2; 1139 } 1140 1141 return $string; 1142 } 1143 1144 /** 1145 * Get the complete IRI 1146 * 1147 * @return string 1148 */ 1149 public function get_iri() 1150 { 1151 if (!$this->is_valid()) 1152 { 1153 return false; 1154 } 1155 1156 $iri = ''; 1157 if ($this->scheme !== null) 1158 { 1159 $iri .= $this->scheme . ':'; 1160 } 1161 if (($iauthority = $this->get_iauthority()) !== null) 1162 { 1163 $iri .= '//' . $iauthority; 1164 } 1165 if ($this->ipath !== '') 1166 { 1167 $iri .= $this->ipath; 1168 } 1169 elseif (!empty($this->normalization[$this->scheme]['ipath']) && $iauthority !== null && $iauthority !== '') 1170 { 1171 $iri .= $this->normalization[$this->scheme]['ipath']; 1172 } 1173 if ($this->iquery !== null) 1174 { 1175 $iri .= '?' . $this->iquery; 1176 } 1177 if ($this->ifragment !== null) 1178 { 1179 $iri .= '#' . 
$this->ifragment; 1180 } 1181 1182 return $iri; 1183 } 1184 1185 /** 1186 * Get the complete URI 1187 * 1188 * @return string 1189 */ 1190 public function get_uri() 1191 { 1192 return $this->to_uri($this->get_iri()); 1193 } 1194 1195 /** 1196 * Get the complete iauthority 1197 * 1198 * @return string 1199 */ 1200 protected function get_iauthority() 1201 { 1202 if ($this->iuserinfo !== null || $this->ihost !== null || $this->port !== null) 1203 { 1204 $iauthority = ''; 1205 if ($this->iuserinfo !== null) 1206 { 1207 $iauthority .= $this->iuserinfo . '@'; 1208 } 1209 if ($this->ihost !== null) 1210 { 1211 $iauthority .= $this->ihost; 1212 } 1213 if ($this->port !== null && $this->port !== 0) 1214 { 1215 $iauthority .= ':' . $this->port; 1216 } 1217 return $iauthority; 1218 } 1219 1220 return null; 1221 } 1222 1223 /** 1224 * Get the complete authority 1225 * 1226 * @return string 1227 */ 1228 protected function get_authority() 1229 { 1230 $iauthority = $this->get_iauthority(); 1231 if (is_string($iauthority)) 1232 return $this->to_uri($iauthority); 1233 1234 return $iauthority; 1235 } 60 /** 61 * Scheme 62 * 63 * @var string 64 */ 65 protected $scheme = null; 66 67 /** 68 * User Information 69 * 70 * @var string 71 */ 72 protected $iuserinfo = null; 73 74 /** 75 * ihost 76 * 77 * @var string 78 */ 79 protected $ihost = null; 80 81 /** 82 * Port 83 * 84 * @var string 85 */ 86 protected $port = null; 87 88 /** 89 * ipath 90 * 91 * @var string 92 */ 93 protected $ipath = ''; 94 95 /** 96 * iquery 97 * 98 * @var string 99 */ 100 protected $iquery = null; 101 102 /** 103 * ifragment 104 * 105 * @var string 106 */ 107 protected $ifragment = null; 108 109 /** 110 * Normalization database 111 * 112 * Each key is the scheme, each value is an array with each key as the IRI 113 * part and value as the default value for that part. 
*/
    protected $normalization = [
        'acap' => [
            'port' => 674
        ],
        'dict' => [
            'port' => 2628
        ],
        'file' => [
            'ihost' => 'localhost'
        ],
        'http' => [
            'port' => 80,
            'ipath' => '/'
        ],
        'https' => [
            'port' => 443,
            'ipath' => '/'
        ],
    ];

    /**
     * Return the entire IRI when you try and read the object as a string
     *
     * get_iri() returns false for an invalid IRI. The result is cast so that
     * this method always hands back a string (an invalid IRI becomes ''),
     * because PHP rejects a non-string return value from __toString() on
     * versions that do not coerce it.
     *
     * @return string
     */
    public function __toString()
    {
        return (string) $this->get_iri();
    }

    /**
     * Overload __set() to provide access via properties
     *
     * Dispatches to the matching set_*() method. The "i"-prefixed names
     * (iauthority, iuserinfo, ihost, ipath, iquery, ifragment) are mapped to
     * the setter named without the leading "i". Unknown names are ignored.
     *
     * @param string $name Property name
     * @param mixed $value Property value
     */
    public function __set($name, $value)
    {
        if (method_exists($this, 'set_' . $name)) {
            call_user_func([$this, 'set_' . $name], $value);
        } elseif (
            $name === 'iauthority'
            || $name === 'iuserinfo'
            || $name === 'ihost'
            || $name === 'ipath'
            || $name === 'iquery'
            || $name === 'ifragment'
        ) {
            call_user_func([$this, 'set_' . substr($name, 1)], $value);
        }
    }

    /**
     * Overload __get() to provide access via properties
     *
     * Resolves, in order: the computed values (iri, uri, iauthority,
     * authority), a property with exactly this name, the "i"-prefixed
     * property (host -> ihost), then the name without its first character
     * (ischeme -> scheme). When the resolved value is null, the scheme's
     * default from the normalization table is returned if one exists.
     *
     * @param string $name Property name
     * @return mixed
     */
    public function __get($name)
    {
        // isset() returns false for null, we don't want to do that
        // Also why we use array_key_exists below instead of isset()
        $props = get_object_vars($this);

        if (
            $name === 'iri' ||
            $name === 'uri' ||
            $name === 'iauthority' ||
            $name === 'authority'
        ) {
            $return = $this->{"get_$name"}();
        } elseif (array_key_exists($name, $props)) {
            $return = $this->$name;
        }
        // host -> ihost
        elseif (($prop = 'i' . $name) && array_key_exists($prop, $props)) {
            $name = $prop;
            $return = $this->$prop;
        }
        // ischeme -> scheme
        elseif (($prop = substr($name, 1)) && array_key_exists($prop, $props)) {
            $name = $prop;
            $return = $this->$prop;
        } else {
            trigger_error('Undefined property: ' . get_class($this) . '::' . $name, E_USER_NOTICE);
            $return = null;
        }

        if ($return === null && isset($this->normalization[$this->scheme][$name])) {
            return $this->normalization[$this->scheme][$name];
        }

        return $return;
    }

    /**
     * Overload __isset() to provide access via properties
     *
     * @param string $name Property name
     * @return bool True when a get_*() method exists for the name or the
     *              property itself is set and non-null
     */
    public function __isset($name)
    {
        return method_exists($this, 'get_' . $name) || isset($this->$name);
    }

    /**
     * Overload __unset() to provide access via properties
     *
     * "Unsetting" passes an empty string to the matching set_*() method;
     * names without a setter are ignored.
     *
     * @param string $name Property name
     */
    public function __unset($name)
    {
        if (method_exists($this, 'set_' . $name)) {
            call_user_func([$this, 'set_' . $name], '');
        }
    }

    /**
     * Create a new IRI object, from a specified string
     *
     * @param string|null $iri
     */
    public function __construct($iri = null)
    {
        $this->set_iri($iri);
    }

    /**
     * Clean up
     *
     * Resets the static caches kept inside set_iri(), set_path() and
     * set_authority().
     */
    public function __destruct()
    {
        $this->set_iri(null, true);
        $this->set_path(null, true);
        $this->set_authority(null, true);
    }

    /**
     * Create a new IRI object by resolving a relative IRI
     *
     * Returns false if $relative is not valid or if $base is not a valid
     * absolute IRI, otherwise an IRI.
     *
     * @param IRI|string $base (Absolute) Base IRI
     * @param IRI|string $relative Relative IRI
     * @return IRI|false
     */
    public static function absolutize($base, $relative)
    {
        if (!($relative instanceof IRI)) {
            $relative = new IRI($relative);
        }
        if (!$relative->is_valid()) {
            return false;
        } elseif ($relative->scheme !== null) {
            // Already absolute: nothing to resolve.
            return clone $relative;
        } else {
            if (!($base instanceof IRI)) {
                $base = new IRI($base);
            }
            if ($base->scheme !== null && $base->is_valid()) {
                if ($relative->get_iri() !== '') {
                    if ($relative->iuserinfo !== null || $relative->ihost !== null || $relative->port !== null) {
                        // Network-path reference: only the scheme comes from the base.
                        $target = clone $relative;
                        $target->scheme = $base->scheme;
                    } else {
                        $target = new IRI();
                        $target->scheme = $base->scheme;
                        $target->iuserinfo = $base->iuserinfo;
                        $target->ihost = $base->ihost;
                        $target->port = $base->port;
                        if ($relative->ipath !== '') {
                            if ($relative->ipath[0] === '/') {
                                $target->ipath = $relative->ipath;
                            } elseif (($base->iuserinfo !== null || $base->ihost !== null || $base->port !== null) && $base->ipath === '') {
                                $target->ipath = '/' . $relative->ipath;
                            } elseif (($last_segment = strrpos($base->ipath, '/')) !== false) {
                                // Replace everything after the last "/" of the base path.
                                $target->ipath = substr($base->ipath, 0, $last_segment + 1) . $relative->ipath;
                            } else {
                                $target->ipath = $relative->ipath;
                            }
                            $target->ipath = $target->remove_dot_segments($target->ipath);
                            $target->iquery = $relative->iquery;
                        } else {
                            $target->ipath = $base->ipath;
                            if ($relative->iquery !== null) {
                                $target->iquery = $relative->iquery;
                            } elseif ($base->iquery !== null) {
                                $target->iquery = $base->iquery;
                            }
                        }
                        $target->ifragment = $relative->ifragment;
                    }
                } else {
                    // Empty reference: the base itself, minus its fragment.
                    $target = clone $base;
                    $target->ifragment = null;
                }
                $target->scheme_normalization();
                return $target;
            }

            return false;
        }
    }

    /**
     * Parse an IRI into scheme/authority/path/query/fragment segments
     *
     * Components that are absent from the input are set to null in the
     * returned match array (the path defaults to an empty string).
     *
     * @param string $iri
     * @return array|false The match array, or false when the pattern does not match
     */
    protected function parse_iri($iri)
    {
        $iri = trim($iri, "\x20\x09\x0A\x0C\x0D");
        if (preg_match('/^((?P<scheme>[^:\/?#]+):)?(\/\/(?P<authority>[^\/?#]*))?(?P<path>[^?#]*)(\?(?P<query>[^#]*))?(#(?P<fragment>.*))?$/', $iri, $match)) {
            if ($match[1] === '') {
                $match['scheme'] = null;
            }
            if (!isset($match[3]) || $match[3] === '') {
                $match['authority'] = null;
            }
            if (!isset($match[5])) {
                $match['path'] = '';
            }
            if (!isset($match[6]) || $match[6] === '') {
                $match['query'] = null;
            }
            if (!isset($match[8]) || $match[8] === '') {
                $match['fragment'] = null;
            }
            return $match;
        }

        // This can occur when a paragraph is accidentally parsed as a URI
        return false;
    }

    /**
     * Remove dot segments from a path
     *
     * @param string $input
     * @return string
     */
    protected function remove_dot_segments($input)
    {
        $output = '';
        while (strpos($input, './') !== false || strpos($input, '/.') !== false || $input === '.' || $input === '..') {
            // A: If the input buffer begins with a prefix of "../" or "./", then remove that prefix from the input buffer; otherwise,
            if (strpos($input, '../') === 0) {
                $input = substr($input, 3);
            } elseif (strpos($input, './') === 0) {
                $input = substr($input, 2);
            }
            // B: if the input buffer begins with a prefix of "/./" or "/.", where "." is a complete path segment, then replace that prefix with "/" in the input buffer; otherwise,
            elseif (strpos($input, '/./') === 0) {
                $input = substr($input, 2);
            } elseif ($input === '/.') {
                $input = '/';
            }
            // C: if the input buffer begins with a prefix of "/../" or "/..", where ".." is a complete path segment, then replace that prefix with "/" in the input buffer and remove the last segment and its preceding "/" (if any) from the output buffer; otherwise,
            elseif (strpos($input, '/../') === 0) {
                $input = substr($input, 3);
                $output = substr_replace($output, '', intval(strrpos($output, '/')));
            } elseif ($input === '/..') {
                $input = '/';
                $output = substr_replace($output, '', intval(strrpos($output, '/')));
            }
            // D: if the input buffer consists only of "." or "..", then remove that from the input buffer; otherwise,
            elseif ($input === '.' || $input === '..') {
                $input = '';
            }
            // E: move the first path segment in the input buffer to the end of the output buffer, including the initial "/" character (if any) and any subsequent characters up to, but not including, the next "/" character or the end of the input buffer
            elseif (($pos = strpos($input, '/', 1)) !== false) {
                $output .= substr($input, 0, $pos);
                $input = substr_replace($input, '', 0, $pos);
            } else {
                $output .= $input;
                $input = '';
            }
        }
        return $output . $input;
    }

    /**
     * Replace invalid character with percent encoding
     *
     * @param string $string Input string
     * @param string $extra_chars Valid characters not in iunreserved or
     *                            iprivate (this is ASCII-only)
     * @param bool $iprivate Allow iprivate
     * @return string
     */
    protected function replace_invalid_with_pct_encoding($string, $extra_chars, $iprivate = false)
    {
        // Normalize as many pct-encoded sections as possible
        $string = preg_replace_callback('/(?:%[A-Fa-f0-9]{2})+/', [$this, 'remove_iunreserved_percent_encoded'], $string);

        // Replace invalid percent characters
        $string = preg_replace('/%(?![A-Fa-f0-9]{2})/', '%25', $string);

        // Add unreserved and % to $extra_chars (the latter is safe because all
        // pct-encoded sections are now valid).
        $extra_chars .= 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~%';

        // Now replace any bytes that aren't allowed with their pct-encoded versions
        $position = 0;
        $strlen = strlen($string);
        while (($position += strspn($string, $extra_chars, $position)) < $strlen) {
            $value = ord($string[$position]);
            $character = 0;

            // Start position
            $start = $position;

            // By default we are valid
            $valid = true;

            // No one byte sequences are valid due to the while.
            // Two byte sequence:
            if (($value & 0xE0) === 0xC0) {
                $character = ($value & 0x1F) << 6;
                $length = 2;
                $remaining = 1;
            }
            // Three byte sequence:
            elseif (($value & 0xF0) === 0xE0) {
                $character = ($value & 0x0F) << 12;
                $length = 3;
                $remaining = 2;
            }
            // Four byte sequence:
            elseif (($value & 0xF8) === 0xF0) {
                $character = ($value & 0x07) << 18;
                $length = 4;
                $remaining = 3;
            }
            // Invalid byte:
            else {
                $valid = false;
                $length = 1;
                $remaining = 0;
            }

            if ($remaining) {
                if ($position + $length <= $strlen) {
                    for ($position++; $remaining; $position++) {
                        $value = ord($string[$position]);

                        // Check that the byte is valid, then add it to the character:
                        if (($value & 0xC0) === 0x80) {
                            $character |= ($value & 0x3F) << (--$remaining * 6);
                        }
                        // If it is invalid, count the sequence as invalid and reprocess the current byte:
                        else {
                            $valid = false;
                            $position--;
                            break;
                        }
                    }
                } else {
                    // Sequence is cut off by the end of the string.
                    $position = $strlen - 1;
                    $valid = false;
                }
            }

            // Percent encode anything invalid or not in ucschar
            if (
                // Invalid sequences
                !$valid
                // Non-shortest form sequences are invalid
                || $length > 1 && $character <= 0x7F
                || $length > 2 && $character <= 0x7FF
                || $length > 3 && $character <= 0xFFFF
                // Outside of range of ucschar codepoints
                // Noncharacters
                || ($character & 0xFFFE) === 0xFFFE
                || $character >= 0xFDD0 && $character <= 0xFDEF
                || (
                    // Everything else not in ucschar
                    $character > 0xD7FF && $character < 0xF900
                    || $character < 0xA0
                    || $character > 0xEFFFD
                )
                && (
                    // Everything not in iprivate, if it applies
                    !$iprivate
                    || $character < 0xE000
                    || $character > 0x10FFFD
                )
            ) {
                // If we were a character, pretend we weren't, but rather an error.
                if ($valid) {
                    $position--;
                }

                // Each replaced byte grows the string by two characters.
                for ($j = $start; $j <= $position; $j++) {
                    $string = substr_replace($string, sprintf('%%%02X', ord($string[$j])), $j, 1);
                    $j += 2;
                    $position += 2;
                    $strlen += 2;
                }
            }
        }

        return $string;
    }

    /**
     * Callback function for preg_replace_callback.
     *
     * Removes sequences of percent encoded bytes that represent UTF-8
     * encoded characters in iunreserved
     *
     * @param array $match PCRE match
     * @return string Replacement
     */
    protected function remove_iunreserved_percent_encoded($match)
    {
        // As we just have valid percent encoded sequences we can just explode
        // and ignore the first member of the returned array (an empty string).
        $bytes = explode('%', $match[0]);

        // Initialize the new string (this is what will be returned) and that
        // there are no bytes remaining in the current sequence (unsurprising
        // at the first byte!).
        $string = '';
        $remaining = 0;

        // these variables will be initialized in the loop but PHPStan is not able to detect it currently
        $start = 0;
        $character = 0;
        $length = 0;
        $valid = true;

        // Loop over each and every byte, and set $value to its value
        for ($i = 1, $len = count($bytes); $i < $len; $i++) {
            $value = hexdec($bytes[$i]);

            // If we're the first byte of sequence:
            if (!$remaining) {
                // Start position
                $start = $i;

                // By default we are valid
                $valid = true;

                // One byte sequence:
                if ($value <= 0x7F) {
                    $character = $value;
                    $length = 1;
                }
                // Two byte sequence:
                elseif (($value & 0xE0) === 0xC0) {
                    $character = ($value & 0x1F) << 6;
                    $length = 2;
                    $remaining = 1;
                }
                // Three byte sequence:
                elseif (($value & 0xF0) === 0xE0) {
                    $character = ($value & 0x0F) << 12;
                    $length = 3;
                    $remaining = 2;
                }
                // Four byte sequence:
                elseif (($value & 0xF8) === 0xF0) {
                    $character = ($value & 0x07) << 18;
                    $length = 4;
                    $remaining = 3;
                }
                // Invalid byte:
                else {
                    $valid = false;
                    $remaining = 0;
                }
            }
            // Continuation byte:
            else {
                // Check that the byte is valid, then add it to the character:
                if (($value & 0xC0) === 0x80) {
                    $remaining--;
                    $character |= ($value & 0x3F) << ($remaining * 6);
                }
                // If it is invalid, count the sequence as invalid and reprocess the current byte as the start of a sequence:
                else {
                    $valid = false;
                    $remaining = 0;
                    $i--;
                }
            }

            // If we've reached the end of the current byte sequence, append it to the output string
            if (!$remaining) {
                // Percent encode anything invalid or not in iunreserved
                if (
                    // Invalid sequences
                    !$valid
                    // Non-shortest form sequences are invalid
                    || $length > 1 && $character <= 0x7F
                    || $length > 2 && $character <= 0x7FF
                    || $length > 3 && $character <= 0xFFFF
                    // Outside of range of iunreserved codepoints
                    || $character < 0x2D
                    || $character > 0xEFFFD
                    // Noncharacters
                    || ($character & 0xFFFE) === 0xFFFE
                    || $character >= 0xFDD0 && $character <= 0xFDEF
                    // Everything else not in iunreserved (this is all BMP)
                    || $character === 0x2F
                    || $character > 0x39 && $character < 0x41
                    || $character > 0x5A && $character < 0x61
                    || $character > 0x7A && $character < 0x7E
                    || $character > 0x7E && $character < 0xA0
                    || $character > 0xD7FF && $character < 0xF900
                ) {
                    for ($j = $start; $j <= $i; $j++) {
                        $string .= '%' . strtoupper($bytes[$j]);
                    }
                } else {
                    for ($j = $start; $j <= $i; $j++) {
                        $string .= chr(hexdec($bytes[$j]));
                    }
                }
            }
        }

        // If we have any bytes left over they are invalid (i.e., we are
        // mid-way through a multi-byte sequence)
        if ($remaining) {
            for ($j = $start; $j < $len; $j++) {
                $string .= '%' . strtoupper($bytes[$j]);
            }
        }

        return $string;
    }

    /**
     * Drop components that equal the current scheme's default
     *
     * Each component whose value is identical to the default listed for the
     * current scheme in the normalization table is reset to its empty state
     * (null, or '' for the path).
     */
    protected function scheme_normalization()
    {
        if (isset($this->normalization[$this->scheme]['iuserinfo']) && $this->iuserinfo === $this->normalization[$this->scheme]['iuserinfo']) {
            $this->iuserinfo = null;
        }
        if (isset($this->normalization[$this->scheme]['ihost']) && $this->ihost === $this->normalization[$this->scheme]['ihost']) {
            $this->ihost = null;
        }
        if (isset($this->normalization[$this->scheme]['port']) && $this->port === $this->normalization[$this->scheme]['port']) {
            $this->port = null;
        }
        if (isset($this->normalization[$this->scheme]['ipath']) && $this->ipath === $this->normalization[$this->scheme]['ipath']) {
            $this->ipath = '';
        }
        if (isset($this->normalization[$this->scheme]['iquery']) && $this->iquery === $this->normalization[$this->scheme]['iquery']) {
            $this->iquery = null;
        }
        if (isset($this->normalization[$this->scheme]['ifragment']) && $this->ifragment === $this->normalization[$this->scheme]['ifragment']) {
            $this->ifragment = null;
        }
    }

    /**
     * Check if the object represents a valid IRI. This needs to be done on each
     * call as some things change depending on another part of the IRI.
     *
     * @return bool
     */
    public function is_valid()
    {
        if ($this->ipath === '') {
            return true;
        }

        $isauthority = $this->iuserinfo !== null || $this->ihost !== null ||
            $this->port !== null;
        if ($isauthority && $this->ipath[0] === '/') {
            return true;
        }

        if (!$isauthority && (substr($this->ipath, 0, 2) === '//')) {
            return false;
        }

        // Relative urls cannot have a colon in the first path segment (and the
        // slashes themselves are not included so skip the first character).
        if (!$this->scheme && !$isauthority &&
            strpos($this->ipath, ':') !== false &&
            strpos($this->ipath, '/', 1) !== false &&
            strpos($this->ipath, ':') < strpos($this->ipath, '/', 1)) {
            return false;
        }

        return true;
    }

    /**
     * Set the entire IRI. Returns true on success, false on failure (if there
     * are any invalid characters).
     *
     * Parsed results are memoised in a static cache keyed by the IRI string.
     *
     * @param string|null $iri
     * @param bool $clear_cache When true, only reset the static cache and
     *                          return without touching the object
     * @return bool|null Null on the cache-clearing path
     */
    public function set_iri($iri, $clear_cache = false)
    {
        static $cache;
        if ($clear_cache) {
            $cache = null;
            return;
        }
        if (!$cache) {
            $cache = [];
        }

        if ($iri === null) {
            return true;
        }

        // Normalise to a string once so the same value is used both as the
        // cache key and as parser input; a non-string argument would
        // otherwise be an illegal array offset.
        $iri = (string) $iri;

        if (isset($cache[$iri])) {
            [
                $this->scheme,
                $this->iuserinfo,
                $this->ihost,
                $this->port,
                $this->ipath,
                $this->iquery,
                $this->ifragment,
                $return
            ] = $cache[$iri];

            return $return;
        }

        $parsed = $this->parse_iri($iri);
        if (!$parsed) {
            return false;
        }

        $return = $this->set_scheme($parsed['scheme'])
            && $this->set_authority($parsed['authority'])
            && $this->set_path($parsed['path'])
            && $this->set_query($parsed['query'])
            && $this->set_fragment($parsed['fragment']);

        $cache[$iri] = [
            $this->scheme,
            $this->iuserinfo,
            $this->ihost,
            $this->port,
            $this->ipath,
            $this->iquery,
            $this->ifragment,
            $return
        ];

        return $return;
    }

    /**
     * Set the scheme. Returns true on success, false on failure (if there are
     * any invalid characters).
     *
     * A valid scheme is stored lowercased; an invalid one resets the scheme
     * to null.
     *
     * @param string|null $scheme
     * @return bool
     */
    public function set_scheme($scheme)
    {
        if ($scheme === null) {
            $this->scheme = null;
        } elseif (!preg_match('/^[A-Za-z][0-9A-Za-z+\-.]*$/', $scheme)) {
            $this->scheme = null;
            return false;
        } else {
            $this->scheme = strtolower($scheme);
        }
        return true;
    }

    /**
     * Set the authority.
* Returns true on success, false on failure (if there are
     * any invalid characters).
     *
     * The authority is split into userinfo (everything before the last "@"),
     * host, and port (everything after the first ":" that follows any "]").
     * Results are memoised in a static cache keyed by the authority string.
     *
     * @param string|null $authority
     * @param bool $clear_cache When true, only reset the static cache and
     *                          return without touching the object
     * @return bool|null Null on the cache-clearing path
     */
    public function set_authority($authority, $clear_cache = false)
    {
        static $cache;

        if ($clear_cache) {
            $cache = null;
            return;
        }
        if (!$cache) {
            $cache = [];
        }

        if ($authority === null) {
            $this->iuserinfo = null;
            $this->ihost = null;
            $this->port = null;
            return true;
        }

        if (isset($cache[$authority])) {
            [$this->iuserinfo, $this->ihost, $this->port, $result] = $cache[$authority];

            return $result;
        }

        // Peel the userinfo off the front.
        $host_part = $authority;
        $userinfo_part = null;
        $at_sign = strrpos($host_part, '@');
        if ($at_sign !== false) {
            $userinfo_part = substr($host_part, 0, $at_sign);
            $host_part = substr($host_part, $at_sign + 1);
        }

        // Peel the port off the back; searching from the "]" onwards keeps
        // the colons inside a bracketed IP literal out of the match.
        $port_part = null;
        $colon = strpos($host_part, ':', intval(strpos($host_part, ']')));
        if ($colon !== false) {
            $port_part = substr($host_part, $colon + 1);
            if ($port_part === false) {
                $port_part = null;
            }
            $host_part = substr($host_part, 0, $colon);
        }

        $result = $this->set_userinfo($userinfo_part)
            && $this->set_host($host_part)
            && $this->set_port($port_part);

        $cache[$authority] = [$this->iuserinfo, $this->ihost, $this->port, $result];

        return $result;
    }

    /**
     * Set the iuserinfo.
     *
     * A non-null value is percent-encoded where needed and then run through
     * scheme normalization; null simply clears the component.
     *
     * @param string|null $iuserinfo
     * @return bool Always true
     */
    public function set_userinfo($iuserinfo)
    {
        if ($iuserinfo !== null) {
            $this->iuserinfo = $this->replace_invalid_with_pct_encoding($iuserinfo, '!$&\'()*+,;=:');
            $this->scheme_normalization();

            return true;
        }

        $this->iuserinfo = null;

        return true;
    }

    /**
     * Set the ihost. Returns true on success, false on failure (if there are
     * any invalid characters).
*
     * A bracketed value is treated as an IPv6 literal: it is validated and
     * stored in compressed form, and a literal that fails validation resets
     * the host to null. Any other value is percent-encoded where needed and
     * lowercased outside of its percent-encoded triplets.
     *
     * @param string|null $ihost
     * @return bool
     */
    public function set_host($ihost)
    {
        if ($ihost === null) {
            $this->ihost = null;
            return true;
        }

        $is_bracketed = substr($ihost, 0, 1) === '[' && substr($ihost, -1) === ']';

        if ($is_bracketed) {
            $address = substr($ihost, 1, -1);
            if (!\SimplePie\Net\IPv6::check_ipv6($address)) {
                $this->ihost = null;
                return false;
            }
            $this->ihost = '[' . \SimplePie\Net\IPv6::compress($address) . ']';
        } else {
            $ihost = $this->replace_invalid_with_pct_encoding($ihost, '!$&\'()*+,;=');

            // Lowercase, but ignore pct-encoded sections (as they should
            // remain uppercase). This must be done after the previous step
            // as that can add unescaped characters.
            $length = strlen($ihost);
            $offset = strcspn($ihost, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ%');
            while ($offset < $length) {
                if ($ihost[$offset] === '%') {
                    // Skip the whole "%XX" triplet.
                    $offset += 3;
                } else {
                    $ihost[$offset] = strtolower($ihost[$offset]);
                    $offset++;
                }
                $offset += strcspn($ihost, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ%', $offset);
            }

            $this->ihost = $ihost;
        }

        $this->scheme_normalization();

        return true;
    }

    /**
     * Set the port. Returns true on success, false on failure (if there are
     * any invalid characters).
     *
     * Only strings made up entirely of ASCII digits are accepted; the value
     * is stored as an integer. Anything else resets the port to null.
     *
     * @param string|null $port
     * @return bool
     */
    public function set_port($port)
    {
        if ($port === null) {
            $this->port = null;
            return true;
        }

        $digits_only = strspn($port, '0123456789') === strlen($port);
        if (!$digits_only) {
            $this->port = null;
            return false;
        }

        $this->port = (int) $port;
        $this->scheme_normalization();

        return true;
    }

    /**
     * Set the ipath.
*
     * The path is percent-encoded where needed. When a scheme is set, dot
     * segments are removed as well. Both variants are memoised in a static
     * cache keyed by the input path.
     *
     * @param string|null $ipath
     * @param bool $clear_cache When true, only reset the static cache and
     *                          return without touching the object
     * @return bool|null True, or null on the cache-clearing path
     */
    public function set_path($ipath, $clear_cache = false)
    {
        static $cache;

        if ($clear_cache) {
            $cache = null;
            return;
        }
        if (!$cache) {
            $cache = [];
        }

        $ipath = (string) $ipath;

        if (!isset($cache[$ipath])) {
            $encoded = $this->replace_invalid_with_pct_encoding($ipath, '!$&\'()*+,;=@:/');
            $cache[$ipath] = [$encoded, $this->remove_dot_segments($encoded)];
        }

        // Slot 0 holds the encoded path, slot 1 the encoded path with dot
        // segments removed; the latter is used only when a scheme is set.
        $this->ipath = $cache[$ipath][$this->scheme !== null ? 1 : 0];

        $this->scheme_normalization();

        return true;
    }

    /**
     * Set the iquery.
     *
     * A non-null value is percent-encoded where needed (iprivate characters
     * are allowed) and then run through scheme normalization; null clears
     * the component.
     *
     * @param string|null $iquery
     * @return bool Always true
     */
    public function set_query($iquery)
    {
        if ($iquery !== null) {
            $this->iquery = $this->replace_invalid_with_pct_encoding($iquery, '!$&\'()*+,;=:@/?', true);
            $this->scheme_normalization();
        } else {
            $this->iquery = null;
        }

        return true;
    }

    /**
     * Set the ifragment.
*
     * A non-null value is percent-encoded where needed and then run through
     * scheme normalization; null clears the component.
     *
     * @param string|null $ifragment
     * @return bool Always true
     */
    public function set_fragment($ifragment)
    {
        if ($ifragment !== null) {
            $this->ifragment = $this->replace_invalid_with_pct_encoding($ifragment, '!$&\'()*+,;=:@/?');
            $this->scheme_normalization();
        } else {
            $this->ifragment = null;
        }

        return true;
    }

    /**
     * Convert an IRI to a URI (or parts thereof)
     *
     * Every byte in the range 0x80-0xFF is replaced by its uppercase
     * percent-encoded form; all other bytes are left as they are.
     *
     * @param string $string
     * @return string
     */
    public function to_uri($string)
    {
        static $non_ascii;
        if (!$non_ascii) {
            $non_ascii = implode('', range("\x80", "\xFF"));
        }

        $length = strlen($string);
        $offset = strcspn($string, $non_ascii);
        while ($offset < $length) {
            $escaped = sprintf('%%%02X', ord($string[$offset]));
            $string = substr_replace($string, $escaped, $offset, 1);

            // One byte became three: step over them and grow the length.
            $offset += 3;
            $length += 2;
            $offset += strcspn($string, $non_ascii, $offset);
        }

        return $string;
    }

    /**
     * Get the complete IRI
     *
     * When the path is empty but an authority is present, the scheme's
     * default path from the normalization table is emitted instead.
     *
     * @return string|false False when the IRI is not valid
     */
    public function get_iri()
    {
        if (!$this->is_valid()) {
            return false;
        }

        $iauthority = $this->get_iauthority();

        $iri = ($this->scheme !== null) ? $this->scheme . ':' : '';
        if ($iauthority !== null) {
            $iri .= '//' . $iauthority;
        }

        if ($this->ipath !== '') {
            $iri .= $this->ipath;
        } elseif (!empty($this->normalization[$this->scheme]['ipath']) && $iauthority !== null && $iauthority !== '') {
            $iri .= $this->normalization[$this->scheme]['ipath'];
        }

        if ($this->iquery !== null) {
            $iri .= '?' . $this->iquery;
        }
        if ($this->ifragment !== null) {
            $iri .= '#' . $this->ifragment;
        }

        return $iri;
    }

    /**
     * Get the complete URI
     *
     * @return string|false The result of get_iri() passed through to_uri()
     */
    public function get_uri()
    {
        $iri = $this->get_iri();

        return $this->to_uri($iri);
    }

    /**
     * Get the complete iauthority
     *
     * A port of 0 is left out of the result.
     *
     * @return string|null Null when userinfo, host and port are all unset
     */
    protected function get_iauthority()
    {
        if ($this->iuserinfo === null && $this->ihost === null && $this->port === null) {
            return null;
        }

        $iauthority = '';
        if ($this->iuserinfo !== null) {
            $iauthority .= $this->iuserinfo . '@';
        }
        if ($this->ihost !== null) {
            $iauthority .= $this->ihost;
        }
        if ($this->port !== null && $this->port !== 0) {
            $iauthority .= ':' . $this->port;
        }

        return $iauthority;
    }

    /**
     * Get the complete authority
     *
     * @return string|null The iauthority passed through to_uri(), or null
     *                     when there is no authority
     */
    protected function get_authority()
    {
        $iauthority = $this->get_iauthority();

        return is_string($iauthority) ? $this->to_uri($iauthority) : $iauthority;
    }
}

class_alias('SimplePie\IRI', 'SimplePie_IRI');
Note: See TracChangeset
for help on using the changeset viewer.