Changeset 59141 for trunk/src/wp-includes/SimplePie/src/Sanitize.php
- Timestamp:
- 09/30/2024 10:48:16 PM (8 months ago)
- Location:
- trunk/src/wp-includes/SimplePie/src
- Files:
-
- 1 added
- 1 moved
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/wp-includes/SimplePie/src/Sanitize.php
r59140 r59141 1 1 <?php 2 2 3 /** 3 4 * SimplePie … … 6 7 * Takes the hard work out of managing a complete RSS/Atom solution. 7 8 * 8 * Copyright (c) 2004-20 16, Ryan Parman, Sam Sneddon, Ryan McCue, and contributors9 * Copyright (c) 2004-2022, Ryan Parman, Sam Sneddon, Ryan McCue, and contributors 9 10 * All rights reserved. 10 11 * … … 42 43 */ 43 44 45 namespace SimplePie; 46 47 use InvalidArgumentException; 48 use SimplePie\Cache\Base; 49 use SimplePie\Cache\BaseDataCache; 50 use SimplePie\Cache\CallableNameFilter; 51 use SimplePie\Cache\DataCache; 52 use SimplePie\Cache\NameFilter; 53 44 54 /** 45 55 * Used for data cleanup and post-processing 46 56 * 47 57 * 48 * This class can be overloaded with {@see SimplePie::set_sanitize_class()}58 * This class can be overloaded with {@see \SimplePie\SimplePie::set_sanitize_class()} 49 59 * 50 60 * @package SimplePie 51 61 * @todo Move to using an actual HTML parser (this will allow tags to be properly stripped, and to switch between HTML and XHTML), this will also make it easier to shorten a string while preserving HTML tags 52 62 */ 53 class S implePie_Sanitize63 class Sanitize implements RegistryAware 54 64 { 55 // Private vars 56 var $base; 57 58 // Options 59 var $remove_div = true; 60 var $image_handler = ''; 61 var $strip_htmltags = array('base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style'); 62 var $encode_instead_of_strip = false; 63 var $strip_attributes = array('bgsound', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc'); 64 var $add_attributes = array('audio' => array('preload' => 'none'), 'iframe' => array('sandbox' => 'allow-scripts allow-same-origin'), 'video' => array('preload' => 'none')); 65 var $strip_comments = false; 66 var $output_encoding = 'UTF-8'; 67 var $enable_cache = true; 68 var $cache_location 
= './cache'; 69 var $cache_name_function = 'md5'; 70 var $timeout = 10; 71 var $useragent = ''; 72 var $force_fsockopen = false; 73 var $replace_url_attributes = null; 74 var $registry; 75 76 /** 77 * List of domains for which to force HTTPS. 78 * @see SimplePie_Sanitize::set_https_domains() 79 * Array is a tree split at DNS levels. Example: 80 * array('biz' => true, 'com' => array('example' => true), 'net' => array('example' => array('www' => true))) 81 */ 82 var $https_domains = array(); 83 84 public function __construct() 85 { 86 // Set defaults 87 $this->set_url_replacements(null); 88 } 89 90 public function remove_div($enable = true) 91 { 92 $this->remove_div = (bool) $enable; 93 } 94 95 public function set_image_handler($page = false) 96 { 97 if ($page) 98 { 99 $this->image_handler = (string) $page; 100 } 101 else 102 { 103 $this->image_handler = false; 104 } 105 } 106 107 public function set_registry(SimplePie_Registry $registry) 108 { 109 $this->registry = $registry; 110 } 111 112 public function pass_cache_data($enable_cache = true, $cache_location = './cache', $cache_name_function = 'md5', $cache_class = 'SimplePie_Cache') 113 { 114 if (isset($enable_cache)) 115 { 116 $this->enable_cache = (bool) $enable_cache; 117 } 118 119 if ($cache_location) 120 { 121 $this->cache_location = (string) $cache_location; 122 } 123 124 if ($cache_name_function) 125 { 126 $this->cache_name_function = (string) $cache_name_function; 127 } 128 } 129 130 public function pass_file_data($file_class = 'SimplePie_File', $timeout = 10, $useragent = '', $force_fsockopen = false) 131 { 132 if ($timeout) 133 { 134 $this->timeout = (string) $timeout; 135 } 136 137 if ($useragent) 138 { 139 $this->useragent = (string) $useragent; 140 } 141 142 if ($force_fsockopen) 143 { 144 $this->force_fsockopen = (string) $force_fsockopen; 145 } 146 } 147 148 public function strip_htmltags($tags = array('base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 
'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style')) 149 { 150 if ($tags) 151 { 152 if (is_array($tags)) 153 { 154 $this->strip_htmltags = $tags; 155 } 156 else 157 { 158 $this->strip_htmltags = explode(',', $tags); 159 } 160 } 161 else 162 { 163 $this->strip_htmltags = false; 164 } 165 } 166 167 public function encode_instead_of_strip($encode = false) 168 { 169 $this->encode_instead_of_strip = (bool) $encode; 170 } 171 172 public function strip_attributes($attribs = array('bgsound', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc')) 173 { 174 if ($attribs) 175 { 176 if (is_array($attribs)) 177 { 178 $this->strip_attributes = $attribs; 179 } 180 else 181 { 182 $this->strip_attributes = explode(',', $attribs); 183 } 184 } 185 else 186 { 187 $this->strip_attributes = false; 188 } 189 } 190 191 public function add_attributes($attribs = array('audio' => array('preload' => 'none'), 'iframe' => array('sandbox' => 'allow-scripts allow-same-origin'), 'video' => array('preload' => 'none'))) 192 { 193 if ($attribs) 194 { 195 if (is_array($attribs)) 196 { 197 $this->add_attributes = $attribs; 198 } 199 else 200 { 201 $this->add_attributes = explode(',', $attribs); 202 } 203 } 204 else 205 { 206 $this->add_attributes = false; 207 } 208 } 209 210 public function strip_comments($strip = false) 211 { 212 $this->strip_comments = (bool) $strip; 213 } 214 215 public function set_output_encoding($encoding = 'UTF-8') 216 { 217 $this->output_encoding = (string) $encoding; 218 } 219 220 /** 221 * Set element/attribute key/value pairs of HTML attributes 222 * containing URLs that need to be resolved relative to the feed 223 * 224 * Defaults to |a|@href, |area|@href, |blockquote|@cite, |del|@cite, 225 * |form|@action, |img|@longdesc, |img|@src, |input|@src, |ins|@cite, 226 * |q|@cite 227 * 228 * @since 1.0 229 * @param array|null $element_attribute Element/attribute key/value pairs, null 
for default 230 */ 231 public function set_url_replacements($element_attribute = null) 232 { 233 if ($element_attribute === null) 234 { 235 $element_attribute = array( 236 'a' => 'href', 237 'area' => 'href', 238 'blockquote' => 'cite', 239 'del' => 'cite', 240 'form' => 'action', 241 'img' => array( 242 'longdesc', 243 'src' 244 ), 245 'input' => 'src', 246 'ins' => 'cite', 247 'q' => 'cite' 248 ); 249 } 250 $this->replace_url_attributes = (array) $element_attribute; 251 } 252 253 /** 254 * Set the list of domains for which to force HTTPS. 255 * @see SimplePie_Misc::https_url() 256 * Example array('biz', 'example.com', 'example.org', 'www.example.net'); 257 */ 258 public function set_https_domains($domains) 259 { 260 $this->https_domains = array(); 261 foreach ($domains as $domain) 262 { 263 $domain = trim($domain, ". \t\n\r\0\x0B"); 264 $segments = array_reverse(explode('.', $domain)); 265 $node =& $this->https_domains; 266 foreach ($segments as $segment) 267 {//Build a tree 268 if ($node === true) 269 { 270 break; 271 } 272 if (!isset($node[$segment])) 273 { 274 $node[$segment] = array(); 275 } 276 $node =& $node[$segment]; 277 } 278 $node = true; 279 } 280 } 281 282 /** 283 * Check if the domain is in the list of forced HTTPS. 284 */ 285 protected function is_https_domain($domain) 286 { 287 $domain = trim($domain, '. '); 288 $segments = array_reverse(explode('.', $domain)); 289 $node =& $this->https_domains; 290 foreach ($segments as $segment) 291 {//Explore the tree 292 if (isset($node[$segment])) 293 { 294 $node =& $node[$segment]; 295 } 296 else 297 { 298 break; 299 } 300 } 301 return $node === true; 302 } 303 304 /** 305 * Force HTTPS for selected Web sites. 306 */ 307 public function https_url($url) 308 { 309 return (strtolower(substr($url, 0, 7)) === 'http://') && 310 $this->is_https_domain(parse_url($url, PHP_URL_HOST)) ? 
311 substr_replace($url, 's', 4, 0) : //Add the 's' to HTTPS 312 $url; 313 } 314 315 public function sanitize($data, $type, $base = '') 316 { 317 $data = trim($data); 318 if ($data !== '' || $type & SIMPLEPIE_CONSTRUCT_IRI) 319 { 320 if ($type & SIMPLEPIE_CONSTRUCT_MAYBE_HTML) 321 { 322 if (preg_match('/(&(#(x[0-9a-fA-F]+|[0-9]+)|[a-zA-Z0-9]+)|<\/[A-Za-z][^\x09\x0A\x0B\x0C\x0D\x20\x2F\x3E]*' . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . '>)/', $data)) 323 { 324 $type |= SIMPLEPIE_CONSTRUCT_HTML; 325 } 326 else 327 { 328 $type |= SIMPLEPIE_CONSTRUCT_TEXT; 329 } 330 } 331 332 if ($type & SIMPLEPIE_CONSTRUCT_BASE64) 333 { 334 $data = base64_decode($data); 335 } 336 337 if ($type & (SIMPLEPIE_CONSTRUCT_HTML | SIMPLEPIE_CONSTRUCT_XHTML)) 338 { 339 340 if (!class_exists('DOMDocument')) 341 { 342 throw new SimplePie_Exception('DOMDocument not found, unable to use sanitizer'); 343 } 344 $document = new DOMDocument(); 345 $document->encoding = 'UTF-8'; 346 347 $data = $this->preprocess($data, $type); 348 349 set_error_handler(array('SimplePie_Misc', 'silence_errors')); 350 $document->loadHTML($data); 351 restore_error_handler(); 352 353 $xpath = new DOMXPath($document); 354 355 // Strip comments 356 if ($this->strip_comments) 357 { 358 $comments = $xpath->query('//comment()'); 359 360 foreach ($comments as $comment) 361 { 362 $comment->parentNode->removeChild($comment); 363 } 364 } 365 366 // Strip out HTML tags and attributes that might cause various security problems. 
367 // Based on recommendations by Mark Pilgrim at: 368 // http://diveintomark.org/archives/2003/06/12/how_to_consume_rss_safely 369 if ($this->strip_htmltags) 370 { 371 foreach ($this->strip_htmltags as $tag) 372 { 373 $this->strip_tag($tag, $document, $xpath, $type); 374 } 375 } 376 377 if ($this->strip_attributes) 378 { 379 foreach ($this->strip_attributes as $attrib) 380 { 381 $this->strip_attr($attrib, $xpath); 382 } 383 } 384 385 if ($this->add_attributes) 386 { 387 foreach ($this->add_attributes as $tag => $valuePairs) 388 { 389 $this->add_attr($tag, $valuePairs, $document); 390 } 391 } 392 393 // Replace relative URLs 394 $this->base = $base; 395 foreach ($this->replace_url_attributes as $element => $attributes) 396 { 397 $this->replace_urls($document, $element, $attributes); 398 } 399 400 // If image handling (caching, etc.) is enabled, cache and rewrite all the image tags. 401 if (isset($this->image_handler) && ((string) $this->image_handler) !== '' && $this->enable_cache) 402 { 403 $images = $document->getElementsByTagName('img'); 404 foreach ($images as $img) 405 { 406 if ($img->hasAttribute('src')) 407 { 408 $image_url = call_user_func($this->cache_name_function, $img->getAttribute('src')); 409 $cache = $this->registry->call('Cache', 'get_handler', array($this->cache_location, $image_url, 'spi')); 410 411 if ($cache->load()) 412 { 413 $img->setAttribute('src', $this->image_handler . $image_url); 414 } 415 else 416 { 417 $file = $this->registry->create('File', array($img->getAttribute('src'), $this->timeout, 5, array('X-FORWARDED-FOR' => $_SERVER['REMOTE_ADDR']), $this->useragent, $this->force_fsockopen)); 418 $headers = $file->headers; 419 420 if ($file->success && ($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($file->status_code === 200 || $file->status_code > 206 && $file->status_code < 300))) 421 { 422 if ($cache->save(array('headers' => $file->headers, 'body' => $file->body))) 423 { 424 $img->setAttribute('src', $this->image_handler . 
$image_url); 425 } 426 else 427 { 428 trigger_error("$this->cache_location is not writable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING); 429 } 430 } 431 } 432 } 433 } 434 } 435 436 // Get content node 437 $div = $document->getElementsByTagName('body')->item(0)->firstChild; 438 // Finally, convert to a HTML string 439 $data = trim($document->saveHTML($div)); 440 441 if ($this->remove_div) 442 { 443 $data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '', $data); 444 $data = preg_replace('/<\/div>$/', '', $data); 445 } 446 else 447 { 448 $data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '<div>', $data); 449 } 450 } 451 452 if ($type & SIMPLEPIE_CONSTRUCT_IRI) 453 { 454 $absolute = $this->registry->call('Misc', 'absolutize_url', array($data, $base)); 455 if ($absolute !== false) 456 { 457 $data = $absolute; 458 } 459 } 460 461 if ($type & (SIMPLEPIE_CONSTRUCT_TEXT | SIMPLEPIE_CONSTRUCT_IRI)) 462 { 463 $data = htmlspecialchars($data, ENT_COMPAT, 'UTF-8'); 464 } 465 466 if ($this->output_encoding !== 'UTF-8') 467 { 468 $data = $this->registry->call('Misc', 'change_encoding', array($data, 'UTF-8', $this->output_encoding)); 469 } 470 } 471 return $data; 472 } 473 474 protected function preprocess($html, $type) 475 { 476 $ret = ''; 477 $html = preg_replace('%</?(?:html|body)[^>]*?'.'>%is', '', $html); 478 if ($type & ~SIMPLEPIE_CONSTRUCT_XHTML) 479 { 480 // Atom XHTML constructs are wrapped with a div by default 481 // Note: No protection if $html contains a stray </div>! 482 $html = '<div>' . $html . '</div>'; 483 $ret .= '<!DOCTYPE html>'; 484 $content_type = 'text/html'; 485 } 486 else 487 { 488 $ret .= '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'; 489 $content_type = 'application/xhtml+xml'; 490 } 491 492 $ret .= '<html><head>'; 493 $ret .= '<meta http-equiv="Content-Type" content="' . 
$content_type . '; charset=utf-8" />'; 494 $ret .= '</head><body>' . $html . '</body></html>'; 495 return $ret; 496 } 497 498 public function replace_urls($document, $tag, $attributes) 499 { 500 if (!is_array($attributes)) 501 { 502 $attributes = array($attributes); 503 } 504 505 if (!is_array($this->strip_htmltags) || !in_array($tag, $this->strip_htmltags)) 506 { 507 $elements = $document->getElementsByTagName($tag); 508 foreach ($elements as $element) 509 { 510 foreach ($attributes as $attribute) 511 { 512 if ($element->hasAttribute($attribute)) 513 { 514 $value = $this->registry->call('Misc', 'absolutize_url', array($element->getAttribute($attribute), $this->base)); 515 if ($value !== false) 516 { 517 $value = $this->https_url($value); 518 $element->setAttribute($attribute, $value); 519 } 520 } 521 } 522 } 523 } 524 } 525 526 public function do_strip_htmltags($match) 527 { 528 if ($this->encode_instead_of_strip) 529 { 530 if (isset($match[4]) && !in_array(strtolower($match[1]), array('script', 'style'))) 531 { 532 $match[1] = htmlspecialchars($match[1], ENT_COMPAT, 'UTF-8'); 533 $match[2] = htmlspecialchars($match[2], ENT_COMPAT, 'UTF-8'); 534 return "<$match[1]$match[2]>$match[3]</$match[1]>"; 535 } 536 else 537 { 538 return htmlspecialchars($match[0], ENT_COMPAT, 'UTF-8'); 539 } 540 } 541 elseif (isset($match[4]) && !in_array(strtolower($match[1]), array('script', 'style'))) 542 { 543 return $match[4]; 544 } 545 else 546 { 547 return ''; 548 } 549 } 550 551 protected function strip_tag($tag, $document, $xpath, $type) 552 { 553 $elements = $xpath->query('body//' . $tag); 554 if ($this->encode_instead_of_strip) 555 { 556 foreach ($elements as $element) 557 { 558 $fragment = $document->createDocumentFragment(); 559 560 // For elements which aren't script or style, include the tag itself 561 if (!in_array($tag, array('script', 'style'))) 562 { 563 $text = '<' . 
$tag; 564 if ($element->hasAttributes()) 565 { 566 $attrs = array(); 567 foreach ($element->attributes as $name => $attr) 568 { 569 $value = $attr->value; 570 571 // In XHTML, empty values should never exist, so we repeat the value 572 if (empty($value) && ($type & SIMPLEPIE_CONSTRUCT_XHTML)) 573 { 574 $value = $name; 575 } 576 // For HTML, empty is fine 577 elseif (empty($value) && ($type & SIMPLEPIE_CONSTRUCT_HTML)) 578 { 579 $attrs[] = $name; 580 continue; 581 } 582 583 // Standard attribute text 584 $attrs[] = $name . '="' . $attr->value . '"'; 585 } 586 $text .= ' ' . implode(' ', $attrs); 587 } 588 $text .= '>'; 589 $fragment->appendChild(new DOMText($text)); 590 } 591 592 $number = $element->childNodes->length; 593 for ($i = $number; $i > 0; $i--) 594 { 595 $child = $element->childNodes->item(0); 596 $fragment->appendChild($child); 597 } 598 599 if (!in_array($tag, array('script', 'style'))) 600 { 601 $fragment->appendChild(new DOMText('</' . $tag . '>')); 602 } 603 604 $element->parentNode->replaceChild($fragment, $element); 605 } 606 607 return; 608 } 609 elseif (in_array($tag, array('script', 'style'))) 610 { 611 foreach ($elements as $element) 612 { 613 $element->parentNode->removeChild($element); 614 } 615 616 return; 617 } 618 else 619 { 620 foreach ($elements as $element) 621 { 622 $fragment = $document->createDocumentFragment(); 623 $number = $element->childNodes->length; 624 for ($i = $number; $i > 0; $i--) 625 { 626 $child = $element->childNodes->item(0); 627 $fragment->appendChild($child); 628 } 629 630 $element->parentNode->replaceChild($fragment, $element); 631 } 632 } 633 } 634 635 protected function strip_attr($attrib, $xpath) 636 { 637 $elements = $xpath->query('//*[@' . $attrib . 
']'); 638 639 foreach ($elements as $element) 640 { 641 $element->removeAttribute($attrib); 642 } 643 } 644 645 protected function add_attr($tag, $valuePairs, $document) 646 { 647 $elements = $document->getElementsByTagName($tag); 648 foreach ($elements as $element) 649 { 650 foreach ($valuePairs as $attrib => $value) 651 { 652 $element->setAttribute($attrib, $value); 653 } 654 } 655 } 65 // Private vars 66 public $base; 67 68 // Options 69 public $remove_div = true; 70 public $image_handler = ''; 71 public $strip_htmltags = ['base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style']; 72 public $encode_instead_of_strip = false; 73 public $strip_attributes = ['bgsound', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc']; 74 public $rename_attributes = []; 75 public $add_attributes = ['audio' => ['preload' => 'none'], 'iframe' => ['sandbox' => 'allow-scripts allow-same-origin'], 'video' => ['preload' => 'none']]; 76 public $strip_comments = false; 77 public $output_encoding = 'UTF-8'; 78 public $enable_cache = true; 79 public $cache_location = './cache'; 80 public $cache_name_function = 'md5'; 81 82 /** 83 * @var NameFilter 84 */ 85 private $cache_namefilter; 86 public $timeout = 10; 87 public $useragent = ''; 88 public $force_fsockopen = false; 89 public $replace_url_attributes = null; 90 public $registry; 91 92 /** 93 * @var DataCache|null 94 */ 95 private $cache = null; 96 97 /** 98 * @var int Cache duration (in seconds) 99 */ 100 private $cache_duration = 3600; 101 102 /** 103 * List of domains for which to force HTTPS. 104 * @see \SimplePie\Sanitize::set_https_domains() 105 * Array is a tree split at DNS levels. 
Example: 106 * array('biz' => true, 'com' => array('example' => true), 'net' => array('example' => array('www' => true))) 107 */ 108 public $https_domains = []; 109 110 public function __construct() 111 { 112 // Set defaults 113 $this->set_url_replacements(null); 114 } 115 116 public function remove_div($enable = true) 117 { 118 $this->remove_div = (bool) $enable; 119 } 120 121 public function set_image_handler($page = false) 122 { 123 if ($page) { 124 $this->image_handler = (string) $page; 125 } else { 126 $this->image_handler = false; 127 } 128 } 129 130 public function set_registry(\SimplePie\Registry $registry)/* : void */ 131 { 132 $this->registry = $registry; 133 } 134 135 public function pass_cache_data($enable_cache = true, $cache_location = './cache', $cache_name_function = 'md5', $cache_class = 'SimplePie\Cache', ?DataCache $cache = null) 136 { 137 if (isset($enable_cache)) { 138 $this->enable_cache = (bool) $enable_cache; 139 } 140 141 if ($cache_location) { 142 $this->cache_location = (string) $cache_location; 143 } 144 145 if (!is_string($cache_name_function) && !is_object($cache_name_function) && !$cache_name_function instanceof NameFilter) { 146 throw new InvalidArgumentException(sprintf( 147 '%s(): Argument #3 ($cache_name_function) must be of type %s', 148 __METHOD__, 149 NameFilter::class 150 ), 1); 151 } 152 153 // BC: $cache_name_function could be a callable as string 154 if (is_string($cache_name_function)) { 155 // trigger_error(sprintf('Providing $cache_name_function as string in "%s()" is deprecated since SimplePie 1.8.0, provide as "%s" instead.', __METHOD__, NameFilter::class), \E_USER_DEPRECATED); 156 $this->cache_name_function = (string) $cache_name_function; 157 158 $cache_name_function = new CallableNameFilter($cache_name_function); 159 } 160 161 $this->cache_namefilter = $cache_name_function; 162 163 if ($cache !== null) { 164 $this->cache = $cache; 165 } 166 } 167 168 public function pass_file_data($file_class = 'SimplePie\File', 
$timeout = 10, $useragent = '', $force_fsockopen = false) 169 { 170 if ($timeout) { 171 $this->timeout = (string) $timeout; 172 } 173 174 if ($useragent) { 175 $this->useragent = (string) $useragent; 176 } 177 178 if ($force_fsockopen) { 179 $this->force_fsockopen = (string) $force_fsockopen; 180 } 181 } 182 183 public function strip_htmltags($tags = ['base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style']) 184 { 185 if ($tags) { 186 if (is_array($tags)) { 187 $this->strip_htmltags = $tags; 188 } else { 189 $this->strip_htmltags = explode(',', $tags); 190 } 191 } else { 192 $this->strip_htmltags = false; 193 } 194 } 195 196 public function encode_instead_of_strip($encode = false) 197 { 198 $this->encode_instead_of_strip = (bool) $encode; 199 } 200 201 public function rename_attributes($attribs = []) 202 { 203 if ($attribs) { 204 if (is_array($attribs)) { 205 $this->rename_attributes = $attribs; 206 } else { 207 $this->rename_attributes = explode(',', $attribs); 208 } 209 } else { 210 $this->rename_attributes = false; 211 } 212 } 213 214 public function strip_attributes($attribs = ['bgsound', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc']) 215 { 216 if ($attribs) { 217 if (is_array($attribs)) { 218 $this->strip_attributes = $attribs; 219 } else { 220 $this->strip_attributes = explode(',', $attribs); 221 } 222 } else { 223 $this->strip_attributes = false; 224 } 225 } 226 227 public function add_attributes($attribs = ['audio' => ['preload' => 'none'], 'iframe' => ['sandbox' => 'allow-scripts allow-same-origin'], 'video' => ['preload' => 'none']]) 228 { 229 if ($attribs) { 230 if (is_array($attribs)) { 231 $this->add_attributes = $attribs; 232 } else { 233 $this->add_attributes = explode(',', $attribs); 234 } 235 } else { 236 $this->add_attributes = false; 237 } 238 } 
239 240 public function strip_comments($strip = false) 241 { 242 $this->strip_comments = (bool) $strip; 243 } 244 245 public function set_output_encoding($encoding = 'UTF-8') 246 { 247 $this->output_encoding = (string) $encoding; 248 } 249 250 /** 251 * Set element/attribute key/value pairs of HTML attributes 252 * containing URLs that need to be resolved relative to the feed 253 * 254 * Defaults to |a|@href, |area|@href, |audio|@src, |blockquote|@cite, 255 * |del|@cite, |form|@action, |img|@longdesc, |img|@src, |input|@src, 256 * |ins|@cite, |q|@cite, |source|@src, |video|@src 257 * 258 * @since 1.0 259 * @param array|null $element_attribute Element/attribute key/value pairs, null for default 260 */ 261 public function set_url_replacements($element_attribute = null) 262 { 263 if ($element_attribute === null) { 264 $element_attribute = [ 265 'a' => 'href', 266 'area' => 'href', 267 'audio' => 'src', 268 'blockquote' => 'cite', 269 'del' => 'cite', 270 'form' => 'action', 271 'img' => [ 272 'longdesc', 273 'src' 274 ], 275 'input' => 'src', 276 'ins' => 'cite', 277 'q' => 'cite', 278 'source' => 'src', 279 'video' => [ 280 'poster', 281 'src' 282 ] 283 ]; 284 } 285 $this->replace_url_attributes = (array) $element_attribute; 286 } 287 288 /** 289 * Set the list of domains for which to force HTTPS. 290 * @see \SimplePie\Misc::https_url() 291 * Example array('biz', 'example.com', 'example.org', 'www.example.net'); 292 */ 293 public function set_https_domains($domains) 294 { 295 $this->https_domains = []; 296 foreach ($domains as $domain) { 297 $domain = trim($domain, ". \t\n\r\0\x0B"); 298 $segments = array_reverse(explode('.', $domain)); 299 $node = &$this->https_domains; 300 foreach ($segments as $segment) {//Build a tree 301 if ($node === true) { 302 break; 303 } 304 if (!isset($node[$segment])) { 305 $node[$segment] = []; 306 } 307 $node = &$node[$segment]; 308 } 309 $node = true; 310 } 311 } 312 313 /** 314 * Check if the domain is in the list of forced HTTPS. 
315 */ 316 protected function is_https_domain($domain) 317 { 318 $domain = trim($domain, '. '); 319 $segments = array_reverse(explode('.', $domain)); 320 $node = &$this->https_domains; 321 foreach ($segments as $segment) {//Explore the tree 322 if (isset($node[$segment])) { 323 $node = &$node[$segment]; 324 } else { 325 break; 326 } 327 } 328 return $node === true; 329 } 330 331 /** 332 * Force HTTPS for selected Web sites. 333 */ 334 public function https_url($url) 335 { 336 return (strtolower(substr($url, 0, 7)) === 'http://') && 337 $this->is_https_domain(parse_url($url, PHP_URL_HOST)) ? 338 substr_replace($url, 's', 4, 0) : //Add the 's' to HTTPS 339 $url; 340 } 341 342 public function sanitize($data, $type, $base = '') 343 { 344 $data = trim($data); 345 if ($data !== '' || $type & \SimplePie\SimplePie::CONSTRUCT_IRI) { 346 if ($type & \SimplePie\SimplePie::CONSTRUCT_MAYBE_HTML) { 347 if (preg_match('/(&(#(x[0-9a-fA-F]+|[0-9]+)|[a-zA-Z0-9]+)|<\/[A-Za-z][^\x09\x0A\x0B\x0C\x0D\x20\x2F\x3E]*' . \SimplePie\SimplePie::PCRE_HTML_ATTRIBUTE . 
'>)/', $data)) { 348 $type |= \SimplePie\SimplePie::CONSTRUCT_HTML; 349 } else { 350 $type |= \SimplePie\SimplePie::CONSTRUCT_TEXT; 351 } 352 } 353 354 if ($type & \SimplePie\SimplePie::CONSTRUCT_BASE64) { 355 $data = base64_decode($data); 356 } 357 358 if ($type & (\SimplePie\SimplePie::CONSTRUCT_HTML | \SimplePie\SimplePie::CONSTRUCT_XHTML)) { 359 if (!class_exists('DOMDocument')) { 360 throw new \SimplePie\Exception('DOMDocument not found, unable to use sanitizer'); 361 } 362 $document = new \DOMDocument(); 363 $document->encoding = 'UTF-8'; 364 365 $data = $this->preprocess($data, $type); 366 367 set_error_handler(['SimplePie\Misc', 'silence_errors']); 368 $document->loadHTML($data); 369 restore_error_handler(); 370 371 $xpath = new \DOMXPath($document); 372 373 // Strip comments 374 if ($this->strip_comments) { 375 $comments = $xpath->query('//comment()'); 376 377 foreach ($comments as $comment) { 378 $comment->parentNode->removeChild($comment); 379 } 380 } 381 382 // Strip out HTML tags and attributes that might cause various security problems. 
383 // Based on recommendations by Mark Pilgrim at: 384 // http://diveintomark.org/archives/2003/06/12/how_to_consume_rss_safely 385 if ($this->strip_htmltags) { 386 foreach ($this->strip_htmltags as $tag) { 387 $this->strip_tag($tag, $document, $xpath, $type); 388 } 389 } 390 391 if ($this->rename_attributes) { 392 foreach ($this->rename_attributes as $attrib) { 393 $this->rename_attr($attrib, $xpath); 394 } 395 } 396 397 if ($this->strip_attributes) { 398 foreach ($this->strip_attributes as $attrib) { 399 $this->strip_attr($attrib, $xpath); 400 } 401 } 402 403 if ($this->add_attributes) { 404 foreach ($this->add_attributes as $tag => $valuePairs) { 405 $this->add_attr($tag, $valuePairs, $document); 406 } 407 } 408 409 // Replace relative URLs 410 $this->base = $base; 411 foreach ($this->replace_url_attributes as $element => $attributes) { 412 $this->replace_urls($document, $element, $attributes); 413 } 414 415 // If image handling (caching, etc.) is enabled, cache and rewrite all the image tags. 416 if (isset($this->image_handler) && ((string) $this->image_handler) !== '' && $this->enable_cache) { 417 $images = $document->getElementsByTagName('img'); 418 419 foreach ($images as $img) { 420 if ($img->hasAttribute('src')) { 421 $image_url = $this->cache_namefilter->filter($img->getAttribute('src')); 422 $cache = $this->get_cache($image_url); 423 424 if ($cache->get_data($image_url, false)) { 425 $img->setAttribute('src', $this->image_handler . 
$image_url); 426 } else { 427 $file = $this->registry->create(File::class, [$img->getAttribute('src'), $this->timeout, 5, ['X-FORWARDED-FOR' => $_SERVER['REMOTE_ADDR']], $this->useragent, $this->force_fsockopen]); 428 $headers = $file->headers; 429 430 if ($file->success && ($file->method & \SimplePie\SimplePie::FILE_SOURCE_REMOTE === 0 || ($file->status_code === 200 || $file->status_code > 206 && $file->status_code < 300))) { 431 if ($cache->set_data($image_url, ['headers' => $file->headers, 'body' => $file->body], $this->cache_duration)) { 432 $img->setAttribute('src', $this->image_handler . $image_url); 433 } else { 434 trigger_error("$this->cache_location is not writable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING); 435 } 436 } 437 } 438 } 439 } 440 } 441 442 // Get content node 443 $div = $document->getElementsByTagName('body')->item(0)->firstChild; 444 // Finally, convert to a HTML string 445 $data = trim($document->saveHTML($div)); 446 447 if ($this->remove_div) { 448 $data = preg_replace('/^<div' . \SimplePie\SimplePie::PCRE_XML_ATTRIBUTE . '>/', '', $data); 449 $data = preg_replace('/<\/div>$/', '', $data); 450 } else { 451 $data = preg_replace('/^<div' . \SimplePie\SimplePie::PCRE_XML_ATTRIBUTE . 
'>/', '<div>', $data);
                }

                $data = str_replace('</source>', '', $data);
            }

            if ($type & \SimplePie\SimplePie::CONSTRUCT_IRI) {
                $absolute = $this->registry->call(Misc::class, 'absolutize_url', [$data, $base]);
                if ($absolute !== false) {
                    $data = $absolute;
                }
            }

            if ($type & (\SimplePie\SimplePie::CONSTRUCT_TEXT | \SimplePie\SimplePie::CONSTRUCT_IRI)) {
                $data = htmlspecialchars($data, ENT_COMPAT, 'UTF-8');
            }

            if ($this->output_encoding !== 'UTF-8') {
                $data = $this->registry->call(Misc::class, 'change_encoding', [$data, 'UTF-8', $this->output_encoding]);
            }
        }
        return $data;
    }

    /**
     * Wrap a markup fragment in a complete document skeleton.
     *
     * Any opening or closing <html>/<body> tags already present in the
     * fragment are removed first (case-insensitively), then the fragment is
     * placed inside a fresh <html><head>…</head><body>…</body></html>
     * document that declares a UTF-8 charset.
     *
     * @param string $html Markup fragment to wrap
     * @param int $type Bitmask of \SimplePie\SimplePie::CONSTRUCT_* flags
     * @return string The complete document source
     */
    protected function preprocess($html, $type)
    {
        $ret = '';
        $html = preg_replace('%</?(?:html|body)[^>]*?'.'>%is', '', $html);
        // True whenever any flag other than CONSTRUCT_XHTML is set in $type.
        if ($type & ~\SimplePie\SimplePie::CONSTRUCT_XHTML) {
            // Atom XHTML constructs are wrapped with a div by default
            // Note: No protection if $html contains a stray </div>!
            $html = '<div>' . $html . '</div>';
            $ret .= '<!DOCTYPE html>';
            $content_type = 'text/html';
        } else {
            $ret .= '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">';
            $content_type = 'application/xhtml+xml';
        }

        $ret .= '<html><head>';
        $ret .= '<meta http-equiv="Content-Type" content="' . $content_type . '; charset=utf-8" />';
        $ret .= '</head><body>' . $html . '</body></html>';
        return $ret;
    }

    /**
     * Make URL-bearing attributes of the given tag absolute.
     *
     * Every listed attribute that is present on a matching element is resolved
     * against $this->base; when resolution succeeds the result is passed
     * through $this->https_url() and written back to the element. Nothing is
     * done when $tag is listed in $this->strip_htmltags.
     *
     * @param \DOMDocument $document Document to modify in place (presumably a DOMDocument; only getElementsByTagName() is used)
     * @param string $tag Element name to look for
     * @param string|string[] $attributes Attribute name, or list of attribute names
     * @return void
     */
    public function replace_urls($document, $tag, $attributes)
    {
        if (!is_array($attributes)) {
            $attributes = [$attributes];
        }

        if (!is_array($this->strip_htmltags) || !in_array($tag, $this->strip_htmltags)) {
            $elements = $document->getElementsByTagName($tag);
            foreach ($elements as $element) {
                foreach ($attributes as $attribute) {
                    if ($element->hasAttribute($attribute)) {
                        $value = $this->registry->call(Misc::class, 'absolutize_url', [$element->getAttribute($attribute), $this->base]);
                        // A false result means the URL could not be resolved; leave the attribute untouched.
                        if ($value !== false) {
                            $value = $this->https_url($value);
                            $element->setAttribute($attribute, $value);
                        }
                    }
                }
            }
        }
    }

    /**
     * Produce the replacement text for one matched tag.
     *
     * The layout of $match is dictated by the pattern of the caller, which is
     * not visible here; this method only relies on $match[0] being the whole
     * match, $match[1] the tag name, and on $match[4] being set when the tag
     * was matched together with its closing tag.
     *
     * - Encode mode: the tag is returned as entity-encoded text, so it is
     *   displayed instead of being interpreted as markup.
     * - Strip mode: for paired tags other than script/style $match[4] is
     *   returned; everything else is replaced by an empty string.
     *
     * @param array $match Regular expression match data
     * @return string Replacement text
     */
    public function do_strip_htmltags($match)
    {
        if ($this->encode_instead_of_strip) {
            if (isset($match[4]) && !in_array(strtolower($match[1]), ['script', 'style'], true)) {
                $match[1] = htmlspecialchars($match[1], ENT_COMPAT, 'UTF-8');
                $match[2] = htmlspecialchars($match[2], ENT_COMPAT, 'UTF-8');
                // The tag delimiters must be entities as well; with raw "<" and ">"
                // the tag would be emitted as live markup rather than encoded.
                return "&lt;$match[1]$match[2]&gt;$match[3]&lt;/$match[1]&gt;";
            } else {
                return htmlspecialchars($match[0], ENT_COMPAT, 'UTF-8');
            }
        } elseif (isset($match[4]) && !in_array(strtolower($match[1]), ['script', 'style'], true)) {
            return $match[4];
        } else {
            return '';
        }
    }

    /**
     * Remove (or textualise) every $tag element found below <body>.
     *
     * - Encode mode: the element is replaced by its children; for tags other
     *   than script/style the children are surrounded by text nodes spelling
     *   out the opening tag (including attributes) and the closing tag.
     * - Otherwise script/style elements are removed together with their content.
     * - Otherwise the element is replaced by its children.
     *
     * @param string $tag Element name to strip
     * @param \DOMDocument $document Document owning the elements
     * @param \DOMXPath $xpath XPath evaluator used to locate the elements
     * @param int $type Bitmask of \SimplePie\SimplePie::CONSTRUCT_* flags
     * @return void
     */
    protected function strip_tag($tag, $document, $xpath, $type)
    {
        $elements = $xpath->query('body//' . $tag);
        if ($this->encode_instead_of_strip) {
            foreach ($elements as $element) {
                $fragment = $document->createDocumentFragment();

                // For elements which aren't script or style, include the tag itself
                if (!in_array($tag, ['script', 'style'], true)) {
                    $text = '<' . $tag;
                    if ($element->hasAttributes()) {
                        $attrs = [];
                        foreach ($element->attributes as $name => $attr) {
                            $value = $attr->value;

                            // Compare against '' rather than using empty(): a value of "0"
                            // is a real value and must be kept as it is.
                            // In XHTML, empty values should never exist, so we repeat the value
                            if ($value === '' && ($type & \SimplePie\SimplePie::CONSTRUCT_XHTML)) {
                                $value = $name;
                            }
                            // For HTML, empty is fine
                            elseif ($value === '' && ($type & \SimplePie\SimplePie::CONSTRUCT_HTML)) {
                                $attrs[] = $name;
                                continue;
                            }

                            // Standard attribute text; uses $value so that the XHTML
                            // substitution above actually takes effect.
                            $attrs[] = $name . '="' . $value . '"';
                        }
                        $text .= ' ' . implode(' ', $attrs);
                    }
                    $text .= '>';
                    $fragment->appendChild(new \DOMText($text));
                }

                // appendChild() moves the node out of $element, so item(0) is
                // always the next child that still has to be moved.
                $number = $element->childNodes->length;
                for ($i = $number; $i > 0; $i--) {
                    $child = $element->childNodes->item(0);
                    $fragment->appendChild($child);
                }

                if (!in_array($tag, ['script', 'style'], true)) {
                    $fragment->appendChild(new \DOMText('</' . $tag . '>'));
                }

                $element->parentNode->replaceChild($fragment, $element);
            }

            return;
        } elseif (in_array($tag, ['script', 'style'], true)) {
            foreach ($elements as $element) {
                $element->parentNode->removeChild($element);
            }

            return;
        } else {
            foreach ($elements as $element) {
                $fragment = $document->createDocumentFragment();
                // Same child-moving loop as above: item(0) advances as children are moved.
                $number = $element->childNodes->length;
                for ($i = $number; $i > 0; $i--) {
                    $child = $element->childNodes->item(0);
                    $fragment->appendChild($child);
                }

                $element->parentNode->replaceChild($fragment, $element);
            }
        }
    }

    /**
     * Remove an attribute from every element that carries it.
     *
     * @param string $attrib Attribute name; it is inserted into the XPath expression as is
     * @param \DOMXPath $xpath XPath evaluator for the document to modify
     * @return void
     */
    protected function strip_attr($attrib, $xpath)
    {
        $elements = $xpath->query('//*[@' . $attrib . ']');

        foreach ($elements as $element) {
            $element->removeAttribute($attrib);
        }
    }

    /**
     * Rename an attribute to "data-sanitized-<name>" on every element that carries it.
     *
     * The value is copied to the new attribute before the original attribute
     * is removed.
     *
     * @param string $attrib Attribute name; it is inserted into the XPath expression as is
     * @param \DOMXPath $xpath XPath evaluator for the document to modify
     * @return void
     */
    protected function rename_attr($attrib, $xpath)
    {
        $elements = $xpath->query('//*[@' . $attrib . ']');

        foreach ($elements as $element) {
            $element->setAttribute('data-sanitized-' . $attrib, $element->getAttribute($attrib));
            $element->removeAttribute($attrib);
        }
    }

    /**
     * Set the given attributes on every element with the given tag name.
     *
     * Attributes that already exist on an element are overwritten.
     *
     * @param string $tag Element name to look for
     * @param array<string, string> $valuePairs Attribute name => value pairs to set
     * @param \DOMDocument $document Document to modify in place
     * @return void
     */
    protected function add_attr($tag, $valuePairs, $document)
    {
        $elements = $document->getElementsByTagName($tag);
        foreach ($elements as $element) {
            foreach ($valuePairs as $attrib => $value) {
                $element->setAttribute($attrib, $value);
            }
        }
    }

    /**
     * Get a DataCache
     *
     * Returns $this->cache when one is set. Otherwise a legacy cache handler
     * is requested from the registry and wrapped in a BaseDataCache; this
     * fallback is built anew on every call and is not stored in $this->cache.
     *
     * @param string $image_url Only needed for BC, can be removed in SimplePie 2.0.0
     *
     * @return DataCache
     */
    private function get_cache($image_url = '')
    {
        if ($this->cache === null) {
            // @trigger_error(sprintf('Not providing as PSR-16 cache implementation is deprecated since SimplePie 1.8.0, please use "SimplePie\SimplePie::set_cache()".'), \E_USER_DEPRECATED);
            $cache = $this->registry->call(Cache::class, 'get_handler', [
                $this->cache_location,
                $image_url,
                Base::TYPE_IMAGE
            ]);

            return new BaseDataCache($cache);
        }

        return $this->cache;
    }
}

class_alias('SimplePie\Sanitize', 'SimplePie_Sanitize');
Note: See TracChangeset
for help on using the changeset viewer.