Changeset 59141 for trunk/src/wp-includes/SimplePie/src/Parser.php
- Timestamp:
- 09/30/2024 10:48:16 PM (8 months ago)
- Location:
- trunk/src/wp-includes/SimplePie/src
- Files:
-
- 1 added
- 1 moved
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/wp-includes/SimplePie/src/Parser.php
r59140 r59141 1 1 <?php 2 2 3 /** 3 4 * SimplePie … … 6 7 * Takes the hard work out of managing a complete RSS/Atom solution. 7 8 * 8 * Copyright (c) 2004-20 16, Ryan Parman, Sam Sneddon, Ryan McCue, and contributors9 * Copyright (c) 2004-2022, Ryan Parman, Sam Sneddon, Ryan McCue, and contributors 9 10 * All rights reserved. 10 11 * … … 42 43 */ 43 44 45 namespace SimplePie; 46 47 use SimplePie\XML\Declaration\Parser as DeclarationParser; 48 44 49 /** 45 50 * Parses XML into something sane 46 51 * 47 52 * 48 * This class can be overloaded with {@see SimplePie::set_parser_class()}53 * This class can be overloaded with {@see \SimplePie\SimplePie::set_parser_class()} 49 54 * 50 55 * @package SimplePie 51 56 * @subpackage Parsing 52 57 */ 53 class SimplePie_Parser58 class Parser implements RegistryAware 54 59 { 55 var $error_code; 56 var $error_string; 57 var $current_line; 58 var $current_column; 59 var $current_byte; 60 var $separator = ' '; 61 var $namespace = array(''); 62 var $element = array(''); 63 var $xml_base = array(''); 64 var $xml_base_explicit = array(false); 65 var $xml_lang = array(''); 66 var $data = array(); 67 var $datas = array(array()); 68 var $current_xhtml_construct = -1; 69 var $encoding; 70 protected $registry; 71 72 public function set_registry(SimplePie_Registry $registry) 73 { 74 $this->registry = $registry; 75 } 76 77 public function parse(&$data, $encoding, $url = '') 78 { 79 if (class_exists('DOMXpath') && function_exists('Mf2\parse')) { 80 $doc = new DOMDocument(); 81 @$doc->loadHTML($data); 82 $xpath = new DOMXpath($doc); 83 // Check for both h-feed and h-entry, as both a feed with no entries 84 // and a list of entries without an h-feed wrapper are both valid. 85 $query = '//*[contains(concat(" ", @class, " "), " h-feed ") or '. 
86 'contains(concat(" ", @class, " "), " h-entry ")]'; 87 $result = $xpath->query($query); 88 if ($result->length !== 0) { 89 return $this->parse_microformats($data, $url); 90 } 91 } 92 93 // Use UTF-8 if we get passed US-ASCII, as every US-ASCII character is a UTF-8 character 94 if (strtoupper($encoding) === 'US-ASCII') 95 { 96 $this->encoding = 'UTF-8'; 97 } 98 else 99 { 100 $this->encoding = $encoding; 101 } 102 103 // Strip BOM: 104 // UTF-32 Big Endian BOM 105 if (substr($data, 0, 4) === "\x00\x00\xFE\xFF") 106 { 107 $data = substr($data, 4); 108 } 109 // UTF-32 Little Endian BOM 110 elseif (substr($data, 0, 4) === "\xFF\xFE\x00\x00") 111 { 112 $data = substr($data, 4); 113 } 114 // UTF-16 Big Endian BOM 115 elseif (substr($data, 0, 2) === "\xFE\xFF") 116 { 117 $data = substr($data, 2); 118 } 119 // UTF-16 Little Endian BOM 120 elseif (substr($data, 0, 2) === "\xFF\xFE") 121 { 122 $data = substr($data, 2); 123 } 124 // UTF-8 BOM 125 elseif (substr($data, 0, 3) === "\xEF\xBB\xBF") 126 { 127 $data = substr($data, 3); 128 } 129 130 if (substr($data, 0, 5) === '<?xml' && strspn(substr($data, 5, 1), "\x09\x0A\x0D\x20") && ($pos = strpos($data, '?>')) !== false) 131 { 132 $declaration = $this->registry->create('XML_Declaration_Parser', array(substr($data, 5, $pos - 5))); 133 if ($declaration->parse()) 134 { 135 $data = substr($data, $pos + 2); 136 $data = '<?xml version="' . $declaration->version . '" encoding="' . $encoding . '" standalone="' . (($declaration->standalone) ? 'yes' : 'no') . '"?>' ."\n". $this->declare_html_entities() . $data; 137 } 138 else 139 { 140 $this->error_string = 'SimplePie bug! 
Please report this!'; 141 return false; 142 } 143 } 144 145 $return = true; 146 147 static $xml_is_sane = null; 148 if ($xml_is_sane === null) 149 { 150 $parser_check = xml_parser_create(); 151 xml_parse_into_struct($parser_check, '<foo>&</foo>', $values); 152 xml_parser_free($parser_check); 153 $xml_is_sane = isset($values[0]['value']); 154 } 155 156 // Create the parser 157 if ($xml_is_sane) 158 { 159 $xml = xml_parser_create_ns($this->encoding, $this->separator); 160 xml_parser_set_option($xml, XML_OPTION_SKIP_WHITE, 1); 161 xml_parser_set_option($xml, XML_OPTION_CASE_FOLDING, 0); 162 xml_set_object($xml, $this); 163 xml_set_character_data_handler($xml, 'cdata'); 164 xml_set_element_handler($xml, 'tag_open', 'tag_close'); 165 166 // Parse! 167 $wrapper = @is_writable(sys_get_temp_dir()) ? 'php://temp' : 'php://memory'; 168 if (($stream = fopen($wrapper, 'r+')) && 169 fwrite($stream, $data) && 170 rewind($stream)) 171 { 172 //Parse by chunks not to use too much memory 173 do 174 { 175 $stream_data = fread($stream, 1048576); 176 if (!xml_parse($xml, $stream_data === false ? '' : $stream_data, feof($stream))) 177 { 178 $this->error_code = xml_get_error_code($xml); 179 $this->error_string = xml_error_string($this->error_code); 180 $return = false; 181 break; 182 } 183 } while (!feof($stream)); 184 fclose($stream); 185 } 186 else 187 { 188 $return = false; 189 } 190 191 $this->current_line = xml_get_current_line_number($xml); 192 $this->current_column = xml_get_current_column_number($xml); 193 $this->current_byte = xml_get_current_byte_index($xml); 194 xml_parser_free($xml); 195 return $return; 196 } 197 198 libxml_clear_errors(); 199 $xml = new XMLReader(); 200 $xml->xml($data); 201 while (@$xml->read()) 202 { 203 switch ($xml->nodeType) 204 { 205 206 case constant('XMLReader::END_ELEMENT'): 207 if ($xml->namespaceURI !== '') 208 { 209 $tagName = $xml->namespaceURI . $this->separator . 
$xml->localName; 210 } 211 else 212 { 213 $tagName = $xml->localName; 214 } 215 $this->tag_close(null, $tagName); 216 break; 217 case constant('XMLReader::ELEMENT'): 218 $empty = $xml->isEmptyElement; 219 if ($xml->namespaceURI !== '') 220 { 221 $tagName = $xml->namespaceURI . $this->separator . $xml->localName; 222 } 223 else 224 { 225 $tagName = $xml->localName; 226 } 227 $attributes = array(); 228 while ($xml->moveToNextAttribute()) 229 { 230 if ($xml->namespaceURI !== '') 231 { 232 $attrName = $xml->namespaceURI . $this->separator . $xml->localName; 233 } 234 else 235 { 236 $attrName = $xml->localName; 237 } 238 $attributes[$attrName] = $xml->value; 239 } 240 $this->tag_open(null, $tagName, $attributes); 241 if ($empty) 242 { 243 $this->tag_close(null, $tagName); 244 } 245 break; 246 case constant('XMLReader::TEXT'): 247 248 case constant('XMLReader::CDATA'): 249 $this->cdata(null, $xml->value); 250 break; 251 } 252 } 253 if ($error = libxml_get_last_error()) 254 { 255 $this->error_code = $error->code; 256 $this->error_string = $error->message; 257 $this->current_line = $error->line; 258 $this->current_column = $error->column; 259 return false; 260 } 261 262 return true; 263 } 264 265 public function get_error_code() 266 { 267 return $this->error_code; 268 } 269 270 public function get_error_string() 271 { 272 return $this->error_string; 273 } 274 275 public function get_current_line() 276 { 277 return $this->current_line; 278 } 279 280 public function get_current_column() 281 { 282 return $this->current_column; 283 } 284 285 public function get_current_byte() 286 { 287 return $this->current_byte; 288 } 289 290 public function get_data() 291 { 292 return $this->data; 293 } 294 295 public function tag_open($parser, $tag, $attributes) 296 { 297 list($this->namespace[], $this->element[]) = $this->split_ns($tag); 298 299 $attribs = array(); 300 foreach ($attributes as $name => $value) 301 { 302 list($attrib_namespace, $attribute) = $this->split_ns($name); 303 
$attribs[$attrib_namespace][$attribute] = $value; 304 } 305 306 if (isset($attribs[SIMPLEPIE_NAMESPACE_XML]['base'])) 307 { 308 $base = $this->registry->call('Misc', 'absolutize_url', array($attribs[SIMPLEPIE_NAMESPACE_XML]['base'], end($this->xml_base))); 309 if ($base !== false) 310 { 311 $this->xml_base[] = $base; 312 $this->xml_base_explicit[] = true; 313 } 314 } 315 else 316 { 317 $this->xml_base[] = end($this->xml_base); 318 $this->xml_base_explicit[] = end($this->xml_base_explicit); 319 } 320 321 if (isset($attribs[SIMPLEPIE_NAMESPACE_XML]['lang'])) 322 { 323 $this->xml_lang[] = $attribs[SIMPLEPIE_NAMESPACE_XML]['lang']; 324 } 325 else 326 { 327 $this->xml_lang[] = end($this->xml_lang); 328 } 329 330 if ($this->current_xhtml_construct >= 0) 331 { 332 $this->current_xhtml_construct++; 333 if (end($this->namespace) === SIMPLEPIE_NAMESPACE_XHTML) 334 { 335 $this->data['data'] .= '<' . end($this->element); 336 if (isset($attribs[''])) 337 { 338 foreach ($attribs[''] as $name => $value) 339 { 340 $this->data['data'] .= ' ' . $name . '="' . htmlspecialchars($value, ENT_COMPAT, $this->encoding) . 
'"'; 341 } 342 } 343 $this->data['data'] .= '>'; 344 } 345 } 346 else 347 { 348 $this->datas[] =& $this->data; 349 $this->data =& $this->data['child'][end($this->namespace)][end($this->element)][]; 350 $this->data = array('data' => '', 'attribs' => $attribs, 'xml_base' => end($this->xml_base), 'xml_base_explicit' => end($this->xml_base_explicit), 'xml_lang' => end($this->xml_lang)); 351 if ((end($this->namespace) === SIMPLEPIE_NAMESPACE_ATOM_03 && in_array(end($this->element), array('title', 'tagline', 'copyright', 'info', 'summary', 'content')) && isset($attribs['']['mode']) && $attribs['']['mode'] === 'xml') 352 || (end($this->namespace) === SIMPLEPIE_NAMESPACE_ATOM_10 && in_array(end($this->element), array('rights', 'subtitle', 'summary', 'info', 'title', 'content')) && isset($attribs['']['type']) && $attribs['']['type'] === 'xhtml') 353 || (end($this->namespace) === SIMPLEPIE_NAMESPACE_RSS_20 && in_array(end($this->element), array('title'))) 354 || (end($this->namespace) === SIMPLEPIE_NAMESPACE_RSS_090 && in_array(end($this->element), array('title'))) 355 || (end($this->namespace) === SIMPLEPIE_NAMESPACE_RSS_10 && in_array(end($this->element), array('title')))) 356 { 357 $this->current_xhtml_construct = 0; 358 } 359 } 360 } 361 362 public function cdata($parser, $cdata) 363 { 364 if ($this->current_xhtml_construct >= 0) 365 { 366 $this->data['data'] .= htmlspecialchars($cdata, ENT_QUOTES, $this->encoding); 367 } 368 else 369 { 370 $this->data['data'] .= $cdata; 371 } 372 } 373 374 public function tag_close($parser, $tag) 375 { 376 if ($this->current_xhtml_construct >= 0) 377 { 378 $this->current_xhtml_construct--; 379 if (end($this->namespace) === SIMPLEPIE_NAMESPACE_XHTML && !in_array(end($this->element), array('area', 'base', 'basefont', 'br', 'col', 'frame', 'hr', 'img', 'input', 'isindex', 'link', 'meta', 'param'))) 380 { 381 $this->data['data'] .= '</' . end($this->element) . 
'>'; 382 } 383 } 384 if ($this->current_xhtml_construct === -1) 385 { 386 $this->data =& $this->datas[count($this->datas) - 1]; 387 array_pop($this->datas); 388 } 389 390 array_pop($this->element); 391 array_pop($this->namespace); 392 array_pop($this->xml_base); 393 array_pop($this->xml_base_explicit); 394 array_pop($this->xml_lang); 395 } 396 397 public function split_ns($string) 398 { 399 static $cache = array(); 400 if (!isset($cache[$string])) 401 { 402 if ($pos = strpos($string, $this->separator)) 403 { 404 static $separator_length; 405 if (!$separator_length) 406 { 407 $separator_length = strlen($this->separator); 408 } 409 $namespace = substr($string, 0, $pos); 410 $local_name = substr($string, $pos + $separator_length); 411 if (strtolower($namespace) === SIMPLEPIE_NAMESPACE_ITUNES) 412 { 413 $namespace = SIMPLEPIE_NAMESPACE_ITUNES; 414 } 415 416 // Normalize the Media RSS namespaces 417 if ($namespace === SIMPLEPIE_NAMESPACE_MEDIARSS_WRONG || 418 $namespace === SIMPLEPIE_NAMESPACE_MEDIARSS_WRONG2 || 419 $namespace === SIMPLEPIE_NAMESPACE_MEDIARSS_WRONG3 || 420 $namespace === SIMPLEPIE_NAMESPACE_MEDIARSS_WRONG4 || 421 $namespace === SIMPLEPIE_NAMESPACE_MEDIARSS_WRONG5 ) 422 { 423 $namespace = SIMPLEPIE_NAMESPACE_MEDIARSS; 424 } 425 $cache[$string] = array($namespace, $local_name); 426 } 427 else 428 { 429 $cache[$string] = array('', $string); 430 } 431 } 432 return $cache[$string]; 433 } 434 435 private function parse_hcard($data, $category = false) { 436 $name = ''; 437 $link = ''; 438 // Check if h-card is set and pass that information on in the link. 439 if (isset($data['type']) && in_array('h-card', $data['type'])) { 440 if (isset($data['properties']['name'][0])) { 441 $name = $data['properties']['name'][0]; 442 } 443 if (isset($data['properties']['url'][0])) { 444 $link = $data['properties']['url'][0]; 445 if ($name === '') { 446 $name = $link; 447 } 448 else { 449 // can't have commas in categories. 
450 $name = str_replace(',', '', $name); 451 } 452 $person_tag = $category ? '<span class="person-tag"></span>' : ''; 453 return '<a class="h-card" href="'.$link.'">'.$person_tag.$name.'</a>'; 454 } 455 } 456 return isset($data['value']) ? $data['value'] : ''; 457 } 458 459 private function parse_microformats(&$data, $url) { 460 $feed_title = ''; 461 $feed_author = NULL; 462 $author_cache = array(); 463 $items = array(); 464 $entries = array(); 465 $mf = Mf2\parse($data, $url); 466 // First look for an h-feed. 467 $h_feed = array(); 468 foreach ($mf['items'] as $mf_item) { 469 if (in_array('h-feed', $mf_item['type'])) { 470 $h_feed = $mf_item; 471 break; 472 } 473 // Also look for h-feed or h-entry in the children of each top level item. 474 if (!isset($mf_item['children'][0]['type'])) continue; 475 if (in_array('h-feed', $mf_item['children'][0]['type'])) { 476 $h_feed = $mf_item['children'][0]; 477 // In this case the parent of the h-feed may be an h-card, so use it as 478 // the feed_author. 479 if (in_array('h-card', $mf_item['type'])) $feed_author = $mf_item; 480 break; 481 } 482 else if (in_array('h-entry', $mf_item['children'][0]['type'])) { 483 $entries = $mf_item['children']; 484 // In this case the parent of the h-entry list may be an h-card, so use 485 // it as the feed_author. 486 if (in_array('h-card', $mf_item['type'])) $feed_author = $mf_item; 487 break; 488 } 489 } 490 if (isset($h_feed['children'])) { 491 $entries = $h_feed['children']; 492 // Also set the feed title and store author from the h-feed if available. 
493 if (isset($mf['items'][0]['properties']['name'][0])) { 494 $feed_title = $mf['items'][0]['properties']['name'][0]; 495 } 496 if (isset($mf['items'][0]['properties']['author'][0])) { 497 $feed_author = $mf['items'][0]['properties']['author'][0]; 498 } 499 } 500 else if (count($entries) === 0) { 501 $entries = $mf['items']; 502 } 503 for ($i = 0; $i < count($entries); $i++) { 504 $entry = $entries[$i]; 505 if (in_array('h-entry', $entry['type'])) { 506 $item = array(); 507 $title = ''; 508 $description = ''; 509 if (isset($entry['properties']['url'][0])) { 510 $link = $entry['properties']['url'][0]; 511 if (isset($link['value'])) $link = $link['value']; 512 $item['link'] = array(array('data' => $link)); 513 } 514 if (isset($entry['properties']['uid'][0])) { 515 $guid = $entry['properties']['uid'][0]; 516 if (isset($guid['value'])) $guid = $guid['value']; 517 $item['guid'] = array(array('data' => $guid)); 518 } 519 if (isset($entry['properties']['name'][0])) { 520 $title = $entry['properties']['name'][0]; 521 if (isset($title['value'])) $title = $title['value']; 522 $item['title'] = array(array('data' => $title)); 523 } 524 if (isset($entry['properties']['author'][0]) || isset($feed_author)) { 525 // author is a special case, it can be plain text or an h-card array. 526 // If it's plain text it can also be a url that should be followed to 527 // get the actual h-card. 528 $author = isset($entry['properties']['author'][0]) ? 529 $entry['properties']['author'][0] : $feed_author; 530 if (!is_string($author)) { 531 $author = $this->parse_hcard($author); 532 } 533 else if (strpos($author, 'http') === 0) { 534 if (isset($author_cache[$author])) { 535 $author = $author_cache[$author]; 536 } 537 else { 538 $mf = Mf2\fetch($author); 539 foreach ($mf['items'] as $hcard) { 540 // Only interested in an h-card by itself in this case. 541 if (!in_array('h-card', $hcard['type'])) { 542 continue; 543 } 544 // It must have a url property matching what we fetched. 
545 if (!isset($hcard['properties']['url']) || 546 !(in_array($author, $hcard['properties']['url']))) { 547 continue; 548 } 549 // Save parse_hcard the trouble of finding the correct url. 550 $hcard['properties']['url'][0] = $author; 551 // Cache this h-card for the next h-entry to check. 552 $author_cache[$author] = $this->parse_hcard($hcard); 553 $author = $author_cache[$author]; 554 break; 555 } 556 } 557 } 558 $item['author'] = array(array('data' => $author)); 559 } 560 if (isset($entry['properties']['photo'][0])) { 561 // If a photo is also in content, don't need to add it again here. 562 $content = ''; 563 if (isset($entry['properties']['content'][0]['html'])) { 564 $content = $entry['properties']['content'][0]['html']; 565 } 566 $photo_list = array(); 567 for ($j = 0; $j < count($entry['properties']['photo']); $j++) { 568 $photo = $entry['properties']['photo'][$j]; 569 if (!empty($photo) && strpos($content, $photo) === false) { 570 $photo_list[] = $photo; 571 } 572 } 573 // When there's more than one photo show the first and use a lightbox. 574 // Need a permanent, unique name for the image set, but don't have 575 // anything unique except for the content itself, so use that. 576 $count = count($photo_list); 577 if ($count > 1) { 578 $image_set_id = preg_replace('/[[:^alnum:]]/', '', $photo_list[0]); 579 $description = '<p>'; 580 for ($j = 0; $j < $count; $j++) { 581 $hidden = $j === 0 ? '' : 'class="hidden" '; 582 $description .= '<a href="'.$photo_list[$j].'" '.$hidden. 583 'data-lightbox="image-set-'.$image_set_id.'">'. 584 '<img src="'.$photo_list[$j].'"></a>'; 585 } 586 $description .= '<br><b>'.$count.' photos</b></p>'; 587 } 588 else if ($count == 1) { 589 $description = '<p><img src="'.$photo_list[0].'"></p>'; 590 } 591 } 592 if (isset($entry['properties']['content'][0]['html'])) { 593 // e-content['value'] is the same as p-name when they are on the same 594 // element. 
Use this to replace title with a strip_tags version so 595 // that alt text from images is not included in the title. 596 if ($entry['properties']['content'][0]['value'] === $title) { 597 $title = strip_tags($entry['properties']['content'][0]['html']); 598 $item['title'] = array(array('data' => $title)); 599 } 600 $description .= $entry['properties']['content'][0]['html']; 601 if (isset($entry['properties']['in-reply-to'][0])) { 602 $in_reply_to = ''; 603 if (is_string($entry['properties']['in-reply-to'][0])) { 604 $in_reply_to = $entry['properties']['in-reply-to'][0]; 605 } 606 else if (isset($entry['properties']['in-reply-to'][0]['value'])) { 607 $in_reply_to = $entry['properties']['in-reply-to'][0]['value']; 608 } 609 if ($in_reply_to !== '') { 610 $description .= '<p><span class="in-reply-to"></span> '. 611 '<a href="'.$in_reply_to.'">'.$in_reply_to.'</a><p>'; 612 } 613 } 614 $item['description'] = array(array('data' => $description)); 615 } 616 if (isset($entry['properties']['category'])) { 617 $category_csv = ''; 618 // Categories can also contain h-cards. 619 foreach ($entry['properties']['category'] as $category) { 620 if ($category_csv !== '') $category_csv .= ', '; 621 if (is_string($category)) { 622 // Can't have commas in categories. 623 $category_csv .= str_replace(',', '', $category); 624 } 625 else { 626 $category_csv .= $this->parse_hcard($category, true); 627 } 628 } 629 $item['category'] = array(array('data' => $category_csv)); 630 } 631 if (isset($entry['properties']['published'][0])) { 632 $timestamp = strtotime($entry['properties']['published'][0]); 633 $pub_date = date('F j Y g:ia', $timestamp).' GMT'; 634 $item['pubDate'] = array(array('data' => $pub_date)); 635 } 636 // The title and description are set to the empty string to represent 637 // a deleted item (which also makes it an invalid rss item). 
638 if (isset($entry['properties']['deleted'][0])) { 639 $item['title'] = array(array('data' => '')); 640 $item['description'] = array(array('data' => '')); 641 } 642 $items[] = array('child' => array('' => $item)); 643 } 644 } 645 // Mimic RSS data format when storing microformats. 646 $link = array(array('data' => $url)); 647 $image = ''; 648 if (!is_string($feed_author) && 649 isset($feed_author['properties']['photo'][0])) { 650 $image = array(array('child' => array('' => array('url' => 651 array(array('data' => $feed_author['properties']['photo'][0])))))); 652 } 653 // Use the name given for the h-feed, or get the title from the html. 654 if ($feed_title !== '') { 655 $feed_title = array(array('data' => htmlspecialchars($feed_title))); 656 } 657 else if ($position = strpos($data, '<title>')) { 658 $start = $position < 200 ? 0 : $position - 200; 659 $check = substr($data, $start, 400); 660 $matches = array(); 661 if (preg_match('/<title>(.+)<\/title>/', $check, $matches)) { 662 $feed_title = array(array('data' => htmlspecialchars($matches[1]))); 663 } 664 } 665 $channel = array('channel' => array(array('child' => array('' => 666 array('link' => $link, 'image' => $image, 'title' => $feed_title, 667 'item' => $items))))); 668 $rss = array(array('attribs' => array('' => array('version' => '2.0')), 669 'child' => array('' => $channel))); 670 $this->data = array('child' => array('' => array('rss' => $rss))); 671 return true; 672 } 673 674 private function declare_html_entities() { 675 // This is required because the RSS specification says that entity-encoded 676 // html is allowed, but the xml specification says they must be declared. 
677 return '<!DOCTYPE html [ <!ENTITY nbsp " "> <!ENTITY iexcl "¡"> <!ENTITY cent "¢"> <!ENTITY pound "£"> <!ENTITY curren "¤"> <!ENTITY yen "¥"> <!ENTITY brvbar "¦"> <!ENTITY sect "§"> <!ENTITY uml "¨"> <!ENTITY copy "©"> <!ENTITY ordf "ª"> <!ENTITY laquo "«"> <!ENTITY not "¬"> <!ENTITY shy "­"> <!ENTITY reg "®"> <!ENTITY macr "¯"> <!ENTITY deg "°"> <!ENTITY plusmn "±"> <!ENTITY sup2 "²"> <!ENTITY sup3 "³"> <!ENTITY acute "´"> <!ENTITY micro "µ"> <!ENTITY para "¶"> <!ENTITY middot "·"> <!ENTITY cedil "¸"> <!ENTITY sup1 "¹"> <!ENTITY ordm "º"> <!ENTITY raquo "»"> <!ENTITY frac14 "¼"> <!ENTITY frac12 "½"> <!ENTITY frac34 "¾"> <!ENTITY iquest "¿"> <!ENTITY Agrave "À"> <!ENTITY Aacute "Á"> <!ENTITY Acirc "Â"> <!ENTITY Atilde "Ã"> <!ENTITY Auml "Ä"> <!ENTITY Aring "Å"> <!ENTITY AElig "Æ"> <!ENTITY Ccedil "Ç"> <!ENTITY Egrave "È"> <!ENTITY Eacute "É"> <!ENTITY Ecirc "Ê"> <!ENTITY Euml "Ë"> <!ENTITY Igrave "Ì"> <!ENTITY Iacute "Í"> <!ENTITY Icirc "Î"> <!ENTITY Iuml "Ï"> <!ENTITY ETH "Ð"> <!ENTITY Ntilde "Ñ"> <!ENTITY Ograve "Ò"> <!ENTITY Oacute "Ó"> <!ENTITY Ocirc "Ô"> <!ENTITY Otilde "Õ"> <!ENTITY Ouml "Ö"> <!ENTITY times "×"> <!ENTITY Oslash "Ø"> <!ENTITY Ugrave "Ù"> <!ENTITY Uacute "Ú"> <!ENTITY Ucirc "Û"> <!ENTITY Uuml "Ü"> <!ENTITY Yacute "Ý"> <!ENTITY THORN "Þ"> <!ENTITY szlig "ß"> <!ENTITY agrave "à"> <!ENTITY aacute "á"> <!ENTITY acirc "â"> <!ENTITY atilde "ã"> <!ENTITY auml "ä"> <!ENTITY aring "å"> <!ENTITY aelig "æ"> <!ENTITY ccedil "ç"> <!ENTITY egrave "è"> <!ENTITY eacute "é"> <!ENTITY ecirc "ê"> <!ENTITY euml "ë"> <!ENTITY igrave "ì"> <!ENTITY iacute "í"> <!ENTITY icirc "î"> <!ENTITY iuml "ï"> <!ENTITY eth "ð"> <!ENTITY ntilde "ñ"> <!ENTITY ograve "ò"> <!ENTITY oacute "ó"> <!ENTITY ocirc "ô"> <!ENTITY otilde "õ"> <!ENTITY ouml "ö"> <!ENTITY divide "÷"> <!ENTITY oslash "ø"> <!ENTITY ugrave "ù"> <!ENTITY uacute "ú"> <!ENTITY ucirc "û"> <!ENTITY uuml "ü"> <!ENTITY yacute "ý"> <!ENTITY thorn "þ"> <!ENTITY yuml "ÿ"> <!ENTITY OElig "Œ"> <!ENTITY oelig "œ"> 
<!ENTITY Scaron "Š"> <!ENTITY scaron "š"> <!ENTITY Yuml "Ÿ"> <!ENTITY fnof "ƒ"> <!ENTITY circ "ˆ"> <!ENTITY tilde "˜"> <!ENTITY Alpha "Α"> <!ENTITY Beta "Β"> <!ENTITY Gamma "Γ"> <!ENTITY Epsilon "Ε"> <!ENTITY Zeta "Ζ"> <!ENTITY Eta "Η"> <!ENTITY Theta "Θ"> <!ENTITY Iota "Ι"> <!ENTITY Kappa "Κ"> <!ENTITY Lambda "Λ"> <!ENTITY Mu "Μ"> <!ENTITY Nu "Ν"> <!ENTITY Xi "Ξ"> <!ENTITY Omicron "Ο"> <!ENTITY Pi "Π"> <!ENTITY Rho "Ρ"> <!ENTITY Sigma "Σ"> <!ENTITY Tau "Τ"> <!ENTITY Upsilon "Υ"> <!ENTITY Phi "Φ"> <!ENTITY Chi "Χ"> <!ENTITY Psi "Ψ"> <!ENTITY Omega "Ω"> <!ENTITY alpha "α"> <!ENTITY beta "β"> <!ENTITY gamma "γ"> <!ENTITY delta "δ"> <!ENTITY epsilon "ε"> <!ENTITY zeta "ζ"> <!ENTITY eta "η"> <!ENTITY theta "θ"> <!ENTITY iota "ι"> <!ENTITY kappa "κ"> <!ENTITY lambda "λ"> <!ENTITY mu "μ"> <!ENTITY nu "ν"> <!ENTITY xi "ξ"> <!ENTITY omicron "ο"> <!ENTITY pi "π"> <!ENTITY rho "ρ"> <!ENTITY sigmaf "ς"> <!ENTITY sigma "σ"> <!ENTITY tau "τ"> <!ENTITY upsilon "υ"> <!ENTITY phi "φ"> <!ENTITY chi "χ"> <!ENTITY psi "ψ"> <!ENTITY omega "ω"> <!ENTITY thetasym "ϑ"> <!ENTITY upsih "ϒ"> <!ENTITY piv "ϖ"> <!ENTITY ensp " "> <!ENTITY emsp " "> <!ENTITY thinsp " "> <!ENTITY zwnj "‌"> <!ENTITY zwj "‍"> <!ENTITY lrm "‎"> <!ENTITY rlm "‏"> <!ENTITY ndash "–"> <!ENTITY mdash "—"> <!ENTITY lsquo "‘"> <!ENTITY rsquo "’"> <!ENTITY sbquo "‚"> <!ENTITY ldquo "“"> <!ENTITY rdquo "”"> <!ENTITY bdquo "„"> <!ENTITY dagger "†"> <!ENTITY Dagger "‡"> <!ENTITY bull "•"> <!ENTITY hellip "…"> <!ENTITY permil "‰"> <!ENTITY prime "′"> <!ENTITY Prime "″"> <!ENTITY lsaquo "‹"> <!ENTITY rsaquo "›"> <!ENTITY oline "‾"> <!ENTITY frasl "⁄"> <!ENTITY euro "€"> <!ENTITY image "ℑ"> <!ENTITY weierp "℘"> <!ENTITY real "ℜ"> <!ENTITY trade "™"> <!ENTITY alefsym "ℵ"> <!ENTITY larr "←"> <!ENTITY uarr "↑"> <!ENTITY rarr "→"> <!ENTITY darr "↓"> <!ENTITY harr "↔"> <!ENTITY crarr "↵"> <!ENTITY lArr "⇐"> <!ENTITY uArr "⇑"> <!ENTITY rArr "⇒"> <!ENTITY dArr "⇓"> <!ENTITY hArr "⇔"> <!ENTITY forall "∀"> <!ENTITY part "∂"> <!ENTITY 
exist "∃"> <!ENTITY empty "∅"> <!ENTITY nabla "∇"> <!ENTITY isin "∈"> <!ENTITY notin "∉"> <!ENTITY ni "∋"> <!ENTITY prod "∏"> <!ENTITY sum "∑"> <!ENTITY minus "−"> <!ENTITY lowast "∗"> <!ENTITY radic "√"> <!ENTITY prop "∝"> <!ENTITY infin "∞"> <!ENTITY ang "∠"> <!ENTITY and "∧"> <!ENTITY or "∨"> <!ENTITY cap "∩"> <!ENTITY cup "∪"> <!ENTITY int "∫"> <!ENTITY there4 "∴"> <!ENTITY sim "∼"> <!ENTITY cong "≅"> <!ENTITY asymp "≈"> <!ENTITY ne "≠"> <!ENTITY equiv "≡"> <!ENTITY le "≤"> <!ENTITY ge "≥"> <!ENTITY sub "⊂"> <!ENTITY sup "⊃"> <!ENTITY nsub "⊄"> <!ENTITY sube "⊆"> <!ENTITY supe "⊇"> <!ENTITY oplus "⊕"> <!ENTITY otimes "⊗"> <!ENTITY perp "⊥"> <!ENTITY sdot "⋅"> <!ENTITY lceil "⌈"> <!ENTITY rceil "⌉"> <!ENTITY lfloor "⌊"> <!ENTITY rfloor "⌋"> <!ENTITY lang "〈"> <!ENTITY rang "〉"> <!ENTITY loz "◊"> <!ENTITY spades "♠"> <!ENTITY clubs "♣"> <!ENTITY hearts "♥"> <!ENTITY diams "♦"> ]>'; 678 } 60 public $error_code; 61 public $error_string; 62 public $current_line; 63 public $current_column; 64 public $current_byte; 65 public $separator = ' '; 66 public $namespace = ['']; 67 public $element = ['']; 68 public $xml_base = ['']; 69 public $xml_base_explicit = [false]; 70 public $xml_lang = ['']; 71 public $data = []; 72 public $datas = [[]]; 73 public $current_xhtml_construct = -1; 74 public $encoding; 75 protected $registry; 76 77 public function set_registry(\SimplePie\Registry $registry)/* : void */ 78 { 79 $this->registry = $registry; 80 } 81 82 public function parse(&$data, $encoding, $url = '') 83 { 84 if (class_exists('DOMXpath') && function_exists('Mf2\parse')) { 85 $doc = new \DOMDocument(); 86 @$doc->loadHTML($data); 87 $xpath = new \DOMXpath($doc); 88 // Check for both h-feed and h-entry, as both a feed with no entries 89 // and a list of entries without an h-feed wrapper are both valid. 90 $query = '//*[contains(concat(" ", @class, " "), " h-feed ") or '. 
91 'contains(concat(" ", @class, " "), " h-entry ")]'; 92 $result = $xpath->query($query); 93 if ($result->length !== 0) { 94 return $this->parse_microformats($data, $url); 95 } 96 } 97 98 // Use UTF-8 if we get passed US-ASCII, as every US-ASCII character is a UTF-8 character 99 if (strtoupper($encoding) === 'US-ASCII') { 100 $this->encoding = 'UTF-8'; 101 } else { 102 $this->encoding = $encoding; 103 } 104 105 // Strip BOM: 106 // UTF-32 Big Endian BOM 107 if (substr($data, 0, 4) === "\x00\x00\xFE\xFF") { 108 $data = substr($data, 4); 109 } 110 // UTF-32 Little Endian BOM 111 elseif (substr($data, 0, 4) === "\xFF\xFE\x00\x00") { 112 $data = substr($data, 4); 113 } 114 // UTF-16 Big Endian BOM 115 elseif (substr($data, 0, 2) === "\xFE\xFF") { 116 $data = substr($data, 2); 117 } 118 // UTF-16 Little Endian BOM 119 elseif (substr($data, 0, 2) === "\xFF\xFE") { 120 $data = substr($data, 2); 121 } 122 // UTF-8 BOM 123 elseif (substr($data, 0, 3) === "\xEF\xBB\xBF") { 124 $data = substr($data, 3); 125 } 126 127 if (substr($data, 0, 5) === '<?xml' && strspn(substr($data, 5, 1), "\x09\x0A\x0D\x20") && ($pos = strpos($data, '?>')) !== false) { 128 $declaration = $this->registry->create(DeclarationParser::class, [substr($data, 5, $pos - 5)]); 129 if ($declaration->parse()) { 130 $data = substr($data, $pos + 2); 131 $data = '<?xml version="' . $declaration->version . '" encoding="' . $encoding . '" standalone="' . (($declaration->standalone) ? 'yes' : 'no') . '"?>' ."\n". $this->declare_html_entities() . $data; 132 } else { 133 $this->error_string = 'SimplePie bug! 
Please report this!'; 134 return false; 135 } 136 } 137 138 $return = true; 139 140 static $xml_is_sane = null; 141 if ($xml_is_sane === null) { 142 $parser_check = xml_parser_create(); 143 xml_parse_into_struct($parser_check, '<foo>&</foo>', $values); 144 xml_parser_free($parser_check); 145 $xml_is_sane = isset($values[0]['value']); 146 } 147 148 // Create the parser 149 if ($xml_is_sane) { 150 $xml = xml_parser_create_ns($this->encoding, $this->separator); 151 xml_parser_set_option($xml, XML_OPTION_SKIP_WHITE, 1); 152 xml_parser_set_option($xml, XML_OPTION_CASE_FOLDING, 0); 153 xml_set_character_data_handler($xml, [$this, 'cdata']); 154 xml_set_element_handler($xml, [$this, 'tag_open'], [$this, 'tag_close']); 155 156 // Parse! 157 $wrapper = @is_writable(sys_get_temp_dir()) ? 'php://temp' : 'php://memory'; 158 if (($stream = fopen($wrapper, 'r+')) && 159 fwrite($stream, $data) && 160 rewind($stream)) { 161 //Parse by chunks not to use too much memory 162 do { 163 $stream_data = fread($stream, 1048576); 164 if (!xml_parse($xml, $stream_data === false ? '' : $stream_data, feof($stream))) { 165 $this->error_code = xml_get_error_code($xml); 166 $this->error_string = xml_error_string($this->error_code); 167 $return = false; 168 break; 169 } 170 } while (!feof($stream)); 171 fclose($stream); 172 } else { 173 $return = false; 174 } 175 176 $this->current_line = xml_get_current_line_number($xml); 177 $this->current_column = xml_get_current_column_number($xml); 178 $this->current_byte = xml_get_current_byte_index($xml); 179 xml_parser_free($xml); 180 return $return; 181 } 182 183 libxml_clear_errors(); 184 $xml = new \XMLReader(); 185 $xml->xml($data); 186 while (@$xml->read()) { 187 switch ($xml->nodeType) { 188 case constant('XMLReader::END_ELEMENT'): 189 if ($xml->namespaceURI !== '') { 190 $tagName = $xml->namespaceURI . $this->separator . 
$xml->localName;
                    } else {
                        $tagName = $xml->localName;
                    }
                    $this->tag_close(null, $tagName);
                    break;
                case constant('XMLReader::ELEMENT'):
                    $empty = $xml->isEmptyElement;
                    if ($xml->namespaceURI !== '') {
                        $tagName = $xml->namespaceURI . $this->separator . $xml->localName;
                    } else {
                        $tagName = $xml->localName;
                    }
                    $attributes = [];
                    while ($xml->moveToNextAttribute()) {
                        if ($xml->namespaceURI !== '') {
                            $attrName = $xml->namespaceURI . $this->separator . $xml->localName;
                        } else {
                            $attrName = $xml->localName;
                        }
                        $attributes[$attrName] = $xml->value;
                    }
                    $this->tag_open(null, $tagName, $attributes);
                    if ($empty) {
                        $this->tag_close(null, $tagName);
                    }
                    break;
                case constant('XMLReader::TEXT'):

                case constant('XMLReader::CDATA'):
                    $this->cdata(null, $xml->value);
                    break;
            }
        }
        if ($error = libxml_get_last_error()) {
            $this->error_code = $error->code;
            $this->error_string = $error->message;
            $this->current_line = $error->line;
            $this->current_column = $error->column;
            return false;
        }

        return true;
    }

    /**
     * Error code recorded by the last failed parse() run.
     *
     * @return mixed Value of $this->error_code (unset until a parse fails).
     */
    public function get_error_code()
    {
        return $this->error_code;
    }

    /**
     * Human-readable message recorded by the last failed parse() run.
     *
     * @return mixed Value of $this->error_string (unset until a parse fails).
     */
    public function get_error_string()
    {
        return $this->error_string;
    }

    /**
     * Line number recorded by parse().
     *
     * @return mixed Value of $this->current_line.
     */
    public function get_current_line()
    {
        return $this->current_line;
    }

    /**
     * Column number recorded by parse().
     *
     * @return mixed Value of $this->current_column.
     */
    public function get_current_column()
    {
        return $this->current_column;
    }

    /**
     * Byte index recorded by parse().
     *
     * @return mixed Value of $this->current_byte.
     */
    public function get_current_byte()
    {
        return $this->current_byte;
    }

    /**
     * The nested array tree built by the tag_open()/cdata()/tag_close()
     * handlers (or by parse_microformats()).
     *
     * @return array
     */
    public function get_data()
    {
        return $this->data;
    }

    /**
     * Element-start handler.
     *
     * Pushes one entry onto each per-element stack (namespace, element,
     * xml_base, xml_base_explicit, xml_lang) and either opens a new node in
     * the data tree or, while inline markup is being captured
     * (current_xhtml_construct >= 0), serialises the start tag into the
     * current node's 'data' string.
     *
     * @param mixed  $parser     Parser handle supplied by the XML extension; null
     *                           when driven by the XMLReader loop. Not used.
     * @param string $tag        Element name, optionally prefixed with its
     *                           namespace URI and $this->separator.
     * @param array  $attributes Attribute name => value map; names use the same
     *                           namespace-prefixed form as $tag.
     * @return void
     */
    public function tag_open($parser, $tag, $attributes)
    {
        [$this->namespace[], $this->element[]] = $this->split_ns($tag);

        $attribs = [];
        foreach ($attributes as $name => $value) {
            [$attrib_namespace, $attribute] = $this->split_ns($name);
            $attribs[$attrib_namespace][$attribute] = $value;
        }

        // tag_close() pops xml_base/xml_base_explicit unconditionally, so exactly
        // one entry must be pushed here for every element. If an xml:base is
        // present but cannot be absolutized, fall back to the inherited base
        // instead of pushing nothing (which would desynchronise the stacks).
        $base = false;
        if (isset($attribs[\SimplePie\SimplePie::NAMESPACE_XML]['base'])) {
            $base = $this->registry->call(Misc::class, 'absolutize_url', [$attribs[\SimplePie\SimplePie::NAMESPACE_XML]['base'], end($this->xml_base)]);
        }
        if ($base !== false) {
            $this->xml_base[] = $base;
            $this->xml_base_explicit[] = true;
        } else {
            $this->xml_base[] = end($this->xml_base);
            $this->xml_base_explicit[] = end($this->xml_base_explicit);
        }

        if (isset($attribs[\SimplePie\SimplePie::NAMESPACE_XML]['lang'])) {
            $this->xml_lang[] = $attribs[\SimplePie\SimplePie::NAMESPACE_XML]['lang'];
        } else {
            $this->xml_lang[] = end($this->xml_lang);
        }

        if ($this->current_xhtml_construct >= 0) {
            // Inside a markup-capturing element: track nesting depth and re-emit
            // XHTML-namespace start tags (un-namespaced attributes only) as text.
            $this->current_xhtml_construct++;
            if (end($this->namespace) === \SimplePie\SimplePie::NAMESPACE_XHTML) {
                $this->data['data'] .= '<' . end($this->element);
                if (isset($attribs[''])) {
                    foreach ($attribs[''] as $name => $value) {
                        $this->data['data'] .= ' ' . $name . '="' . htmlspecialchars($value, ENT_COMPAT, $this->encoding) . '"';
                    }
                }
                $this->data['data'] .= '>';
            }
        } else {
            // Remember the parent node, then rebind $this->data to a fresh child
            // slot under child[namespace][element][].
            $this->datas[] = &$this->data;
            $this->data = &$this->data['child'][end($this->namespace)][end($this->element)][];
            $this->data = ['data' => '', 'attribs' => $attribs, 'xml_base' => end($this->xml_base), 'xml_base_explicit' => end($this->xml_base_explicit), 'xml_lang' => end($this->xml_lang)];

            $namespace = end($this->namespace);
            $element = end($this->element);
            $mode = $attribs['']['mode'] ?? null;
            $type = $attribs['']['type'] ?? null;
            // Elements whose content is captured as markup from here on.
            if (($namespace === \SimplePie\SimplePie::NAMESPACE_ATOM_03 && in_array($element, ['title', 'tagline', 'copyright', 'info', 'summary', 'content'], true) && $mode === 'xml')
                || ($namespace === \SimplePie\SimplePie::NAMESPACE_ATOM_10 && in_array($element, ['rights', 'subtitle', 'summary', 'info', 'title', 'content'], true) && $type === 'xhtml')
                || ($namespace === \SimplePie\SimplePie::NAMESPACE_RSS_20 && $element === 'title')
                || ($namespace === \SimplePie\SimplePie::NAMESPACE_RSS_090 && $element === 'title')
                || ($namespace === \SimplePie\SimplePie::NAMESPACE_RSS_10 && $element === 'title')) {
                $this->current_xhtml_construct = 0;
            }
        }
    }

    /**
     * Character-data handler: appends text to the current node's 'data'.
     *
     * While inline markup is being captured the text is HTML-escaped so it can
     * sit alongside the tags re-emitted by tag_open()/tag_close().
     *
     * @param mixed  $parser Parser handle (null from the XMLReader loop). Not used.
     * @param string $cdata  Text chunk.
     * @return void
     */
    public function cdata($parser, $cdata)
    {
        if ($this->current_xhtml_construct >= 0) {
            $this->data['data'] .= htmlspecialchars($cdata, ENT_QUOTES, $this->encoding);
        } else {
            $this->data['data'] .= $cdata;
        }
    }

    /**
     * Element-end handler: mirrors tag_open().
     *
     * @param mixed  $parser Parser handle (null from the XMLReader loop). Not used.
     * @param string $tag    Element name as passed to tag_open(). Not used; the
     *                       element/namespace stacks are consulted instead.
     * @return void
     */
    public function tag_close($parser, $tag)
    {
        if ($this->current_xhtml_construct >= 0) {
            $this->current_xhtml_construct--;
            // Elements in this list get no end tag when re-serialised.
            if (end($this->namespace) === \SimplePie\SimplePie::NAMESPACE_XHTML && !in_array(end($this->element), ['area', 'base', 'basefont', 'br', 'col', 'frame', 'hr', 'img', 'input', 'isindex', 'link', 'meta', 'param'], true)) {
                $this->data['data'] .= '</' . end($this->element) . '>';
            }
        }
        if ($this->current_xhtml_construct === -1) {
            // Not (or no longer) capturing markup: step back to the parent node.
            $this->data = &$this->datas[count($this->datas) - 1];
            array_pop($this->datas);
        }

        array_pop($this->element);
        array_pop($this->namespace);
        array_pop($this->xml_base);
        array_pop($this->xml_base_explicit);
        array_pop($this->xml_lang);
    }

    /**
     * Splits a "<namespace><separator><local name>" string into its parts.
     *
     * The iTunes namespace is matched case-insensitively and the known
     * misspelled Media RSS namespaces are mapped to the canonical one.
     * Results are memoised per separator, so instances configured with
     * different separators cannot read each other's cache entries.
     *
     * @param string $string Qualified name.
     * @return array Two-element list: [namespace, local name]; namespace is ''
     *               when the separator is absent or sits at offset 0.
     */
    public function split_ns($string)
    {
        static $cache = [];
        $separator = $this->separator;
        if (!isset($cache[$separator][$string])) {
            // Deliberately truthy: an offset of 0 is treated like "no namespace".
            if ($pos = strpos($string, $separator)) {
                $namespace = substr($string, 0, $pos);
                $local_name = substr($string, $pos + strlen($separator));
                if (strtolower($namespace) === \SimplePie\SimplePie::NAMESPACE_ITUNES) {
                    $namespace = \SimplePie\SimplePie::NAMESPACE_ITUNES;
                }

                // Normalize the Media RSS namespaces
                if ($namespace === \SimplePie\SimplePie::NAMESPACE_MEDIARSS_WRONG ||
                    $namespace === \SimplePie\SimplePie::NAMESPACE_MEDIARSS_WRONG2 ||
                    $namespace === \SimplePie\SimplePie::NAMESPACE_MEDIARSS_WRONG3 ||
                    $namespace === \SimplePie\SimplePie::NAMESPACE_MEDIARSS_WRONG4 ||
                    $namespace === \SimplePie\SimplePie::NAMESPACE_MEDIARSS_WRONG5) {
                    $namespace = \SimplePie\SimplePie::NAMESPACE_MEDIARSS;
                }
                $cache[$separator][$string] = [$namespace, $local_name];
            } else {
                $cache[$separator][$string] = ['', $string];
            }
        }
        return $cache[$separator][$string];
    }

    /**
     * Renders a microformats h-card item as an HTML link.
     *
     * @param array $data     Parsed microformats item.
     * @param bool  $category When true, an empty "person-tag" span is placed
     *                        inside the link.
     * @return string An <a class="h-card"> element when the item is an h-card
     *                with a url property; otherwise the item's 'value', or ''.
     */
    private function parse_hcard($data, $category = false)
    {
        $name = '';
        $link = '';
        // Check if h-card is set and pass that information on in the link.
        if (isset($data['type']) && in_array('h-card', $data['type'], true)) {
            if (isset($data['properties']['name'][0])) {
                $name = $data['properties']['name'][0];
            }
            if (isset($data['properties']['url'][0])) {
                $link = $data['properties']['url'][0];
                if ($name === '') {
                    $name = $link;
                } else {
                    // can't have commas in categories.
                    $name = str_replace(',', '', $name);
                }
                $person_tag = $category ? '<span class="person-tag"></span>' : '';
                return '<a class="h-card" href="'.$link.'">'.$person_tag.$name.'</a>';
            }
        }
        return $data['value'] ?? '';
    }

    /**
     * Parses microformats2 markup with Mf2\parse() and stores the result in
     * $this->data shaped like an RSS 2.0 tree (rss > channel > item).
     *
     * @param string $data HTML document (by reference; not modified here).
     * @param string $url  Document URL; passed to Mf2\parse() and used as the
     *                     channel link.
     * @return bool Always true.
     */
    private function parse_microformats(&$data, $url)
    {
        $feed_title = '';
        $feed_author = null;
        $author_cache = [];
        $items = [];
        $entries = [];
        $mf = \Mf2\parse($data, $url);
        // First look for an h-feed.
        $h_feed = [];
        foreach ($mf['items'] as $mf_item) {
            if (in_array('h-feed', $mf_item['type'], true)) {
                $h_feed = $mf_item;
                break;
            }
            // Also look for h-feed or h-entry in the children of each top level item.
            if (!isset($mf_item['children'][0]['type'])) {
                continue;
            }
            if (in_array('h-feed', $mf_item['children'][0]['type'], true)) {
                $h_feed = $mf_item['children'][0];
                // In this case the parent of the h-feed may be an h-card, so use it as
                // the feed_author.
                if (in_array('h-card', $mf_item['type'], true)) {
                    $feed_author = $mf_item;
                }
                break;
            } elseif (in_array('h-entry', $mf_item['children'][0]['type'], true)) {
                $entries = $mf_item['children'];
                // In this case the parent of the h-entry list may be an h-card, so use
                // it as the feed_author.
                if (in_array('h-card', $mf_item['type'], true)) {
                    $feed_author = $mf_item;
                }
                break;
            }
        }
        if (isset($h_feed['children'])) {
            $entries = $h_feed['children'];
            // Also set the feed title and store author from the h-feed if available.
            // NOTE(review): these read the first top-level item, not $h_feed itself;
            // the two differ when the h-feed is nested or not first — confirm intent.
            if (isset($mf['items'][0]['properties']['name'][0])) {
                $feed_title = $mf['items'][0]['properties']['name'][0];
            }
            if (isset($mf['items'][0]['properties']['author'][0])) {
                $feed_author = $mf['items'][0]['properties']['author'][0];
            }
        } elseif (count($entries) === 0) {
            $entries = $mf['items'];
        }
        foreach ($entries as $entry) {
            if (!in_array('h-entry', $entry['type'], true)) {
                continue;
            }
            $item = [];
            $title = '';
            $description = '';
            if (isset($entry['properties']['url'][0])) {
                $link = $entry['properties']['url'][0];
                if (isset($link['value'])) {
                    $link = $link['value'];
                }
                $item['link'] = [['data' => $link]];
            }
            if (isset($entry['properties']['uid'][0])) {
                $guid = $entry['properties']['uid'][0];
                if (isset($guid['value'])) {
                    $guid = $guid['value'];
                }
                $item['guid'] = [['data' => $guid]];
            }
            if (isset($entry['properties']['name'][0])) {
                $title = $entry['properties']['name'][0];
                if (isset($title['value'])) {
                    $title = $title['value'];
                }
                $item['title'] = [['data' => $title]];
            }
            if (isset($entry['properties']['author'][0]) || isset($feed_author)) {
                // author is a special case, it can be plain text or an h-card array.
                // If it's plain text it can also be a url that should be followed to
                // get the actual h-card.
                $author = $entry['properties']['author'][0] ?? $feed_author;
                if (!is_string($author)) {
                    $author = $this->parse_hcard($author);
                } elseif (strpos($author, 'http') === 0) {
                    if (isset($author_cache[$author])) {
                        $author = $author_cache[$author];
                    } else {
                        // Kept in its own variable so the page-level $mf is not
                        // clobbered; a failed fetch simply yields no h-cards.
                        $author_mf = \Mf2\fetch($author);
                        foreach (($author_mf['items'] ?? []) as $hcard) {
                            // Only interested in an h-card by itself in this case.
                            if (!in_array('h-card', $hcard['type'], true)) {
                                continue;
                            }
                            // It must have a url property matching what we fetched.
                            if (!isset($hcard['properties']['url']) ||
                                !in_array($author, $hcard['properties']['url'], true)) {
                                continue;
                            }
                            // Save parse_hcard the trouble of finding the correct url.
                            $hcard['properties']['url'][0] = $author;
                            // Cache this h-card for the next h-entry to check.
                            $author_cache[$author] = $this->parse_hcard($hcard);
                            $author = $author_cache[$author];
                            break;
                        }
                    }
                }
                $item['author'] = [['data' => $author]];
            }
            if (isset($entry['properties']['photo'][0])) {
                // If a photo is also in content, don't need to add it again here.
                $content = '';
                if (isset($entry['properties']['content'][0]['html'])) {
                    $content = $entry['properties']['content'][0]['html'];
                }
                $photo_list = [];
                foreach ($entry['properties']['photo'] as $photo) {
                    // Like url/uid/name above, a photo may be a structure with a
                    // 'value' key rather than a plain string.
                    if (is_array($photo)) {
                        $photo = $photo['value'] ?? '';
                    }
                    if (is_string($photo) && !empty($photo) && strpos($content, $photo) === false) {
                        $photo_list[] = $photo;
                    }
                }
                // When there's more than one photo show the first and use a lightbox.
                // Need a permanent, unique name for the image set, but don't have
                // anything unique except for the content itself, so use that.
                $count = count($photo_list);
                if ($count > 1) {
                    $image_set_id = preg_replace('/[[:^alnum:]]/', '', $photo_list[0]);
                    $description = '<p>';
                    for ($j = 0; $j < $count; $j++) {
                        $hidden = $j === 0 ? '' : 'class="hidden" ';
                        $description .= '<a href="'.$photo_list[$j].'" '.$hidden.
                            'data-lightbox="image-set-'.$image_set_id.'">'.
                            '<img src="'.$photo_list[$j].'"></a>';
                    }
                    $description .= '<br><b>'.$count.' photos</b></p>';
                } elseif ($count === 1) {
                    $description = '<p><img src="'.$photo_list[0].'"></p>';
                }
            }
            if (isset($entry['properties']['content'][0]['html'])) {
                // e-content['value'] is the same as p-name when they are on the same
                // element. Use this to replace title with a strip_tags version so
                // that alt text from images is not included in the title.
                if (isset($entry['properties']['content'][0]['value']) &&
                    $entry['properties']['content'][0]['value'] === $title) {
                    $title = strip_tags($entry['properties']['content'][0]['html']);
                    $item['title'] = [['data' => $title]];
                }
                $description .= $entry['properties']['content'][0]['html'];
                if (isset($entry['properties']['in-reply-to'][0])) {
                    $in_reply_to = '';
                    if (is_string($entry['properties']['in-reply-to'][0])) {
                        $in_reply_to = $entry['properties']['in-reply-to'][0];
                    } elseif (isset($entry['properties']['in-reply-to'][0]['value'])) {
                        $in_reply_to = $entry['properties']['in-reply-to'][0]['value'];
                    }
                    if ($in_reply_to !== '') {
                        // The paragraph is now properly closed (was a second "<p>").
                        $description .= '<p><span class="in-reply-to"></span> '.
                            '<a href="'.$in_reply_to.'">'.$in_reply_to.'</a></p>';
                    }
                }
                $item['description'] = [['data' => $description]];
            }
            if (isset($entry['properties']['category'])) {
                $category_csv = '';
                // Categories can also contain h-cards.
                foreach ($entry['properties']['category'] as $category) {
                    if ($category_csv !== '') {
                        $category_csv .= ', ';
                    }
                    if (is_string($category)) {
                        // Can't have commas in categories.
                        $category_csv .= str_replace(',', '', $category);
                    } else {
                        $category_csv .= $this->parse_hcard($category, true);
                    }
                }
                $item['category'] = [['data' => $category_csv]];
            }
            if (isset($entry['properties']['published'][0])) {
                $timestamp = strtotime($entry['properties']['published'][0]);
                // Skip unparseable dates, and format in UTC so the value matches
                // the "GMT" suffix regardless of the default timezone.
                if ($timestamp !== false) {
                    $pub_date = gmdate('F j Y g:ia', $timestamp).' GMT';
                    $item['pubDate'] = [['data' => $pub_date]];
                }
            }
            // The title and description are set to the empty string to represent
            // a deleted item (which also makes it an invalid rss item).
            if (isset($entry['properties']['deleted'][0])) {
                $item['title'] = [['data' => '']];
                $item['description'] = [['data' => '']];
            }
            $items[] = ['child' => ['' => $item]];
        }
        // Mimic RSS data format when storing microformats.
        $link = [['data' => $url]];
        $image = '';
        if (!is_string($feed_author) &&
            isset($feed_author['properties']['photo'][0])) {
            $image = [['child' => ['' => ['url' =>
                [['data' => $feed_author['properties']['photo'][0]]]]]]];
        }
        // Use the name given for the h-feed, or get the title from the html.
        if ($feed_title !== '') {
            $feed_title = [['data' => htmlspecialchars($feed_title)]];
        } elseif ($position = strpos($data, '<title>')) {
            $start = $position < 200 ? 0 : $position - 200;
            $check = substr($data, $start, 400);
            $matches = [];
            if (preg_match('/<title>(.+)<\/title>/', $check, $matches)) {
                $feed_title = [['data' => htmlspecialchars($matches[1])]];
            }
        }
        $channel = ['channel' => [['child' => ['' =>
            ['link' => $link, 'image' => $image, 'title' => $feed_title,
                'item' => $items]]]]];
        $rss = [['attribs' => ['' => ['version' => '2.0']],
            'child' => ['' => $channel]]];
        $this->data = ['child' => ['' => ['rss' => $rss]]];
        return true;
    }

    private function declare_html_entities()
    {
        // This is required because the RSS specification says that entity-encoded
        // html is allowed, but the xml specification says they must be declared.
626 return '<!DOCTYPE html [ <!ENTITY nbsp " "> <!ENTITY iexcl "¡"> <!ENTITY cent "¢"> <!ENTITY pound "£"> <!ENTITY curren "¤"> <!ENTITY yen "¥"> <!ENTITY brvbar "¦"> <!ENTITY sect "§"> <!ENTITY uml "¨"> <!ENTITY copy "©"> <!ENTITY ordf "ª"> <!ENTITY laquo "«"> <!ENTITY not "¬"> <!ENTITY shy "­"> <!ENTITY reg "®"> <!ENTITY macr "¯"> <!ENTITY deg "°"> <!ENTITY plusmn "±"> <!ENTITY sup2 "²"> <!ENTITY sup3 "³"> <!ENTITY acute "´"> <!ENTITY micro "µ"> <!ENTITY para "¶"> <!ENTITY middot "·"> <!ENTITY cedil "¸"> <!ENTITY sup1 "¹"> <!ENTITY ordm "º"> <!ENTITY raquo "»"> <!ENTITY frac14 "¼"> <!ENTITY frac12 "½"> <!ENTITY frac34 "¾"> <!ENTITY iquest "¿"> <!ENTITY Agrave "À"> <!ENTITY Aacute "Á"> <!ENTITY Acirc "Â"> <!ENTITY Atilde "Ã"> <!ENTITY Auml "Ä"> <!ENTITY Aring "Å"> <!ENTITY AElig "Æ"> <!ENTITY Ccedil "Ç"> <!ENTITY Egrave "È"> <!ENTITY Eacute "É"> <!ENTITY Ecirc "Ê"> <!ENTITY Euml "Ë"> <!ENTITY Igrave "Ì"> <!ENTITY Iacute "Í"> <!ENTITY Icirc "Î"> <!ENTITY Iuml "Ï"> <!ENTITY ETH "Ð"> <!ENTITY Ntilde "Ñ"> <!ENTITY Ograve "Ò"> <!ENTITY Oacute "Ó"> <!ENTITY Ocirc "Ô"> <!ENTITY Otilde "Õ"> <!ENTITY Ouml "Ö"> <!ENTITY times "×"> <!ENTITY Oslash "Ø"> <!ENTITY Ugrave "Ù"> <!ENTITY Uacute "Ú"> <!ENTITY Ucirc "Û"> <!ENTITY Uuml "Ü"> <!ENTITY Yacute "Ý"> <!ENTITY THORN "Þ"> <!ENTITY szlig "ß"> <!ENTITY agrave "à"> <!ENTITY aacute "á"> <!ENTITY acirc "â"> <!ENTITY atilde "ã"> <!ENTITY auml "ä"> <!ENTITY aring "å"> <!ENTITY aelig "æ"> <!ENTITY ccedil "ç"> <!ENTITY egrave "è"> <!ENTITY eacute "é"> <!ENTITY ecirc "ê"> <!ENTITY euml "ë"> <!ENTITY igrave "ì"> <!ENTITY iacute "í"> <!ENTITY icirc "î"> <!ENTITY iuml "ï"> <!ENTITY eth "ð"> <!ENTITY ntilde "ñ"> <!ENTITY ograve "ò"> <!ENTITY oacute "ó"> <!ENTITY ocirc "ô"> <!ENTITY otilde "õ"> <!ENTITY ouml "ö"> <!ENTITY divide "÷"> <!ENTITY oslash "ø"> <!ENTITY ugrave "ù"> <!ENTITY uacute "ú"> <!ENTITY ucirc "û"> <!ENTITY uuml "ü"> <!ENTITY yacute "ý"> <!ENTITY thorn "þ"> <!ENTITY yuml "ÿ"> <!ENTITY OElig "Œ"> <!ENTITY oelig "œ"> 
<!ENTITY Scaron "Š"> <!ENTITY scaron "š"> <!ENTITY Yuml "Ÿ"> <!ENTITY fnof "ƒ"> <!ENTITY circ "ˆ"> <!ENTITY tilde "˜"> <!ENTITY Alpha "Α"> <!ENTITY Beta "Β"> <!ENTITY Gamma "Γ"> <!ENTITY Epsilon "Ε"> <!ENTITY Zeta "Ζ"> <!ENTITY Eta "Η"> <!ENTITY Theta "Θ"> <!ENTITY Iota "Ι"> <!ENTITY Kappa "Κ"> <!ENTITY Lambda "Λ"> <!ENTITY Mu "Μ"> <!ENTITY Nu "Ν"> <!ENTITY Xi "Ξ"> <!ENTITY Omicron "Ο"> <!ENTITY Pi "Π"> <!ENTITY Rho "Ρ"> <!ENTITY Sigma "Σ"> <!ENTITY Tau "Τ"> <!ENTITY Upsilon "Υ"> <!ENTITY Phi "Φ"> <!ENTITY Chi "Χ"> <!ENTITY Psi "Ψ"> <!ENTITY Omega "Ω"> <!ENTITY alpha "α"> <!ENTITY beta "β"> <!ENTITY gamma "γ"> <!ENTITY delta "δ"> <!ENTITY epsilon "ε"> <!ENTITY zeta "ζ"> <!ENTITY eta "η"> <!ENTITY theta "θ"> <!ENTITY iota "ι"> <!ENTITY kappa "κ"> <!ENTITY lambda "λ"> <!ENTITY mu "μ"> <!ENTITY nu "ν"> <!ENTITY xi "ξ"> <!ENTITY omicron "ο"> <!ENTITY pi "π"> <!ENTITY rho "ρ"> <!ENTITY sigmaf "ς"> <!ENTITY sigma "σ"> <!ENTITY tau "τ"> <!ENTITY upsilon "υ"> <!ENTITY phi "φ"> <!ENTITY chi "χ"> <!ENTITY psi "ψ"> <!ENTITY omega "ω"> <!ENTITY thetasym "ϑ"> <!ENTITY upsih "ϒ"> <!ENTITY piv "ϖ"> <!ENTITY ensp " "> <!ENTITY emsp " "> <!ENTITY thinsp " "> <!ENTITY zwnj "‌"> <!ENTITY zwj "‍"> <!ENTITY lrm "‎"> <!ENTITY rlm "‏"> <!ENTITY ndash "–"> <!ENTITY mdash "—"> <!ENTITY lsquo "‘"> <!ENTITY rsquo "’"> <!ENTITY sbquo "‚"> <!ENTITY ldquo "“"> <!ENTITY rdquo "”"> <!ENTITY bdquo "„"> <!ENTITY dagger "†"> <!ENTITY Dagger "‡"> <!ENTITY bull "•"> <!ENTITY hellip "…"> <!ENTITY permil "‰"> <!ENTITY prime "′"> <!ENTITY Prime "″"> <!ENTITY lsaquo "‹"> <!ENTITY rsaquo "›"> <!ENTITY oline "‾"> <!ENTITY frasl "⁄"> <!ENTITY euro "€"> <!ENTITY image "ℑ"> <!ENTITY weierp "℘"> <!ENTITY real "ℜ"> <!ENTITY trade "™"> <!ENTITY alefsym "ℵ"> <!ENTITY larr "←"> <!ENTITY uarr "↑"> <!ENTITY rarr "→"> <!ENTITY darr "↓"> <!ENTITY harr "↔"> <!ENTITY crarr "↵"> <!ENTITY lArr "⇐"> <!ENTITY uArr "⇑"> <!ENTITY rArr "⇒"> <!ENTITY dArr "⇓"> <!ENTITY hArr "⇔"> <!ENTITY forall "∀"> <!ENTITY part "∂"> <!ENTITY 
exist "∃"> <!ENTITY empty "∅"> <!ENTITY nabla "∇"> <!ENTITY isin "∈"> <!ENTITY notin "∉"> <!ENTITY ni "∋"> <!ENTITY prod "∏"> <!ENTITY sum "∑"> <!ENTITY minus "−"> <!ENTITY lowast "∗"> <!ENTITY radic "√"> <!ENTITY prop "∝"> <!ENTITY infin "∞"> <!ENTITY ang "∠"> <!ENTITY and "∧"> <!ENTITY or "∨"> <!ENTITY cap "∩"> <!ENTITY cup "∪"> <!ENTITY int "∫"> <!ENTITY there4 "∴"> <!ENTITY sim "∼"> <!ENTITY cong "≅"> <!ENTITY asymp "≈"> <!ENTITY ne "≠"> <!ENTITY equiv "≡"> <!ENTITY le "≤"> <!ENTITY ge "≥"> <!ENTITY sub "⊂"> <!ENTITY sup "⊃"> <!ENTITY nsub "⊄"> <!ENTITY sube "⊆"> <!ENTITY supe "⊇"> <!ENTITY oplus "⊕"> <!ENTITY otimes "⊗"> <!ENTITY perp "⊥"> <!ENTITY sdot "⋅"> <!ENTITY lceil "⌈"> <!ENTITY rceil "⌉"> <!ENTITY lfloor "⌊"> <!ENTITY rfloor "⌋"> <!ENTITY lang "〈"> <!ENTITY rang "〉"> <!ENTITY loz "◊"> <!ENTITY spades "♠"> <!ENTITY clubs "♣"> <!ENTITY hearts "♥"> <!ENTITY diams "♦"> ]>'; 627 } 679 628 } 629 630 class_alias('SimplePie\Parser', 'SimplePie_Parser');
Note: See TracChangeset
for help on using the changeset viewer.