Ticket #5378: 5378.r6603.diff
File 5378.r6603.diff, 89.5 KB (added by , 17 years ago) |
---|
-
wp-includes/rss.php
1 1 <?php 2 /** 3 * MagpieRSS: a simple RSS integration tool 2 /* Project: MagpieRSS: a simple RSS integration tool 3 * File: A compiled file for RSS syndication 4 * Author: Kellan Elliot-McCrea <kellan@protest.net> 5 * WordPress development team <http://www.wordpress.org/> 6 * Charles Johnson <technophilia@radgeek.com> 7 * Version: 0.85wp (2007.09.24) 8 * License: GPL 4 9 * 5 * A compiled file for RSS syndication10 * Provenance: 6 11 * 7 * @author Kellan Elliott-McCrea <kellan@protest.net> 8 * @version 0.51 9 * @license GPL 12 * This is a drop-in replacement for the `rss-functions.php` provided with the 13 * WordPress 1.5 distribution, which upgrades the version of MagpieRSS from 0.51 14 * to 0.8a. The update improves handling of character encoding, supports 15 * multiple categories for posts (using <dc:subject> or <category>), supports 16 * Atom 1.0, and implements many other useful features. The file is derived from 17 * a combination of (1) the WordPress development team's modifications to 18 * MagpieRSS 0.51 and (2) the latest bleeding-edge updates to the "official" 19 * MagpieRSS software, including Kellan's original work and some substantial 20 * updates by Charles Johnson. All possible through the magic of the GPL. Yay 21 * for free software! 10 22 * 11 * @package External 12 * @subpackage MagpieRSS 23 * Differences from the main branch of MagpieRSS: 24 * 25 * 1. Everything in rss_parse.inc, rss_fetch.inc, rss_cache.inc, and 26 * rss_utils.inc is included in one file. 27 * 28 * 2. MagpieRSS returns the WordPress version as the user agent, rather than 29 * Magpie 30 * 31 * 3. class RSSCache is a modified version by WordPress developers, which 32 * caches feeds in the WordPress database (in the options table), rather 33 * than writing external files directly. 34 * 35 * 4. There are two WordPress-specific functions, get_rss() and wp_rss() 36 * 37 * Differences from the version of MagpieRSS packaged with WordPress: 38 * 39 * 1. Support for translation between multiple character encodings. Under 40 * PHP 5 this is very nicely handled by the XML parsing library. Under PHP 41 * 4 we need to do a little bit of work ourselves, using either iconv or 42 * mb_convert_encoding if it is not one of the (extremely limited) number 43 * of character sets that PHP 4's XML module can handle natively. 44 * 45 * 2. Numerous bug fixes. 46 * 47 * 3. The parser class MagpieRSS has been substantially revised to better 48 * support popular features such as enclosures and multiple categories, 49 * and to support the new Atom 1.0 IETF standard. (Atom feeds are 50 * normalized so as to make the data available using terminology from 51 * either Atom 0.3 or Atom 1.0. Atom 0.3 backward-compatibility is provided 52 * to allow existing software to easily begin accepting Atom 1.0 data; new 53 * software SHOULD NOT depend on the 0.3 terminology, but rather use the 54 * normalization as a convenient way to keep supporting 0.3 feeds while 55 * they linger in the world.) 56 * 57 * The upgraded MagpieRSS can also now handle some content constructs that 58 * had not been handled well by previous versions of Magpie (such as the 59 * use of namespaced XHTML in <xhtml:body> or <xhtml:div> elements to 60 * provide the full content of posts in RSS 2.0 feeds). 61 * 62 * Unlike previous versions of MagpieRSS, this version can parse multiple 63 * instances of the same child element in item/entry and channel/feed 64 * containers. This is done using simple counters next to the element 65 * names: the first <category> element on an RSS item, for example, can be 66 * found in $item['category'] (thus preserving backward compatibility); the 67 * second in $item['category#2'], the third in $item['category#3'], and so 68 * on. The number of categories applied to the item can be found in 69 * $item['category#'] 70 * 71 * Also unlike previous versions of MagpieRSS, this version allows you to 72 * access the values of elements' attributes as well as the content they 73 * contain. This can be done using a simple syntax inspired by XPath: to 74 * access the type attribute of an RSS 2.0 enclosure, for example, you 75 * need only access `$item['enclosure@type']`. A comma-separated list of 76 * attributes for the enclosure element is stored in `$item['enclosure@']`. 77 * (This syntax interacts easily with the syntax for multiple categories; 78 * for example, the value of the `scheme` attribute for the fourth category 79 * element on a particular item is stored in `$item['category#4@scheme']`.) 80 * 81 * Note also that this implementation IS NOT backward-compatible with the 82 * kludges that were used to hack in support for multiple categories and 83 * for enclosures in upgraded versions of MagpieRSS distributed with 84 * previous versions of FeedWordPress. If your hacks or filter plugins 85 * depended on the old way of doing things... well, I warned you that they 86 * might not be permanent. Sorry! 13 87 */ 14 88 15 /*16 * Hook to use another RSS object instead of MagpieRSS17 */18 do_action('load_feed_engine');19 20 21 89 define('RSS', 'RSS'); 22 90 define('ATOM', 'Atom'); 23 define('MAGPIE_USER_AGENT', 'WordPress/' . $GLOBALS['wp_version']);24 91 25 class MagpieRSS { 26 var $parser; 27 var $current_item = array(); // item currently being parsed 28 var $items = array(); // collection of parsed items 29 var $channel = array(); // hash of channel fields 30 var $textinput = array(); 31 var $image = array(); 32 var $feed_type; 33 var $feed_version; 92 ################################################################################ 93 ## WordPress: make some settings WordPress-appropriate ######################### 94 ################################################################################ 34 95 35 // parser variables 36 var $stack = array(); // parser stack 37 var $inchannel = false; 38 var $initem = false; 39 var $incontent = false; // if in Atom <content mode="xml"> field 40 var $intextinput = false; 41 var $inimage = false; 42 var $current_field = ''; 43 var $current_namespace = false; 96 define('MAGPIE_USER_AGENT', 'WordPress/' . $wp_version . '(+http://www.wordpress.org)'); 44 97 45 //var $ERROR = ""; 98 $wp_encoding = get_settings('blog_charset'); 99 define('MAGPIE_OUTPUT_ENCODING', ($wp_encoding?$wp_encoding:'ISO-8859-1')); 46 100 47 var $_CONTENT_CONSTRUCTS = array('content', 'summary', 'info', 'title', 'tagline', 'copyright'); 101 ################################################################################ 102 ## rss_parse.inc: from MagpieRSS 0.85 ########################################## 103 ################################################################################ 48 104 49 function MagpieRSS ($source) { 105 /** 106 * Hybrid parser, and object, takes RSS as a string and returns a simple object. 107 * 108 * see: rss_fetch.inc for a simpler interface with integrated caching support 109 * 110 */ 111 class MagpieRSS { 112 var $parser; 113 114 var $current_item = array(); // item currently being parsed 115 var $items = array(); // collection of parsed items 116 var $channel = array(); // hash of channel fields 117 var $textinput = array(); 118 var $image = array(); 119 var $feed_type; 120 var $feed_version; 121 var $encoding = ''; // output encoding of parsed rss 122 123 var $_source_encoding = ''; // only set if we have to parse xml prolog 124 125 var $ERROR = ""; 126 var $WARNING = ""; 127 128 // define some constants 129 var $_XMLNS_FAMILIAR = array ( 130 'http://www.w3.org/2005/Atom' => 'atom' /* 1.0 */, 131 'http://purl.org/atom/ns#' => 'atom' /* pre-1.0 */, 132 'http://purl.org/rss/1.0/' => 'rss' /* 1.0 */, 133 'http://backend.userland.com/RSS2' => 'rss' /* 2.0 */, 134 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' => 'rdf', 135 'http://www.w3.org/1999/xhtml' => 'xhtml', 136 'http://purl.org/dc/elements/1.1/' => 'dc', 137 'http://purl.org/dc/terms/' => 'dcterms', 138 'http://purl.org/rss/1.0/modules/content/' => 'content', 139 'http://purl.org/rss/1.0/modules/syndication/' => 'sy', 140 'http://purl.org/rss/1.0/modules/taxonomy/' => 'taxo', 141 'http://purl.org/rss/1.0/modules/dc/' => 'dc', 142 'http://wellformedweb.org/CommentAPI/' => 'wfw', 143 'http://webns.net/mvcb/' => 'admin', 144 'http://purl.org/rss/1.0/modules/annotate/' => 'annotate', 145 'http://xmlns.com/foaf/0.1/' => 'foaf', 146 'http://madskills.com/public/xml/rss/module/trackback/' => 'trackback', 147 'http://web.resource.org/cc/' => 'cc' 148 ); 50 149 51 # if PHP xml isn't compiled in, die 52 # 53 if ( !function_exists('xml_parser_create') ) 54 trigger_error( "Failed to load PHP's XML Extension. http://www.php.net/manual/en/ref.xml.php" ); 150 var $_XMLBASE_RESOLVE = array ( 151 // Atom 0.3 and 1.0 xml:base support 152 'atom' => array ( 153 'link' => array ('href' => true), 154 'content' => array ('src' => true, '*xml' => true, '*html' => true), 155 'summary' => array ('*xml' => true, '*html' => true), 156 'title' => array ('*xml' => true, '*html' => true), 157 'rights' => array ('*xml' => true, '*html' => true), 158 'subtitle' => array ('*xml' => true, '*html' => true), 159 'info' => array('*xml' => true, '*html' => true), 160 'tagline' => array('*xml' => true, '*html' => true), 161 'copyright' => array ('*xml' => true, '*html' => true), 162 'generator' => array ('uri' => true, 'url' => true), 163 'uri' => array ('*content' => true), 164 'url' => array ('*content' => true), 165 'icon' => array ('*content' => true), 166 'logo' => array ('*content' => true), 167 ), 168 169 // for inline namespaced XHTML 170 'xhtml' => array ( 171 'a' => array ('href' => true), 172 'applet' => array('codebase' => true), 173 'area' => array('href' => true), 174 'blockquote' => array('cite' => true), 175 'body' => array('background' => true), 176 'del' => array('cite' => true), 177 'form' => array('action' => true), 178 'frame' => array('longdesc' => true, 'src' => true), 179 'iframe' => array('longdesc' => true, 'iframe' => true, 'src' => true), 180 'head' => array('profile' => true), 181 'img' => array('longdesc' => true, 'src' => true, 'usemap' => true), 182 'input' => array('src' => true, 'usemap' => true), 183 'ins' => array('cite' => true), 184 'link' => array('href' => true), 185 'object' => array('classid' => true, 'codebase' => true, 'data' => true, 'usemap' => true), 186 'q' => array('cite' => true), 187 'script' => array('src' => true), 188 ), 189 ); 55 190 56 $parser = @xml_parser_create(); 191 var $_ATOM_CONTENT_CONSTRUCTS = array( 192 'content', 'summary', 'title', /* common */ 193 'info', 'tagline', 'copyright', /* Atom 0.3 */ 194 'rights', 'subtitle', /* Atom 1.0 */ 195 ); 196 var $_XHTML_CONTENT_CONSTRUCTS = array('body', 'div'); 197 var $_KNOWN_ENCODINGS = array('UTF-8', 'US-ASCII', 'ISO-8859-1'); 57 198 58 if ( !is_resource($parser) ) 59 trigger_error( "Failed to create an instance of PHP's XML parser. http://www.php.net/manual/en/ref.xml.php"); 199 // parser variables, useless if you're not a parser, treat as private 200 var $stack = array('element' => array (), 'xmlns' => array (), 'xml:base' => array ()); // stack of XML data 60 201 202 var $inchannel = false; 203 var $initem = false; 61 204 62 $this->parser = $parser; 205 var $incontent = array(); // non-empty if in namespaced XML content field 206 var $xml_escape = false; // true when accepting namespaced XML 207 var $exclude_top = false; // true when Atom 1.0 type="xhtml" 63 208 64 # pass in parser, and a reference to this object 65 # setup handlers 66 # 67 xml_set_object( $this->parser, $this ); 68 xml_set_element_handler($this->parser, 69 'feed_start_element', 'feed_end_element' ); 209 var $intextinput = false; 210 var $inimage = false; 211 var $root_namespaces = array(); 212 var $current_namespace = false; 213 var $working_namespace_table = array(); 70 214 71 xml_set_character_data_handler( $this->parser, 'feed_cdata' ); 215 /** 216 * Set up XML parser, parse source, and return populated RSS object.. 217 * 218 * @param string $source string containing the RSS to be parsed 219 * 220 * NOTE: Probably a good idea to leave the encoding options alone unless 221 * you know what you're doing as PHP's character set support is 222 * a little weird. 223 * 224 * NOTE: A lot of this is unnecessary but harmless with PHP5 225 * 226 * 227 * @param string $output_encoding output the parsed RSS in this character 228 * set defaults to ISO-8859-1 as this is PHP's 229 * default. 230 * 231 * NOTE: might be changed to UTF-8 in future 232 * versions. 233 * 234 * @param string $input_encoding the character set of the incoming RSS source. 235 * Leave blank and Magpie will try to figure it 236 * out. 237 * 238 * 239 * @param bool $detect_encoding if false Magpie won't attempt to detect 240 * source encoding. (caveat emptor) 241 * 242 */ 243 function MagpieRSS ($source, $output_encoding='ISO-8859-1', 244 $input_encoding=null, $detect_encoding=true, $base_uri=null) 245 { 246 # if PHP xml isn't compiled in, die 247 # 248 if (!function_exists('xml_parser_create')) { 249 $this->error( "Failed to load PHP's XML Extension. " . 250 "http://www.php.net/manual/en/ref.xml.php", 251 E_USER_ERROR ); 252 } 253 254 list($parser, $source) = $this->create_parser($source, 255 $output_encoding, $input_encoding, $detect_encoding); 256 257 258 if (!is_resource($parser)) { 259 $this->error( "Failed to create an instance of PHP's XML parser. " . 260 "http://www.php.net/manual/en/ref.xml.php", 261 E_USER_ERROR ); 262 } 72 263 73 $status = xml_parse( $this->parser, $source ); 264 265 $this->parser = $parser; 266 267 # pass in parser, and a reference to this object 268 # setup handlers 269 # 270 xml_set_object( $this->parser, $this ); 271 xml_set_element_handler($this->parser, 272 'feed_start_element', 'feed_end_element' ); 273 274 xml_set_character_data_handler( $this->parser, 'feed_cdata' ); 74 275 75 if (! $status ) { 76 $errorcode = xml_get_error_code( $this->parser ); 77 if ( $errorcode != XML_ERROR_NONE ) { 78 $xml_error = xml_error_string( $errorcode ); 79 $error_line = xml_get_current_line_number($this->parser); 80 $error_col = xml_get_current_column_number($this->parser); 81 $errormsg = "$xml_error at line $error_line, column $error_col"; 276 $this->stack['xml:base'] = array($base_uri); 82 277 83 $this->error( $errormsg ); 84 } 85 } 278 $status = xml_parse( $this->parser, $source ); 279 280 if (! $status ) { 281 $errorcode = xml_get_error_code( $this->parser ); 282 if ( $errorcode != XML_ERROR_NONE ) { 283 $xml_error = xml_error_string( $errorcode ); 284 $error_line = xml_get_current_line_number($this->parser); 285 $error_col = xml_get_current_column_number($this->parser); 286 $errormsg = "$xml_error at line $error_line, column $error_col"; 86 287 87 xml_parser_free( $this->parser ); 288 $this->error( $errormsg ); 289 } 290 } 291 292 xml_parser_free( $this->parser ); 88 293 89 $this->normalize(); 90 } 294 $this->normalize(); 295 } 296 297 function feed_start_element($p, $element, &$attributes) { 298 $el = strtolower($element); 91 299 92 function feed_start_element($p, $element, &$attrs) { 93 $el = $element = strtolower($element); 94 $attrs = array_change_key_case($attrs, CASE_LOWER); 300 $namespaces = end($this->stack['xmlns']); 301 $baseuri = end($this->stack['xml:base']); 95 302 96 // check for a namespace, and split if found 97 $ns = false; 98 if ( strpos( $element, ':' ) ) { 99 list($ns, $el) = split( ':', $element, 2); 100 } 101 if ( $ns and $ns != 'rdf' ) { 102 $this->current_namespace = $ns; 103 } 303 if (isset($attributes['xml:base'])) { 304 $baseuri = Relative_URI::resolve($attributes['xml:base'], $baseuri); 305 } 306 array_push($this->stack['xml:base'], $baseuri); 104 307 105 # if feed type isn't set, then this is first element of feed 106 # identify feed from root element 107 # 108 if (!isset($this->feed_type) ) { 109 if ( $el == 'rdf' ) { 110 $this->feed_type = RSS; 111 $this->feed_version = '1.0'; 112 } 113 elseif ( $el == 'rss' ) { 114 $this->feed_type = RSS; 115 $this->feed_version = $attrs['version']; 116 } 117 elseif ( $el == 'feed' ) { 118 $this->feed_type = ATOM; 119 $this->feed_version = $attrs['version']; 120 $this->inchannel = true; 121 } 122 return; 308 // scan for xml namespace declarations. ugly ugly ugly. 309 // theoretically we could use xml_set_start_namespace_decl_handler and 310 // xml_set_end_namespace_decl_handler to handle this more elegantly, but 311 // support for these is buggy 312 foreach ($attributes as $attr => $value) { 313 if ( preg_match('/^xmlns(\:([A-Z_a-z].*))?$/', $attr, $match) ) { 314 $ns = (isset($match[2]) ? $match[2] : ''); 315 $namespaces[$ns] = $value; 123 316 } 317 } 124 318 125 if ( $el == 'channel' ) 126 { 127 $this->inchannel = true; 128 } 129 elseif ($el == 'item' or $el == 'entry' ) 130 { 131 $this->initem = true; 132 if ( isset($attrs['rdf:about']) ) { 133 $this->current_item['about'] = $attrs['rdf:about']; 134 } 135 } 319 array_push($this->stack['xmlns'], $namespaces); 136 320 137 // if we're in the default namespace of an RSS feed, 138 // record textinput or image fields 139 elseif ( 140 $this->feed_type == RSS and 141 $this->current_namespace == '' and 142 $el == 'textinput' ) 143 { 144 $this->intextinput = true; 145 } 321 // check for a namespace, and split if found 322 // Don't munge content tags 323 $ns = $this->namespace($element); 324 if ( empty($this->incontent) ) { 325 $el = strtolower($ns['element']); 326 $this->current_namespace = $ns['effective']; 327 } 146 328 147 elseif ( 148 $this->feed_type == RSS and 149 $this->current_namespace == '' and 150 $el == 'image' ) 151 { 152 $this->inimage = true; 329 $nsc = $ns['canonical']; $nse = $ns['element']; 330 if ( isset($this->_XMLBASE_RESOLVE[$nsc][$nse]) ) { 331 if (isset($this->_XMLBASE_RESOLVE[$nsc][$nse]['*xml'])) { 332 $attributes['xml:base'] = $baseuri; 153 333 } 154 155 # handle atom content constructs 156 elseif ( $this->feed_type == ATOM and in_array($el, $this->_CONTENT_CONSTRUCTS) ) 157 { 158 // avoid clashing w/ RSS mod_content 159 if ($el == 'content' ) { 160 $el = 'atom_content'; 334 foreach ($attributes as $key => $value) { 335 if (isset($this->_XMLBASE_RESOLVE[$nsc][$nse][strtolower($key)])) { 336 $attributes[$key] = Relative_URI::resolve($attributes[$key], $baseuri); 161 337 } 338 } 339 } 162 340 163 $this->incontent = $el;341 $attrs = array_change_key_case($attributes, CASE_LOWER); 164 342 343 # if feed type isn't set, then this is first element of feed 344 # identify feed from root element 345 # 346 if (!isset($this->feed_type) ) { 347 if ( $el == 'rdf' ) { 348 $this->feed_type = RSS; 349 $this->root_namespaces = array('rss', 'rdf'); 350 $this->feed_version = '1.0'; 351 } 352 elseif ( $el == 'rss' ) { 353 $this->feed_type = RSS; 354 $this->root_namespaces = array('rss'); 355 $this->feed_version = $attrs['version']; 356 } 357 elseif ( $el == 'feed' ) { 358 $this->feed_type = ATOM; 359 $this->root_namespaces = array('atom'); 360 if ($ns['uri'] == 'http://www.w3.org/2005/Atom') { // Atom 1.0 361 $this->root_namespaces = array('atom'); 362 $this->feed_version = '1.0'; 363 } 364 else { // Atom 0.3, probably. 365 $this->feed_version = $attrs['version']; 366 } 367 $this->inchannel = true; 368 } 369 return; 370 } 165 371 166 } 372 // if we're inside a namespaced content construct, treat tags as text 373 if ( !empty($this->incontent) ) 374 { 375 if ((count($this->incontent) > 1) or !$this->exclude_top) { 376 if ($ns['effective']=='xhtml') { 377 $tag = $ns['element']; 378 } 379 else { 380 $tag = $element; 381 $xmlns = 'xmlns'; 382 if (strlen($ns['prefix'])>0) { 383 $xmlns = $xmlns . ':' . $ns['prefix']; 384 } 385 $attributes[$xmlns] = $ns['uri']; // make sure it's visible 386 } 167 387 168 // if inside an Atom content construct (e.g. content or summary) field treat tags as text 169 elseif ($this->feed_type == ATOM and $this->incontent ) 170 { 171 // if tags are inlined, then flatten 172 $attrs_str = join(' ', 173 array_map('map_attrs', 174 array_keys($attrs), 175 array_values($attrs) ) ); 388 // if tags are inlined, then flatten 389 $attrs_str = join(' ', 390 array_map(array($this, 'map_attrs'), 391 array_keys($attributes), 392 array_values($attributes) ) 393 ); 394 395 if (strlen($attrs_str) > 0) { $attrs_str = ' '.$attrs_str; } 396 $this->append_content( "<{$tag}{$attrs_str}>" ); 397 } 398 array_push($this->incontent, $ns); // stack for parsing content XML 399 } 176 400 177 $this->append_content( "<$element $attrs_str>" ); 401 elseif ( $el == 'channel' ) { 402 $this->inchannel = true; 403 } 404 405 elseif ($el == 'item' or $el == 'entry' ) 406 { 407 $this->initem = true; 408 if ( isset($attrs['rdf:about']) ) { 409 $this->current_item['about'] = $attrs['rdf:about']; 410 } 411 } 178 412 179 array_unshift( $this->stack, $el ); 180 } 413 // if we're in the default namespace of an RSS feed, 414 // record textinput or image fields 415 elseif ( 416 $this->feed_type == RSS and 417 $this->current_namespace == '' and 418 $el == 'textinput' ) 419 { 420 $this->intextinput = true; 421 } 422 423 elseif ( 424 $this->feed_type == RSS and 425 $this->current_namespace == '' and 426 $el == 'image' ) 427 { 428 $this->inimage = true; 429 } 430 431 // set stack[0] to current element 432 else { 433 // Atom support many links per containing element. 434 // Magpie treats link elements of type rel='alternate' 435 // as being equivalent to RSS's simple link element. 181 436 182 // Atom support many links per containging element. 183 // Magpie treats link elements of type rel='alternate' 184 // as being equivalent to RSS's simple link element. 185 // 186 elseif ($this->feed_type == ATOM and $el == 'link' ) 187 { 188 if ( isset($attrs['rel']) and $attrs['rel'] == 'alternate' ) 189 { 190 $link_el = 'link'; 191 } 192 else { 193 $link_el = 'link_' . $attrs['rel']; 194 } 437 $atom_link = false; 438 if ($this->feed_type == ATOM and $el == 'link') { 439 $atom_link = true; 440 if (isset($attrs['rel']) and $attrs['rel'] != 'alternate') { 441 $el = $el . "_" . $attrs['rel']; // pseudo-element names for Atom link elements 442 } 443 } 444 # handle atom content constructs 445 elseif ( $this->feed_type == ATOM and in_array($el, $this->_ATOM_CONTENT_CONSTRUCTS) ) 446 { 447 // avoid clashing w/ RSS mod_content 448 if ($el == 'content' ) { 449 $el = 'atom_content'; 450 } 195 451 196 $this->append($link_el, $attrs['href']); 197 } 198 // set stack[0] to current element 199 else { 200 array_unshift($this->stack, $el); 201 }202 }452 // assume that everything accepts namespaced XML 453 // (that will pass through some non-validating feeds; 454 // but so what? this isn't a validating parser) 455 $this->incontent = array(); 456 array_push($this->incontent, $ns); // start a stack 457 458 $this->xml_escape = $this->accepts_namespaced_xml($attrs); 203 459 460 if ( isset($attrs['type']) and trim(strtolower($attrs['type']))=='xhtml') { 461 $this->exclude_top = true; 462 } else { 463 $this->exclude_top = false; 464 } 465 } 466 # Handle inline XHTML body elements --CWJ 467 elseif ($ns['effective']=='xhtml' and in_array($el, $this->_XHTML_CONTENT_CONSTRUCTS)) { 468 $this->current_namespace = 'xhtml'; 469 $this->incontent = array(); 470 array_push($this->incontent, $ns); // start a stack 204 471 472 $this->xml_escape = true; 473 $this->exclude_top = false; 474 } 475 476 array_unshift($this->stack['element'], $el); 477 $elpath = join('_', array_reverse($this->stack['element'])); 478 479 $n = $this->element_count($elpath); 480 $this->element_count($elpath, $n+1); 481 482 if ($n > 0) { 483 array_shift($this->stack['element']); 484 array_unshift($this->stack['element'], $el.'#'.($n+1)); 485 $elpath = join('_', array_reverse($this->stack['element'])); 486 } 487 488 // this makes the baby Jesus cry, but we can't do it in normalize() 489 // because we've made the element name for Atom links unpredictable 490 // by tacking on the relation to the end. -CWJ 491 if ($atom_link and isset($attrs['href'])) { 492 $this->append($elpath, $attrs['href']); 493 } 494 495 // add attributes 496 if (count($attrs) > 0) { 497 $this->append($elpath.'@', join(',', array_keys($attrs))); 498 foreach ($attrs as $attr => $value) { 499 $this->append($elpath.'@'.$attr, $value); 500 } 501 } 502 } 503 } 205 504 206 function feed_cdata ($p, $text) { 505 function feed_cdata ($p, $text) { 506 if ($this->incontent) { 507 if ($this->xml_escape) { $text = htmlspecialchars($text, ENT_COMPAT, $this->encoding); } 508 $this->append_content( $text ); 509 } else { 510 $current_el = join('_', array_reverse($this->stack['element'])); 511 $this->append($current_el, $text); 512 } 513 } 514 515 function feed_end_element ($p, $el) { 516 $closer = $this->namespace($el); 207 517 208 if ($this->feed_type == ATOM and $this->incontent) 209 { 210 $this->append_content( $text ); 211 } 212 else { 213 $current_el = join('_', array_reverse($this->stack)); 214 $this->append($current_el, $text); 215 } 216 } 518 if ( $this->incontent ) { 519 $opener = array_pop($this->incontent); 217 520 218 function feed_end_element ($p, $el) { 219 $el = strtolower($el); 220 221 if ( $el == 'item' or $el == 'entry' ) 222 { 223 $this->items[] = $this->current_item; 224 $this->current_item = array(); 225 $this->initem = false; 521 // balance tags properly 522 // note: i don't think this is actually neccessary 523 if ($opener != $closer) { 524 array_push($this->incontent, $opener); 525 $this->append_content("<$el />"); 526 } elseif ($this->incontent) { // are we in the content construct still? 527 if ((count($this->incontent) > 1) or !$this->exclude_top) { 528 if ($closer['effective']=='xhtml') { 529 $tag = $closer['element']; 530 } 531 else { 532 $tag = $el; 533 } 534 $this->append_content("</$tag>"); 535 } 536 } else { // if we're done with the content construct, shift the opening of the content construct off the normal stack 537 array_shift( $this->stack['element'] ); 538 } 539 } 540 elseif ($closer['effective'] == '') { 541 $el = strtolower($closer['element']); 542 if ( $el == 'item' or $el == 'entry' ) { 543 $this->items[] = $this->current_item; 544 $this->current_item = array(); 545 $this->initem = false; 546 $this->current_category = 0; 547 } 548 elseif ($this->feed_type == RSS and $el == 'textinput' ) { 549 $this->intextinput = false; 550 } 551 elseif ($this->feed_type == RSS and $el == 'image' ) { 552 $this->inimage = false; 553 } 554 elseif ($el == 'channel' or $el == 'feed' ) { 555 $this->inchannel = false; 556 } else { 557 $nsc = $closer['canonical']; $nse = $closer['element']; 558 if (isset($this->_XMLBASE_RESOLVE[$nsc][$nse]['*content'])) { 559 // Resolve relative URI in content of tag 560 $this->dereference_current_element(); 561 } 562 array_shift( $this->stack['element'] ); 563 } 564 } else { 565 $nsc = $closer['canonical']; $nse = strtolower($closer['element']); 566 if (isset($this->_XMLBASE_RESOLVE[$nsc][$nse]['*content'])) { 567 // Resolve relative URI in content of tag 568 $this->dereference_current_element(); 569 } 570 array_shift( $this->stack['element'] ); 571 } 572 573 if ( !$this->incontent ) { // Don't munge the namespace after finishing with elements in namespaced content constructs -CWJ 574 $this->current_namespace = false; 575 } 576 array_pop($this->stack['xmlns']); 577 array_pop($this->stack['xml:base']); 578 } 579 580 // Namespace handling functions 581 function namespace ($element) { 582 $namespaces = end($this->stack['xmlns']); 583 $ns = ''; 584 if ( strpos( $element, ':' ) ) { 585 list($ns, $element) = split( ':', $element, 2); 226 586 } 227 elseif ($this->feed_type == RSS and $this->current_namespace == '' and $el == 'textinput' ) 228 { 229 $this->intextinput = false; 230 } 231 elseif ($this->feed_type == RSS and $this->current_namespace == '' and $el == 'image' ) 232 { 233 $this->inimage = false; 234 } 235 elseif ($this->feed_type == ATOM and in_array($el, $this->_CONTENT_CONSTRUCTS) ) 236 { 237 $this->incontent = false; 238 } 239 elseif ($el == 'channel' or $el == 'feed' ) 240 { 241 $this->inchannel = false; 242 } 243 elseif ($this->feed_type == ATOM and $this->incontent ) { 244 // balance tags properly 245 // note: i don't think this is actually neccessary 246 if ( $this->stack[0] == $el ) 247 { 248 $this->append_content("</$el>"); 249 } 250 else { 251 $this->append_content("<$el />"); 252 } 587 588 $uri = (isset($namespaces[$ns]) ? $namespaces[$ns] : null); 253 589 254 array_shift( $this->stack ); 590 if (!is_null($uri)) { 591 $canonical = ( 592 isset($this->_XMLNS_FAMILIAR[$uri]) 593 ? $this->_XMLNS_FAMILIAR[$uri] 594 : $uri 595 ); 596 } else { 597 $canonical = $ns; 255 598 } 256 else {257 array_shift( $this->stack );258 }259 599 260 $this->current_namespace = false; 261 } 262 263 function concat (&$str1, $str2="") { 264 if (!isset($str1) ) { 265 $str1=""; 600 if (in_array($canonical, $this->root_namespaces)) { 601 $effective = ''; 602 } else { 603 $effective = $canonical; 266 604 } 267 $str1 .= $str2;268 }269 605 270 function append_content($text) { 271 if ( $this->initem ) { 272 $this->concat( $this->current_item[ $this->incontent ], $text ); 273 } 274 elseif ( $this->inchannel ) { 275 $this->concat( $this->channel[ $this->incontent ], $text ); 276 } 606 return array('effective' => $effective, 'canonical' => $canonical, 'prefix' => $ns, 'uri' => $uri, 'element' => $element); 277 607 } 278 608 279 // smart append - field and namespace aware 280 function append($el, $text) { 281 if (!$el) { 282 return; 283 } 284 if ( $this->current_namespace ) 285 { 286 if ( $this->initem ) { 287 $this->concat( 288 $this->current_item[ $this->current_namespace ][ $el ], $text); 609 // Utility functions for accessing data structure 610 611 // for smart, namespace-aware methods... 612 function magpie_data ($el, $method, $text = NULL) { 613 $ret = NULL; 614 if ($el) { 615 if (is_array($method)) { 616 $el = $this->{$method['key']}($el); 617 $method = $method['value']; 289 618 } 290 elseif ($this->inchannel) { 291 $this->concat( 292 $this->channel[ $this->current_namespace][ $el ], $text ); 293 } 294 elseif ($this->intextinput) { 295 $this->concat( 296 $this->textinput[ $this->current_namespace][ $el ], $text ); 297 } 298 elseif ($this->inimage) { 299 $this->concat( 619 620 if ( $this->current_namespace ) { 621 if ( $this->initem ) { 622 $ret = $this->{$method} ( 623 $this->current_item[ $this->current_namespace ][ $el ], 624 $text 625 ); 626 } 627 elseif ($this->inchannel) { 628 $ret = $this->{$method} ( 629 $this->channel[ $this->current_namespace][ $el ], 630 $text 631 ); 632 } 633 elseif ($this->intextinput) { 634 $ret = $this->{$method} ( 635 $this->textinput[ $this->current_namespace][ $el ], 636 $text 637 ); 638 } 639 elseif ($this->inimage) { 640 $ret = $this->{$method} ( 300 641 $this->image[ $this->current_namespace ][ $el ], $text ); 642 } 301 643 } 302 } 303 else { 304 if ( $this->initem ) { 305 $this->concat( 644 else { 645 if ( $this->initem ) { 646 $ret = $this->{$method} ( 306 647 $this->current_item[ $el ], $text); 307 }308 elseif ($this->intextinput) {309 $this->concat(648 } 649 elseif ($this->intextinput) { 650 $ret = $this->{$method} ( 310 651 $this->textinput[ $el ], $text ); 311 }312 elseif ($this->inimage) {313 $this->concat(652 } 653 elseif ($this->inimage) { 654 $ret = $this->{$method} ( 314 655 $this->image[ $el ], $text ); 315 }316 elseif ($this->inchannel) {317 $this->concat(656 } 657 elseif ($this->inchannel) { 658 $ret = $this->{$method} ( 318 659 $this->channel[ $el ], $text ); 660 } 319 661 } 320 321 662 } 663 return $ret; 322 664 } 665 666 function concat (&$str1, $str2="") { 667 if (!isset($str1) ) { 668 $str1=""; 669 } 670 $str1 .= $str2; 671 } 323 672 324 function normalize () { 325 // if atom populate rss fields 326 if ( $this->is_atom() ) { 327 $this->channel['descripton'] = $this->channel['tagline']; 328 for ( $i = 0; $i < count($this->items); $i++) { 329 $item = $this->items[$i]; 330 if ( isset($item['summary']) ) 331 $item['description'] = $item['summary']; 332 if ( isset($item['atom_content'])) 333 $item['content']['encoded'] = $item['atom_content']; 673 function retrieve_value (&$el, $text /*ignore*/) { 674 return $el; 675 } 676 function replace_value (&$el, $text) { 677 $el = $text; 678 } 679 function counter_key ($el) { 680 return $el.'#'; 681 } 334 682 335 $this->items[$i] = $item;336 }337 }338 elseif ( $this->is_rss() ) {339 $this->channel['tagline'] = $this->channel['description'];340 for ( $i = 0; $i < count($this->items); $i++) {341 $item = $this->items[$i];342 if ( isset($item['description']))343 $item['summary'] = $item['description'];344 if ( isset($item['content']['encoded'] ) )345 $item['atom_content'] = $item['content']['encoded'];346 683 347 $this->items[$i] = $item; 348 } 349 } 350 } 684 function append_content($text) { 685 $construct = reset($this->incontent); 686 $ns = $construct['effective']; 351 687 352 function is_rss () { 353 if ( $this->feed_type == RSS ) { 354 return $this->feed_version; 355 } 356 else { 357 return false; 358 } 359 } 688 // Keeping data about parent elements is necessary to 689 // properly handle atom:source and its children elements 690 $tag = join('_', array_reverse($this->stack['element'])); 360 691 361 function is_atom() { 362 if ( $this->feed_type == ATOM ) { 363 return $this->feed_version; 692 if ( $this->initem ) { 693 if ($ns) { 694 $this->concat( $this->current_item[$ns][$tag], $text ); 695 } else { 696 $this->concat( $this->current_item[$tag], $text ); 364 697 } 365 else { 366 return false; 698 } 699 elseif ( $this->inchannel ) { 700 if ($this->current_namespace) { 701 $this->concat( $this->channel[$ns][$tag], $text ); 702 } else { 703 $this->concat( $this->channel[$tag], $text ); 367 704 } 368 } 705 } 706 } 707 708 // smart append - field and namespace aware 709 function append($el, $text) { 710 $this->magpie_data($el, 'concat', $text); 711 } 369 712 370 function map_attrs($k, $v) { 371 return "$k=\"$v\""; 713 function dereference_current_element () { 714 $el = join('_', array_reverse($this->stack['element'])); 715 $base = end($this->stack['xml:base']); 716 $uri = $this->magpie_data($el, 'retrieve_value'); 717 $this->magpie_data($el, 'replace_value', Relative_URI::resolve($uri, $base)); 372 718 } 373 719 374 function error( $errormsg, $lvl = E_USER_WARNING ) { 375 // append PHP's error message if track_errors enabled 376 if ( isset($php_errormsg) ) { 377 $errormsg .= " ($php_errormsg)"; 378 } 379 if ( MAGPIE_DEBUG ) { 380 trigger_error( $errormsg, $lvl); 381 } else { 382 error_log( $errormsg, 0); 383 } 720 // smart count - field and namespace aware 721 function element_count ($el, $set = NULL) { 722 if (!is_null($set)) { 723 $ret = $this->magpie_data($el, array('key' => 'counter_key', 'value' => 'replace_value'), $set); 384 724 } 725 $ret = $this->magpie_data($el, array('key' => 'counter_key', 'value' => 'retrieve_value')); 726 return ($ret ? $ret : 0); 727 } 385 728 386 } 387 require_once( dirname(__FILE__) . '/class-snoopy.php'); 729 function normalize_enclosure (&$source, $from, &$dest, $to, $i) { 730 $id_from = $this->element_id($from, $i); 731 $id_to = $this->element_id($to, $i); 732 if (isset($source["{$id_from}@"])) { 733 foreach (explode(',', $source["{$id_from}@"]) as $attr) { 734 if ($from=='link_enclosure' and $attr=='href') { // from Atom 735 $dest["{$id_to}@url"] = $source["{$id_from}@{$attr}"]; 736 $dest["{$id_to}"] = $source["{$id_from}@{$attr}"]; 737 } 738 elseif ($from=='enclosure' and $attr=='url') { // from RSS 739 $dest["{$id_to}@href"] = $source["{$id_from}@{$attr}"]; 740 $dest["{$id_to}"] = $source["{$id_from}@{$attr}"]; 741 } 742 else { 743 $dest["{$id_to}@{$attr}"] = $source["{$id_from}@{$attr}"]; 744 } 745 } 746 } 747 } 388 748 389 if ( !function_exists('fetch_rss') ) : 390 function fetch_rss ($url) { 391 // initialize constants 392 init(); 749 function normalize_atom_person (&$source, $person, &$dest, $to, $i) { 750 $id = $this->element_id($person, $i); 751 $id_to = $this->element_id($to, $i); 393 752 394 if ( !isset($url) ) { 395 // error("fetch_rss called without a url"); 396 return false; 397 } 753 // Atom 0.3 <=> Atom 1.0 754 if ($this->feed_version >= 1.0) { $used = 'uri'; $norm = 'url'; } 755 else { $used = 'url'; $norm = 'uri'; } 398 756 399 // if cache is disabled 400 if ( !MAGPIE_CACHE_ON ) { 401 // fetch file, and parse it 402 $resp = _fetch_remote_file( $url ); 403 if ( is_success( $resp->status ) ) { 404 return _response_to_rss( $resp ); 405 } 406 else { 407 // error("Failed to fetch $url and cache is off"); 408 return false; 409 } 410 } 411 // else cache is ON 412 else { 413 // Flow 414 // 1. check cache 415 // 2. if there is a hit, make sure its fresh 416 // 3. if cached obj fails freshness check, fetch remote 417 // 4. if remote fails, return stale object, or error 757 if (isset($source["{$id}_{$used}"])) { 758 $dest["{$id_to}_{$norm}"] = $source["{$id}_{$used}"]; 759 } 418 760 419 $cache = new RSSCache( MAGPIE_CACHE_DIR, MAGPIE_CACHE_AGE ); 761 // Atom to RSS 2.0 and Dublin Core 762 // RSS 2.0 person strings should be valid e-mail addresses if possible. 763 if (isset($source["{$id}_email"])) { 764 $rss_author = $source["{$id}_email"]; 765 } 766 if (isset($source["{$id}_name"])) { 767 $rss_author = $source["{$id}_name"] 768 . (isset($rss_author) ? " <$rss_author>" : ''); 769 } 770 if (isset($rss_author)) { 771 $source[$id] = $rss_author; // goes to top-level author or contributor 772 $dest[$id_to] = $rss_author; // goes to dc:creator or dc:contributor 773 } 774 } 420 775 421 if (MAGPIE_DEBUG and $cache->ERROR) { 422 debug($cache->ERROR, E_USER_WARNING); 423 } 776 // Normalize Atom 1.0 and RSS 2.0 categories to Dublin Core... 777 function normalize_category (&$source, $from, &$dest, $to, $i) { 778 $cat_id = $this->element_id($from, $i); 779 $dc_id = $this->element_id($to, $i); 424 780 781 // first normalize category elements: Atom 1.0 <=> RSS 2.0 782 if ( isset($source["{$cat_id}@term"]) ) { // category identifier 783 $source[$cat_id] = $source["{$cat_id}@term"]; 784 } elseif ( $this->feed_type == RSS ) { 785 $source["{$cat_id}@term"] = $source[$cat_id]; 786 } 787 788 if ( isset($source["{$cat_id}@scheme"]) ) { // URI to taxonomy 789 $source["{$cat_id}@domain"] = $source["{$cat_id}@scheme"]; 790 } elseif ( isset($source["{$cat_id}@domain"]) ) { 791 $source["{$cat_id}@scheme"] = $source["{$cat_id}@domain"]; 792 } 425 793 426 $cache_status = 0; // response of check_cache 427 $request_headers = array(); // HTTP headers to send with fetch 428 $rss = 0; // parsed RSS object 429 $errormsg = 0; // errors, if any 794 // Now put the identifier into dc:subject 795 $dest[$dc_id] = $source[$cat_id]; 796 } 797 798 // ... or vice versa 799 function normalize_dc_subject (&$source, $from, &$dest, $to, $i) { 800 $dc_id = $this->element_id($from, $i); 801 $cat_id = $this->element_id($to, $i); 430 802 431 if (!$cache->ERROR) { 432 // return cache HIT, MISS, or STALE 433 $cache_status = $cache->check_cache( $url ); 434 } 803 $dest[$cat_id] = $source[$dc_id]; // RSS 2.0 804 $dest["{$cat_id}@term"] = $source[$dc_id]; // Atom 1.0 805 } 435 806 436 // if object cached, and cache is fresh, return cached obj 437 if ( $cache_status == 'HIT' ) { 438 $rss = $cache->get( $url ); 439 if ( isset($rss) and $rss ) { 440 $rss->from_cache = 1; 441 if ( MAGPIE_DEBUG > 1) {442 debug("MagpieRSS: Cache HIT", E_USER_NOTICE); 443 } 444 return $rss;445 446 807 // simplify the logic for normalize(). Makes sure that count of elements and 808 // each of multiple elements is normalized properly. If you need to mess 809 // with things like attributes or change formats or the like, pass it a 810 // callback to handle each element. 811 function normalize_element (&$source, $from, &$dest, $to, $via = NULL) { 812 if (isset($source[$from]) or isset($source["{$from}#"])) { 813 if (isset($source["{$from}#"])) { 814 $n = $source["{$from}#"]; 815 $dest["{$to}#"] = $source["{$from}#"]; 816 } 817 else { $n = 1; } 447 818 448 // else attempt a conditional get 819 for ($i = 1; $i <= $n; $i++) { 820 if (isset($via)) { // custom callback for ninja attacks 821 $this->{$via}($source, $from, $dest, $to, $i); 822 } 823 else { // just make it the same 824 $from_id = $this->element_id($from, $i); 825 $to_id = $this->element_id($to, $i); 826 $dest[$to_id] = $source[$from_id]; 827 } 828 } 829 } 830 } 449 831 450 // setup headers 451 if ( $cache_status == 'STALE' ) { 452 $rss = $cache->get( $url ); 453 if ( $rss->etag and $rss->last_modified ) { 454 $request_headers['If-None-Match'] = $rss->etag; 455 $request_headers['If-Last-Modified'] = $rss->last_modified; 832 function normalize () { 833 // if atom populate rss fields and normalize 0.3 and 1.0 feeds 834 if ( $this->is_atom() ) { 835 // Atom 1.0 elements <=> Atom 0.3 elements (Thanks, o brilliant wordsmiths of the Atom 1.0 standard!) 836 if ($this->feed_version < 1.0) { 837 $this->normalize_element($this->channel, 'tagline', $this->channel, 'subtitle'); 838 $this->normalize_element($this->channel, 'copyright', $this->channel, 'rights'); 839 $this->normalize_element($this->channel, 'modified', $this->channel, 'updated'); 840 } else { 841 $this->normalize_element($this->channel, 'subtitle', $this->channel, 'tagline'); 842 $this->normalize_element($this->channel, 'rights', $this->channel, 'copyright'); 843 $this->normalize_element($this->channel, 'updated', $this->channel, 'modified'); 844 } 845 $this->normalize_element($this->channel, 'author', $this->channel['dc'], 'creator', 'normalize_atom_person'); 846 $this->normalize_element($this->channel, 'contributor', $this->channel['dc'], 'contributor', 'normalize_atom_person'); 847 848 // Atom elements to RSS elements 849 $this->normalize_element($this->channel, 'subtitle', $this->channel, 'description'); 850 851 if ( isset($this->channel['logo']) ) { 852 $this->normalize_element($this->channel, 'logo', $this->image, 'url'); 853 $this->normalize_element($this->channel, 'link', $this->image, 'link'); 854 $this->normalize_element($this->channel, 'title', $this->image, 'title'); 855 } 856 857 for ( $i = 0; $i < count($this->items); $i++) { 858 $item = $this->items[$i]; 859 860 // Atom 1.0 elements <=> Atom 0.3 elements 861 if ($this->feed_version < 1.0) { 862 $this->normalize_element($item, 'modified', $item, 'updated'); 863 $this->normalize_element($item, 'issued', $item, 'published'); 864 } else { 865 $this->normalize_element($item, 'updated', $item, 'modified'); 866 $this->normalize_element($item, 'published', $item, 'issued'); 867 } 868 869 // "If an atom:entry element does not contain 870 // atom:author elements, then the atom:author elements 871 // of the contained atom:source element are considered 872 // to apply. In an Atom Feed Document, the atom:author 873 // elements of the containing atom:feed element are 874 // considered to apply to the entry if there are no 875 // atom:author elements in the locations described 876 // above." <http://atompub.org/2005/08/17/draft-ietf-atompub-format-11.html#rfc.section.4.2.1> 877 if (!isset($item["author#"])) { 878 if (isset($item["source_author#"])) { // from aggregation source 879 $source = $item; 880 $author = "source_author"; 881 } elseif (isset($this->channel["author#"])) { // from containing feed 882 $source = $this->channel; 883 $author = "author"; 884 } else { 885 $author = null; 886 } 887 888 if (!is_null($author)) { 889 $item["author#"] = $source["{$author}#"]; 890 for ($au = 1; $au <= $item["author#"]; $au++) { 891 $id_to = $this->element_id('author', $au); 892 $id_from = $this->element_id($author, $au); 893 894 $item[$id_to] = $source[$id_from]; 895 foreach (array('name', 'email', 'uri', 'url') as $what) { 896 if (isset($source["{$id_from}_{$what}"])) { 897 $item["{$id_to}_{$what}"] = $source["{$id_from}_{$what}"]; 898 } 899 } 456 900 } 457 901 } 902 } 458 903 459 $resp = _fetch_remote_file( $url, $request_headers ); 904 // Atom elements to RSS elements 905 $this->normalize_element($item, 'author', $item['dc'], 'creator', 'normalize_atom_person'); 906 $this->normalize_element($item, 'contributor', $item['dc'], 'contributor', 'normalize_atom_person'); 907 $this->normalize_element($item, 'summary', $item, 'description'); 908 $this->normalize_element($item, 'atom_content', $item['content'], 'encoded'); 909 $this->normalize_element($item, 'link_enclosure', $item, 'enclosure', 'normalize_enclosure'); 460 910 461 if (isset($resp) and $resp) { 462 if ($resp->status == '304' ) { 463 // we have the most current copy 464 if ( MAGPIE_DEBUG > 1) { 465 debug("Got 304 for $url"); 466 } 467 // reset cache on 304 (at minutillo insistent prodding) 468 $cache->set($url, $rss); 469 return $rss; 911 // Categories 912 if ( isset($item['category#']) ) { // Atom 1.0 categories to dc:subject and RSS 2.0 categories 913 $this->normalize_element($item, 'category', $item['dc'], 'subject', 'normalize_category'); 914 } 915 elseif ( isset($item['dc']['subject#']) ) { // dc:subject to Atom 1.0 and RSS 2.0 categories 916 $this->normalize_element($item['dc'], 'subject', $item, 'category', 'normalize_dc_subject'); 917 } 918 919 // Normalized item timestamp 920 $atom_date = (isset($item['published']) ) ? $item['published'] : $item['updated']; 921 if ( $atom_date ) { 922 $epoch = @parse_w3cdtf($atom_date); 923 if ($epoch and $epoch > 0) { 924 $item['date_timestamp'] = $epoch; 925 } 926 } 927 928 $this->items[$i] = $item; 929 } 930 } 931 elseif ( $this->is_rss() ) { 932 // RSS elements to Atom elements 933 $this->normalize_element($this->channel, 'description', $this->channel, 'tagline'); // Atom 0.3 934 $this->normalize_element($this->channel, 'description', $this->channel, 'subtitle'); // Atom 1.0 (yay wordsmithing!) 935 $this->normalize_element($this->image, 'url', $this->channel, 'logo'); 936 937 for ( $i = 0; $i < count($this->items); $i++) { 938 $item = $this->items[$i]; 939 940 // RSS elements to Atom elements 941 $this->normalize_element($item, 'description', $item, 'summary'); 942 $this->normalize_element($item, 'enclosure', $item, 'link_enclosure', 'normalize_enclosure'); 943 944 // Categories 945 if ( isset($item['category#']) ) { // RSS 2.0 categories to dc:subject and Atom 1.0 categories 946 $this->normalize_element($item, 'category', $item['dc'], 'subject', 'normalize_category'); 470 947 } 471 elseif ( is_success( $resp->status ) ) { 472 $rss = _response_to_rss( $resp ); 473 if ( $rss ) { 474 if (MAGPIE_DEBUG > 1) { 475 debug("Fetch successful"); 476 } 477 // add object to cache 478 $cache->set( $url, $rss ); 479 return $rss; 480 } 948 elseif ( isset($item['dc']['subject#']) ) { // dc:subject to Atom 1.0 and RSS 2.0 categories 949 $this->normalize_element($item['dc'], 'subject', $item, 'category', 'normalize_dc_subject'); 481 950 } 482 else { 483 $errormsg = "Failed to fetch $url. "; 484 if ( $resp->error ) { 485 # compensate for Snoopy's annoying habbit to tacking 486 # on '\n' 487 $http_error = substr($resp->error, 0, -2); 488 $errormsg .= "(HTTP Error: $http_error)"; 489 } 490 else { 491 $errormsg .= "(HTTP Response: " . $resp->response_code .')'; 492 } 951 952 // Normalized item timestamp 953 if ( $this->is_rss() == '1.0' and isset($item['dc']['date']) ) { 954 $epoch = @parse_w3cdtf($item['dc']['date']); 955 if ($epoch and $epoch > 0) { 956 $item['date_timestamp'] = $epoch; 957 } 493 958 } 959 elseif ( isset($item['pubdate']) ) { 960 $epoch = @strtotime($item['pubdate']); 961 if ($epoch > 0) { 962 $item['date_timestamp'] = $epoch; 963 } 964 } 965 966 $this->items[$i] = $item; 494 967 } 495 else { 496 $errormsg = "Unable to retrieve RSS file for unknown reasons."; 497 } 968 } 969 } 970 971 972 function is_rss () { 973 if ( $this->feed_type == RSS ) { 974 return $this->feed_version; 975 } 976 else { 977 return false; 978 } 979 } 980 981 function is_atom() { 982 if ( $this->feed_type == ATOM ) { 983 return $this->feed_version; 984 } 985 else { 986 return false; 987 } 988 } 498 989 499 // else fetch failed 990 /** 991 * return XML parser, and possibly re-encoded source 992 * 993 */ 994 function create_parser($source, $out_enc, $in_enc, $detect) { 995 if ( substr(phpversion(),0,1) == 5) { 996 $parser = $this->php5_create_parser($in_enc, $detect); 997 } 998 else { 999 list($parser, $source) = $this->php4_create_parser($source, $in_enc, $detect); 1000 } 1001 if ($out_enc) { 1002 $this->encoding = $out_enc; 1003 xml_parser_set_option($parser, XML_OPTION_TARGET_ENCODING, $out_enc); 1004 } 1005 xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, false); 1006 return array($parser, $source); 1007 } 1008 1009 /** 1010 * Instantiate an XML parser under PHP5 1011 * 1012 * PHP5 will do a fine job of detecting input encoding 1013 * if passed an empty string as the encoding. 1014 * 1015 * All hail libxml2! 1016 * 1017 */ 1018 function php5_create_parser($in_enc, $detect) { 1019 // by default php5 does a fine job of detecting input encodings 1020 if(!$detect && $in_enc) { 1021 return xml_parser_create($in_enc); 1022 } 1023 else { 1024 return xml_parser_create(''); 1025 } 1026 } 1027 1028 /** 1029 * Instaniate an XML parser under PHP4 1030 * 1031 * Unfortunately PHP4's support for character encodings 1032 * and especially XML and character encodings sucks. As 1033 * long as the documents you parse only contain characters 1034 * from the ISO-8859-1 character set (a superset of ASCII, 1035 * and a subset of UTF-8) you're fine. However once you 1036 * step out of that comfy little world things get mad, bad, 1037 * and dangerous to know. 1038 * 1039 * The following code is based on SJM's work with FoF 1040 * @see http://minutillo.com/steve/weblog/2004/6/17/php-xml-and-character-encodings-a-tale-of-sadness-rage-and-data-loss 1041 * 1042 */ 1043 function php4_create_parser($source, $in_enc, $detect) { 1044 if ( !$detect ) { 1045 return array(xml_parser_create($in_enc), $source); 1046 } 1047 1048 if (!$in_enc) { 1049 if (preg_match('/<?xml.*encoding=[\'"](.*?)[\'"].*?>/m', $source, $m)) { 1050 $in_enc = strtoupper($m[1]); 1051 $this->source_encoding = $in_enc; 1052 } 1053 else { 1054 $in_enc = 'UTF-8'; 1055 } 1056 } 1057 1058 if ($this->known_encoding($in_enc)) { 1059 return array(xml_parser_create($in_enc), $source); 1060 } 1061 1062 // the dectected encoding is not one of the simple encodings PHP knows 1063 1064 // attempt to use the iconv extension to 1065 // cast the XML to a known encoding 1066 // @see http://php.net/iconv 1067 1068 if (function_exists('iconv')) { 1069 $encoded_source = iconv($in_enc,'UTF-8', $source); 1070 if ($encoded_source) { 1071 return array(xml_parser_create('UTF-8'), $encoded_source); 1072 } 1073 } 1074 1075 // iconv didn't work, try mb_convert_encoding 1076 // @see http://php.net/mbstring 1077 if(function_exists('mb_convert_encoding')) { 1078 $encoded_source = mb_convert_encoding($source, 'UTF-8', $in_enc ); 1079 if ($encoded_source) { 1080 return array(xml_parser_create('UTF-8'), $encoded_source); 1081 } 1082 } 1083 1084 // else 1085 $this->error("Feed is in an unsupported character encoding. ($in_enc) " . 1086 "You may see strange artifacts, and mangled characters.", 1087 E_USER_NOTICE); 1088 1089 return array(xml_parser_create(), $source); 1090 } 1091 1092 function known_encoding($enc) { 1093 $enc = strtoupper($enc); 1094 if ( in_array($enc, $this->_KNOWN_ENCODINGS) ) { 1095 return $enc; 1096 } 1097 else { 1098 return false; 1099 } 1100 } 500 1101 501 // attempt to return cached object 502 if ($rss) { 503 if ( MAGPIE_DEBUG ) { 504 debug("Returning STALE object for $url"); 1102 function error ($errormsg, $lvl=E_USER_WARNING) { 1103 // append PHP's error message if track_errors enabled 1104 if ( isset($php_errormsg) ) { 1105 $errormsg .= " ($php_errormsg)"; 1106 } 1107 if ( MAGPIE_DEBUG ) { 1108 trigger_error( $errormsg, $lvl); 1109 } 1110 else { 1111 error_log( $errormsg, 0); 1112 } 1113 1114 $notices = E_USER_NOTICE|E_NOTICE; 1115 if ( $lvl&$notices ) { 1116 $this->WARNING = $errormsg; 1117 } else { 1118 $this->ERROR = $errormsg; 1119 } 1120 } 1121 1122 // magic ID function for multiple elemenets. 1123 // can be called as static MagpieRSS::element_id() 1124 function element_id ($el, $counter) { 1125 return $el . (($counter > 1) ? '#'.$counter : ''); 1126 } 1127 1128 function map_attrs($k, $v) { 1129 return $k.'="'.htmlspecialchars($v, ENT_COMPAT, $this->encoding).'"'; 1130 } 1131 1132 function accepts_namespaced_xml ($attrs) { 1133 $mode = (isset($attrs['mode']) ? trim(strtolower($attrs['mode'])) : 'xml'); 1134 $type = (isset($attrs['type']) ? trim(strtolower($attrs['type'])) : null); 1135 if ($this->feed_type == ATOM and $this->feed_version < 1.0) { 1136 if ($mode=='xml' and preg_match(':[/+](html|xml)$:i', $type)) { 1137 $ret = true; 1138 } else { 1139 $ret = false; 505 1140 } 506 return $rss; 1141 } elseif ($this->feed_type == ATOM and $this->feed_version >= 1.0) { 1142 if ($type=='xhtml' or preg_match(':[/+]xml$:i', $type)) { 1143 $ret = true; 1144 } else { 1145 $ret = false; 1146 } 1147 } else { 1148 $ret = false; // Don't munge unless you're sure 507 1149 } 1150 return $ret; 1151 } 1152 } // end class RSS 508 1153 509 // else we totally failed510 // error( $errormsg );511 1154 512 return false; 1155 // patch to support medieval versions of PHP4.1.x, 1156 // courtesy, Ryan Currie, ryan@digibliss.com 513 1157 514 } // end if ( !MAGPIE_CACHE_ON) {515 } // end fetch_rss() 516 endif;1158 if (!function_exists('array_change_key_case')) { 1159 define("CASE_UPPER",1); 1160 define("CASE_LOWER",0); 517 1161 518 function _fetch_remote_file ($url, $headers = "" ) {519 // Snoopy is an HTTP client in PHP520 $client = new Snoopy();521 $client->agent = MAGPIE_USER_AGENT;522 $client->read_timeout = MAGPIE_FETCH_TIME_OUT;523 $client->use_gzip = MAGPIE_USE_GZIP;524 if (is_array($headers) ) {525 $client->rawheaders = $headers;526 }527 1162 528 @$client->fetch($url); 529 return $client; 1163 function array_change_key_case($array,$case=CASE_LOWER) { 1164 if ($case==CASE_LOWER) $cmd='strtolower'; 1165 elseif ($case==CASE_UPPER) $cmd='strtoupper'; 1166 foreach($array as $key=>$value) { 1167 $output[$cmd($key)]=$value; 1168 } 1169 return $output; 1170 } 530 1171 531 1172 } 532 1173 533 function _response_to_rss ($resp) { 534 $rss = new MagpieRSS( $resp->results ); 1174 ################################################################################ 1175 ## WordPress: Load in Snoopy from wp-includes ################################## 1176 ################################################################################ 535 1177 536 // if RSS parsed successfully 537 if ( $rss and !$rss->ERROR) { 1178 require_once( ABSPATH . WPINC . '/class-snoopy.php'); 538 1179 539 // find Etag, and Last-Modified 540 foreach($resp->headers as $h) { 541 // 2003-03-02 - Nicola Asuni (www.tecnick.com) - fixed bug "Undefined offset: 1" 542 if (strpos($h, ": ")) { 543 list($field, $val) = explode(": ", $h, 2); 544 } 545 else { 546 $field = $h; 547 $val = ""; 548 } 1180 ################################################################################ 1181 ## rss_fetch.inc: from MagpieRSS 0.8a ########################################## 1182 ################################################################################ 549 1183 550 if ( $field == 'ETag' ) { 551 $rss->etag = $val; 552 } 1184 /*=======================================================================*\ 1185 Function: fetch_rss: 1186 Purpose: return RSS object for the give url 1187 maintain the cache 1188 Input: url of RSS file 1189 Output: parsed RSS object (see rss_parse.inc) 553 1190 554 if ( $field == 'Last-Modified' ) { 555 $rss->last_modified = $val; 556 } 557 } 1191 NOTES ON CACHEING: 1192 If caching is on (MAGPIE_CACHE_ON) fetch_rss will first check the cache. 1193 1194 NOTES ON RETRIEVING REMOTE FILES: 1195 If conditional gets are on (MAGPIE_CONDITIONAL_GET_ON) fetch_rss will 1196 return a cached object, and touch the cache object upon recieving a 1197 304. 1198 1199 NOTES ON FAILED REQUESTS: 1200 If there is an HTTP error while fetching an RSS object, the cached 1201 version will be return, if it exists (and if MAGPIE_CACHE_FRESH_ONLY is off) 1202 \*=======================================================================*/ 558 1203 559 return $rss; 560 } // else construct error message 561 else { 562 $errormsg = "Failed to parse RSS file."; 1204 define('MAGPIE_VERSION', '0.85'); 563 1205 564 if ($rss) { 565 $errormsg .= " (" . $rss->ERROR . ")"; 566 } 567 // error($errormsg); 1206 $MAGPIE_ERROR = ""; 568 1207 569 return false; 570 } // end if ($rss and !$rss->error) 1208 function fetch_rss ($url) { 1209 // initialize constants 1210 init(); 1211 1212 if ( !isset($url) ) { 1213 error("fetch_rss called without a url"); 1214 return false; 1215 } 1216 1217 // if cache is disabled 1218 if ( !MAGPIE_CACHE_ON ) { 1219 // fetch file, and parse it 1220 $resp = _fetch_remote_file( $url ); 1221 if ( is_success( $resp->status ) ) { 1222 return _response_to_rss( $resp, $url ); 1223 } 1224 else { 1225 error("Failed to fetch $url and cache is off"); 1226 return false; 1227 } 1228 } 1229 // else cache is ON 1230 else { 1231 // Flow 1232 // 1. check cache 1233 // 2. if there is a hit, make sure its fresh 1234 // 3. if cached obj fails freshness check, fetch remote 1235 // 4. if remote fails, return stale object, or error 1236 1237 $cache = new RSSCache( MAGPIE_CACHE_DIR, MAGPIE_CACHE_AGE ); 1238 1239 if (MAGPIE_DEBUG and $cache->ERROR) { 1240 debug($cache->ERROR, E_USER_WARNING); 1241 } 1242 1243 1244 $cache_status = 0; // response of check_cache 1245 $request_headers = array(); // HTTP headers to send with fetch 1246 $rss = 0; // parsed RSS object 1247 $errormsg = 0; // errors, if any 1248 1249 // store parsed XML by desired output encoding 1250 // as character munging happens at parse time 1251 $cache_key = $url . MAGPIE_OUTPUT_ENCODING; 1252 1253 if (!$cache->ERROR) { 1254 // return cache HIT, MISS, or STALE 1255 $cache_status = $cache->check_cache( $cache_key); 1256 } 1257 1258 // if object cached, and cache is fresh, return cached obj 1259 if ( $cache_status == 'HIT' ) { 1260 $rss = $cache->get( $cache_key ); 1261 if ( isset($rss) and $rss ) { 1262 // should be cache age 1263 $rss->from_cache = 1; 1264 if ( MAGPIE_DEBUG > 1) { 1265 debug("MagpieRSS: Cache HIT", E_USER_NOTICE); 1266 } 1267 return $rss; 1268 } 1269 } 1270 1271 // else attempt a conditional get 1272 1273 // setup headers 1274 if ( $cache_status == 'STALE' ) { 1275 $rss = $cache->get( $cache_key ); 1276 if ( $rss and $rss->etag and $rss->last_modified ) { 1277 $request_headers['If-None-Match'] = $rss->etag; 1278 $request_headers['If-Last-Modified'] = $rss->last_modified; 1279 } 1280 } 1281 1282 $resp = _fetch_remote_file( $url, $request_headers ); 1283 1284 if (isset($resp) and $resp) { 1285 if ($resp->status == '304' ) { 1286 // we have the most current copy 1287 if ( MAGPIE_DEBUG > 1) { 1288 debug("Got 304 for $url"); 1289 } 1290 // reset cache on 304 (at minutillo insistent prodding) 1291 $cache->set($cache_key, $rss); 1292 return $rss; 1293 } 1294 elseif ( is_success( $resp->status ) ) { 1295 $rss = _response_to_rss( $resp, $url ); 1296 if ( $rss ) { 1297 if (MAGPIE_DEBUG > 1) { 1298 debug("Fetch successful"); 1299 } 1300 // add object to cache 1301 $cache->set( $cache_key, $rss ); 1302 return $rss; 1303 } 1304 } 1305 else { 1306 $errormsg = "Failed to fetch $url "; 1307 if ( $resp->status == '-100' ) { 1308 $errormsg .= "(Request timed out after " . MAGPIE_FETCH_TIME_OUT . " seconds)"; 1309 } 1310 elseif ( $resp->error ) { 1311 # compensate for Snoopy's annoying habbit to tacking 1312 # on '\n' 1313 $http_error = substr($resp->error, 0, -2); 1314 $errormsg .= "(HTTP Error: $http_error)"; 1315 } 1316 else { 1317 $errormsg .= "(HTTP Response: " . $resp->response_code .')'; 1318 } 1319 } 1320 } 1321 else { 1322 $errormsg = "Unable to retrieve RSS file for unknown reasons."; 1323 } 1324 1325 // else fetch failed 1326 1327 // attempt to return cached object 1328 if ($rss) { 1329 if ( MAGPIE_DEBUG ) { 1330 debug("Returning STALE object for $url"); 1331 } 1332 return $rss; 1333 } 1334 1335 // else we totally failed 1336 error( $errormsg ); 1337 1338 return false; 1339 1340 } // end if ( !MAGPIE_CACHE_ON ) { 1341 } // end fetch_rss() 1342 1343 /*=======================================================================*\ 1344 Function: error 1345 Purpose: set MAGPIE_ERROR, and trigger error 1346 \*=======================================================================*/ 1347 1348 function error ($errormsg, $lvl=E_USER_WARNING) { 1349 global $MAGPIE_ERROR; 1350 1351 // append PHP's error message if track_errors enabled 1352 if ( isset($php_errormsg) ) { 1353 $errormsg .= " ($php_errormsg)"; 1354 } 1355 if ( $errormsg ) { 1356 $errormsg = "MagpieRSS: $errormsg"; 1357 $MAGPIE_ERROR = $errormsg; 1358 trigger_error( $errormsg, $lvl); 1359 } 571 1360 } 572 1361 1362 function debug ($debugmsg, $lvl=E_USER_NOTICE) { 1363 trigger_error("MagpieRSS [debug] $debugmsg", $lvl); 1364 } 1365 573 1366 /*=======================================================================*\ 574 Function: init 575 Purpose: setup constants with default values 576 check for user overrides 1367 Function: magpie_error 1368 Purpose: accessor for the magpie error variable 577 1369 \*=======================================================================*/ 578 function init () { 579 if ( defined('MAGPIE_INITALIZED') ) { 580 return; 581 } 582 else { 583 define('MAGPIE_INITALIZED', 1); 584 } 1370 function magpie_error ($errormsg="") { 1371 global $MAGPIE_ERROR; 1372 1373 if ( isset($errormsg) and $errormsg ) { 1374 $MAGPIE_ERROR = $errormsg; 1375 } 1376 1377 return $MAGPIE_ERROR; 1378 } 585 1379 586 if ( !defined('MAGPIE_CACHE_ON') ) { 587 define('MAGPIE_CACHE_ON', 1); 588 } 1380 /*=======================================================================*\ 1381 Function: _fetch_remote_file 1382 Purpose: retrieve an arbitrary remote file 1383 Input: url of the remote file 1384 headers to send along with the request (optional) 1385 Output: an HTTP response object (see Snoopy.class.inc) 1386 \*=======================================================================*/ 1387 function _fetch_remote_file ($url, $headers = "" ) { 1388 // Snoopy is an HTTP client in PHP 1389 $client = new Snoopy(); 1390 $client->agent = MAGPIE_USER_AGENT; 1391 $client->read_timeout = MAGPIE_FETCH_TIME_OUT; 1392 $client->use_gzip = MAGPIE_USE_GZIP; 1393 if (is_array($headers) ) { 1394 $client->rawheaders = $headers; 1395 } 1396 1397 @$client->fetch($url); 1398 return $client; 589 1399 590 if ( !defined('MAGPIE_CACHE_DIR') ) { 591 define('MAGPIE_CACHE_DIR', './cache'); 592 } 1400 } 593 1401 594 if ( !defined('MAGPIE_CACHE_AGE') ) { 595 define('MAGPIE_CACHE_AGE', 60*60); // one hour 596 } 1402 /*=======================================================================*\ 1403 Function: _response_to_rss 1404 Purpose: parse an HTTP response object into an RSS object 1405 Input: an HTTP response object (see Snoopy) 1406 Output: parsed RSS object (see rss_parse) 1407 \*=======================================================================*/ 1408 function _response_to_rss ($resp, $url = null) { 1409 $rss = new MagpieRSS( $resp->results, MAGPIE_OUTPUT_ENCODING, MAGPIE_INPUT_ENCODING, MAGPIE_DETECT_ENCODING, $url ); 1410 1411 // if RSS parsed successfully 1412 if ( $rss and !$rss->ERROR) { 1413 $rss->http_status = $resp->status; 597 1414 598 if ( !defined('MAGPIE_CACHE_FRESH_ONLY') ) { 599 define('MAGPIE_CACHE_FRESH_ONLY', 0); 600 } 1415 // find Etag, and Last-Modified 1416 foreach($resp->headers as $h) { 1417 // 2003-03-02 - Nicola Asuni (www.tecnick.com) - fixed bug "Undefined offset: 1" 1418 if (strpos($h, ": ")) { 1419 list($field, $val) = explode(": ", $h, 2); 1420 } 1421 else { 1422 $field = $h; 1423 $val = ""; 1424 } 1425 1426 $rss->header[$field] = $val; 601 1427 602 if ( !defined('MAGPIE_DEBUG') ) { 603 define('MAGPIE_DEBUG', 0); 604 } 1428 if ( $field == 'ETag' ) { 1429 $rss->etag = $val; 1430 } 1431 1432 if ( $field == 'Last-Modified' ) { 1433 $rss->last_modified = $val; 1434 } 1435 } 1436 1437 return $rss; 1438 } // else construct error message 1439 else { 1440 $errormsg = "Failed to parse RSS file."; 1441 1442 if ($rss) { 1443 $errormsg .= " (" . $rss->ERROR . ")"; 1444 } 1445 error($errormsg); 1446 1447 return false; 1448 } // end if ($rss and !$rss->error) 1449 } 605 1450 606 if ( !defined('MAGPIE_USER_AGENT') ) { 607 $ua = 'WordPress/' . $GLOBALS['wp_version']; 1451 /*=======================================================================*\ 1452 Function: init 1453 Purpose: setup constants with default values 1454 check for user overrides 1455 \*=======================================================================*/ 1456 function init () { 1457 if ( defined('MAGPIE_INITALIZED') ) { 1458 return; 1459 } 1460 else { 1461 define('MAGPIE_INITALIZED', true); 1462 } 1463 1464 if ( !defined('MAGPIE_CACHE_ON') ) { 1465 define('MAGPIE_CACHE_ON', true); 1466 } 608 1467 609 if ( MAGPIE_CACHE_ON ) { 610 $ua = $ua . ')'; 611 } 612 else { 613 $ua = $ua . '; No cache)'; 614 } 1468 if ( !defined('MAGPIE_CACHE_DIR') ) { 1469 define('MAGPIE_CACHE_DIR', './cache'); 1470 } 615 1471 616 define('MAGPIE_USER_AGENT', $ua); 617 } 1472 if ( !defined('MAGPIE_CACHE_AGE') ) { 1473 define('MAGPIE_CACHE_AGE', 60*60); // one hour 1474 } 618 1475 619 if ( !defined('MAGPIE_FETCH_TIME_OUT') ) {620 define('MAGPIE_FETCH_TIME_OUT', 2); // 2 second timeout 621 1476 if ( !defined('MAGPIE_CACHE_FRESH_ONLY') ) { 1477 define('MAGPIE_CACHE_FRESH_ONLY', false); 1478 } 622 1479 623 // use gzip encoding to fetch rss files if supported? 624 if ( !defined('MAGPIE_USE_GZIP') ) { 625 define('MAGPIE_USE_GZIP', true); 626 } 1480 if ( !defined('MAGPIE_OUTPUT_ENCODING') ) { 1481 define('MAGPIE_OUTPUT_ENCODING', 'ISO-8859-1'); 1482 } 1483 1484 if ( !defined('MAGPIE_INPUT_ENCODING') ) { 1485 define('MAGPIE_INPUT_ENCODING', null); 1486 } 1487 1488 if ( !defined('MAGPIE_DETECT_ENCODING') ) { 1489 define('MAGPIE_DETECT_ENCODING', true); 1490 } 1491 1492 if ( !defined('MAGPIE_DEBUG') ) { 1493 define('MAGPIE_DEBUG', 0); 1494 } 1495 1496 if ( !defined('MAGPIE_USER_AGENT') ) { 1497 $ua = 'MagpieRSS/'. MAGPIE_VERSION . ' (+http://magpierss.sf.net'; 1498 1499 if ( MAGPIE_CACHE_ON ) { 1500 $ua = $ua . ')'; 1501 } 1502 else { 1503 $ua = $ua . '; No cache)'; 1504 } 1505 1506 define('MAGPIE_USER_AGENT', $ua); 1507 } 1508 1509 if ( !defined('MAGPIE_FETCH_TIME_OUT') ) { 1510 define('MAGPIE_FETCH_TIME_OUT', 5); // 5 second timeout 1511 } 1512 1513 // use gzip encoding to fetch rss files if supported? 1514 if ( !defined('MAGPIE_USE_GZIP') ) { 1515 define('MAGPIE_USE_GZIP', true); 1516 } 627 1517 } 628 1518 629 function is_info ($sc) { 630 return $sc >= 100 && $sc < 200; 1519 // NOTE: the following code should really be in Snoopy, or at least 1520 // somewhere other then rss_fetch! 1521 1522 /*=======================================================================*\ 1523 HTTP STATUS CODE PREDICATES 1524 These functions attempt to classify an HTTP status code 1525 based on RFC 2616 and RFC 2518. 1526 1527 All of them take an HTTP status code as input, and return true or false 1528 1529 All this code is adapted from LWP's HTTP::Status. 1530 \*=======================================================================*/ 1531 1532 1533 /*=======================================================================*\ 1534 Function: is_info 1535 Purpose: return true if Informational status code 1536 \*=======================================================================*/ 1537 function is_info ($sc) { 1538 return $sc >= 100 && $sc < 200; 631 1539 } 632 1540 633 function is_success ($sc) { 634 return $sc >= 200 && $sc < 300; 1541 /*=======================================================================*\ 1542 Function: is_success 1543 Purpose: return true if Successful status code 1544 \*=======================================================================*/ 1545 function is_success ($sc) { 1546 return $sc >= 200 && $sc < 300; 635 1547 } 636 1548 637 function is_redirect ($sc) { 638 return $sc >= 300 && $sc < 400; 1549 /*=======================================================================*\ 1550 Function: is_redirect 1551 Purpose: return true if Redirection status code 1552 \*=======================================================================*/ 1553 function is_redirect ($sc) { 1554 return $sc >= 300 && $sc < 400; 639 1555 } 640 1556 641 function is_error ($sc) { 642 return $sc >= 400 && $sc < 600; 1557 /*=======================================================================*\ 1558 Function: is_error 1559 Purpose: return true if Error status code 1560 \*=======================================================================*/ 1561 function is_error ($sc) { 1562 return $sc >= 400 && $sc < 600; 643 1563 } 644 1564 645 function is_client_error ($sc) { 646 return $sc >= 400 && $sc < 500; 1565 /*=======================================================================*\ 1566 Function: is_client_error 1567 Purpose: return true if Error status code, and its a client error 1568 \*=======================================================================*/ 1569 function is_client_error ($sc) { 1570 return $sc >= 400 && $sc < 500; 647 1571 } 648 1572 649 function is_server_error ($sc) { 650 return $sc >= 500 && $sc < 600; 1573 /*=======================================================================*\ 1574 Function: is_client_error 1575 Purpose: return true if Error status code, and its a server error 1576 \*=======================================================================*/ 1577 function is_server_error ($sc) { 1578 return $sc >= 500 && $sc < 600; 651 1579 } 652 1580 1581 ################################################################################ 1582 ## rss_cache.inc: from WordPress 1.5 ########################################### 1583 ################################################################################ 1584 653 1585 class RSSCache { 654 1586 var $BASE_CACHE = 'wp-content/cache'; // where the cache files are stored 655 1587 var $MAX_AGE = 43200; // when are files stale, default twelve hours 656 1588 var $ERROR = ''; // accumulate error messages 657 1589 658 1590 function RSSCache ($base='', $age='') { 659 1591 if ( $base ) { 660 1592 $this->BASE_CACHE = $base; … … 662 1594 if ( $age ) { 663 1595 $this->MAX_AGE = $age; 664 1596 } 665 1597 666 1598 } 667 1599 668 1600 /*=======================================================================*\ 669 1601 Function: set 670 1602 Purpose: add an item to the cache, keyed on url 671 1603 Input: url from wich the rss file was fetched 672 Output: true on sucess 1604 Output: true on sucess 673 1605 \*=======================================================================*/ 674 1606 function set ($url, $rss) { 675 1607 global $wpdb; 676 1608 $cache_option = 'rss_' . $this->file_name( $url ); 677 1609 $cache_timestamp = 'rss_' . $this->file_name( $url ) . '_ts'; 678 679 // shouldn't these be using get_option() ? 680 if ( !$wpdb->get_var( $wpdb->prepare( "SELECT option_name FROM $wpdb->options WHERE option_name = %s", $cache_option ) ) ) 1610 1611 if ( !$wpdb->get_var("SELECT option_name FROM $wpdb->options WHERE option_name = '$cache_option'") ) 681 1612 add_option($cache_option, '', '', 'no'); 682 if ( !$wpdb->get_var( $wpdb->prepare( "SELECT option_name FROM $wpdb->options WHERE option_name = %s", $cache_timestamp )) )1613 if ( !$wpdb->get_var("SELECT option_name FROM $wpdb->options WHERE option_name = '$cache_timestamp'") ) 683 1614 add_option($cache_timestamp, '', '', 'no'); 684 1615 685 1616 update_option($cache_option, $rss); 686 1617 update_option($cache_timestamp, time() ); 687 1618 688 1619 return $cache_option; 689 1620 } 690 1621 691 1622 /*=======================================================================*\ 692 1623 Function: get 693 1624 Purpose: fetch an item from the cache 694 1625 Input: url from wich the rss file was fetched 695 Output: cached object on HIT, false on MISS 696 \*=======================================================================*/ 1626 Output: cached object on HIT, false on MISS 1627 \*=======================================================================*/ 697 1628 function get ($url) { 698 1629 $this->ERROR = ""; 699 1630 $cache_option = 'rss_' . $this->file_name( $url ); 700 1631 701 1632 if ( ! get_option( $cache_option ) ) { 702 $this->debug( 1633 $this->debug( 703 1634 "Cache doesn't contain: $url (cache option: $cache_option)" 704 1635 ); 705 1636 return 0; 706 1637 } 707 1638 708 1639 $rss = get_option( $cache_option ); 709 1640 710 1641 return $rss; 711 1642 } 712 1643 … … 715 1646 Purpose: check a url for membership in the cache 716 1647 and whether the object is older then MAX_AGE (ie. STALE) 717 1648 Input: url from wich the rss file was fetched 718 Output: cached object on HIT, false on MISS 719 \*=======================================================================*/ 1649 Output: cached object on HIT, false on MISS 1650 \*=======================================================================*/ 720 1651 function check_cache ( $url ) { 721 1652 $this->ERROR = ""; 722 1653 $cache_option = $this->file_name( $url ); … … 743 1674 744 1675 /*=======================================================================*\ 745 1676 Function: serialize 746 \*=======================================================================*/ 1677 \*=======================================================================*/ 747 1678 function serialize ( $rss ) { 748 1679 return serialize( $rss ); 749 1680 } 750 1681 751 1682 /*=======================================================================*\ 752 1683 Function: unserialize 753 \*=======================================================================*/ 1684 \*=======================================================================*/ 754 1685 function unserialize ( $data ) { 755 1686 return unserialize( $data ); 756 1687 } 757 1688 758 1689 /*=======================================================================*\ 759 1690 Function: file_name 760 1691 Purpose: map url to location in cache 761 1692 Input: url from wich the rss file was fetched 762 1693 Output: a file name 763 \*=======================================================================*/ 1694 \*=======================================================================*/ 764 1695 function file_name ($url) { 765 1696 return md5( $url ); 766 1697 } 767 1698 768 1699 /*=======================================================================*\ 769 1700 Function: error 770 1701 Purpose: register error 771 \*=======================================================================*/ 1702 \*=======================================================================*/ 772 1703 function error ($errormsg, $lvl=E_USER_WARNING) { 773 1704 // append PHP's error message if track_errors enabled 774 if ( isset($php_errormsg) ) { 1705 if ( isset($php_errormsg) ) { 775 1706 $errormsg .= " ($php_errormsg)"; 776 1707 } 777 1708 $this->ERROR = $errormsg; … … 789 1720 } 790 1721 } 791 1722 792 if ( !function_exists('parse_w3cdtf') ) : 793 function parse_w3cdtf ( $date_str ) { 1723 ################################################################################ 1724 ## rss_utils.inc: from MagpieRSS 0.8a ########################################## 1725 ################################################################################ 794 1726 795 # regex to match wc3dtf 796 $pat = "/(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2})(:(\d{2}))?(?:([-+])(\d{2}):?(\d{2})|(Z))?/"; 1727 /*======================================================================*\ 1728 Function: parse_w3cdtf 1729 Purpose: parse a W3CDTF date into unix epoch 797 1730 798 if ( preg_match( $pat, $date_str, $match ) ) { 799 list( $year, $month, $day, $hours, $minutes, $seconds) = 800 array( $match[1], $match[2], $match[3], $match[4], $match[5], $match[7]); 1731 NOTE: http://www.w3.org/TR/NOTE-datetime 1732 \*======================================================================*/ 801 1733 802 # calc epoch for current date assuming GMT 803 $epoch = gmmktime( $hours, $minutes, $seconds, $month, $day, $year); 1734 function parse_w3cdtf ( $date_str ) { 1735 1736 # regex to match wc3dtf 1737 $pat = "/^\s*(\d{4})(-(\d{2})(-(\d{2})(T(\d{2}):(\d{2})(:(\d{2})(\.\d+)?)?(?:([-+])(\d{2}):?(\d{2})|(Z))?)?)?)?\s*\$/"; 1738 1739 if ( preg_match( $pat, $date_str, $match ) ) { 1740 list( $year, $month, $day, $hours, $minutes, $seconds) = 1741 array( $match[1], $match[3], $match[5], $match[7], $match[8], $match[10]); 804 1742 805 $offset = 0; 806 if ( $match[11] == 'Z' ) { 807 # zulu time, aka GMT 808 } 809 else { 810 list( $tz_mod, $tz_hour, $tz_min ) = 811 array( $match[8], $match[9], $match[10]); 1743 # W3C dates can omit the time, the day of the month, or even the month. 1744 # Fill in any blanks using information from the present moment. --CWJ 1745 $default['hr'] = (int) gmdate('H'); 1746 $default['day'] = (int) gmdate('d'); 1747 $default['month'] = (int) gmdate('m'); 812 1748 813 # zero out the variables814 if ( ! $tz_hour ) { $tz_hour = 0; }815 if ( ! $tz_min ) { $tz_min = 0; }1749 if (is_null($hours)) : $hours = $default['hr']; $minutes = 0; $seconds = 0; endif; 1750 if (is_null($day)) : $day = $default['day']; endif; 1751 if (is_null($month)) : $month = $default['month']; endif; 816 1752 817 $offset_secs = (($tz_hour*60)+$tz_min)*60; 1753 # calc epoch for current date assuming GMT 1754 $epoch = gmmktime( $hours, $minutes, $seconds, $month, $day, $year); 1755 1756 $offset = 0; 1757 if ( $match[14] == 'Z' ) { 1758 # zulu time, aka GMT 1759 } 1760 else { 1761 list( $tz_mod, $tz_hour, $tz_min ) = 1762 array( $match[12], $match[13], $match[14]); 1763 1764 # zero out the variables 1765 if ( ! $tz_hour ) { $tz_hour = 0; } 1766 if ( ! $tz_min ) { $tz_min = 0; } 1767 1768 $offset_secs = (($tz_hour*60)+$tz_min)*60; 1769 1770 # is timezone ahead of GMT? then subtract offset 1771 # 1772 if ( $tz_mod == '+' ) { 1773 $offset_secs = $offset_secs * -1; 1774 } 1775 1776 $offset = $offset_secs; 1777 } 1778 $epoch = $epoch + $offset; 1779 return $epoch; 1780 } 1781 else { 1782 return -1; 1783 } 1784 } 818 1785 819 # is timezone ahead of GMT? then subtract offset 820 # 821 if ( $tz_mod == '+' ) { 822 $offset_secs = $offset_secs * -1; 1786 # Relative URI static class: PHP class for resolving relative URLs 1787 # 1788 # This class is derived (under the terms of the GPL) from URL Class 0.3 by 1789 # Keyvan Minoukadeh <keyvan@k1m.com>, which is great but more than we need 1790 # for MagpieRSS's purposes. The class has been stripped down to a single 1791 # public method: Relative_URI::resolve($url, $base), which resolves the URI in 1792 # $url relative to the URI in $base 1793 # 1794 # FeedWordPress also uses this class. So if we have it loaded in, don't load it 1795 # again. 1796 # 1797 # -- Charles Johnson <technophilia@radgeek.com> 1798 if (!class_exists('Relative_URI')) { 1799 class Relative_URI 1800 { 1801 // Resolve relative URI in $url against the base URI in $base. If $base 1802 // is not supplied, then we use the REQUEST_URI of this script. 1803 // 1804 // I'm hoping this method reflects RFC 2396 Section 5.2 1805 function resolve ($url, $base = NULL) 1806 { 1807 if (is_null($base)): 1808 $base = 'http://'.$_SERVER['HTTP_HOST'].$_SERVER['REQUEST_URI']; 1809 endif; 1810 1811 $base = Relative_URI::_encode(trim($base)); 1812 $uri_parts = Relative_URI::_parse_url($base); 1813 1814 $url = Relative_URI::_encode(trim($url)); 1815 $parts = Relative_URI::_parse_url($url); 1816 1817 $uri_parts['fragment'] = (isset($parts['fragment']) ? $parts['fragment'] : null); 1818 $uri_parts['query'] = (isset($parts['query']) ? $parts['query'] : null); 1819 1820 // if path is empty, and scheme, host, and query are undefined, 1821 // the URL is referring the base URL 1822 1823 if (($parts['path'] == '') && !isset($parts['scheme']) && !isset($parts['host']) && !isset($parts['query'])) { 1824 // If the URI is empty or only a fragment, return the base URI 1825 return $base . (isset($parts['fragment']) ? '#'.$parts['fragment'] : ''); 1826 } elseif (isset($parts['scheme'])) { 1827 // If the scheme is set, then the URI is absolute. 1828 return $url; 1829 } elseif (isset($parts['host'])) { 1830 $uri_parts['host'] = $parts['host']; 1831 $uri_parts['path'] = $parts['path']; 1832 } else { 1833 // We have a relative path but not a host. 1834 1835 // start ugly fix: 1836 // prepend slash to path if base host is set, base path is not set, and url path is not absolute 1837 if ($uri_parts['host'] && ($uri_parts['path'] == '') 1838 && (strlen($parts['path']) > 0) 1839 && (substr($parts['path'], 0, 1) != '/')) { 1840 $parts['path'] = '/'.$parts['path']; 1841 } // end ugly fix 1842 1843 if (substr($parts['path'], 0, 1) == '/') { 1844 $uri_parts['path'] = $parts['path']; 1845 } else { 1846 // copy base path excluding any characters after the last (right-most) slash character 1847 $buffer = substr($uri_parts['path'], 0, (int)strrpos($uri_parts['path'], '/')+1); 1848 // append relative path 1849 $buffer .= $parts['path']; 1850 // remove "./" where "." is a complete path segment. 1851 $buffer = str_replace('/./', '/', $buffer); 1852 if (substr($buffer, 0, 2) == './') { 1853 $buffer = substr($buffer, 2); 1854 } 1855 // if buffer ends with "." as a complete path segment, remove it 1856 if (substr($buffer, -2) == '/.') { 1857 $buffer = substr($buffer, 0, -1); 1858 } 1859 // remove "<segment>/../" where <segment> is a complete path segment not equal to ".." 1860 $search_finished = false; 1861 $segment = explode('/', $buffer); 1862 while (!$search_finished) { 1863 for ($x=0; $x+1 < count($segment);) { 1864 if (($segment[$x] != '') && ($segment[$x] != '..') && ($segment[$x+1] == '..')) { 1865 if ($x+2 == count($segment)) $segment[] = ''; 1866 unset($segment[$x], $segment[$x+1]); 1867 $segment = array_values($segment); 1868 continue 2; 1869 } else { 1870 $x++; 1871 } 1872 } 1873 $search_finished = true; 1874 } 1875 $buffer = (count($segment) == 1) ? '/' : implode('/', $segment); 1876 $uri_parts['path'] = $buffer; 1877 1878 } 823 1879 } 824 825 $offset = $offset_secs; 1880 1881 // If we've gotten to this point, we can try to put the pieces 1882 // back together. 1883 $ret = ''; 1884 if (isset($uri_parts['scheme'])) $ret .= $uri_parts['scheme'].':'; 1885 if (isset($uri_parts['user'])) { 1886 $ret .= $uri_parts['user']; 1887 if (isset($uri_parts['pass'])) $ret .= ':'.$uri_parts['parts']; 1888 $ret .= '@'; 1889 } 1890 if (isset($uri_parts['host'])) { 1891 $ret .= '//'.$uri_parts['host']; 1892 if (isset($uri_parts['port'])) $ret .= ':'.$uri_parts['port']; 1893 } 1894 $ret .= $uri_parts['path']; 1895 if (isset($uri_parts['query'])) $ret .= '?'.$uri_parts['query']; 1896 if (isset($uri_parts['fragment'])) $ret .= '#'.$uri_parts['fragment']; 1897 1898 return $ret; 1899 } 1900 1901 /** 1902 * Parse URL 1903 * 1904 * Regular expression grabbed from RFC 2396 Appendix B. 1905 * This is a replacement for PHPs builtin parse_url(). 1906 * @param string $url 1907 * @access private 1908 * @return array 1909 */ 1910 function _parse_url($url) 1911 { 1912 // I'm using this pattern instead of parse_url() as there's a few strings where parse_url() 1913 // generates a warning. 1914 if (preg_match('!^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?!', $url, $match)) { 1915 $parts = array(); 1916 if ($match[1] != '') $parts['scheme'] = $match[2]; 1917 if ($match[3] != '') $parts['auth'] = $match[4]; 1918 // parse auth 1919 if (isset($parts['auth'])) { 1920 // store user info 1921 if (($at_pos = strpos($parts['auth'], '@')) !== false) { 1922 $userinfo = explode(':', substr($parts['auth'], 0, $at_pos), 2); 1923 $parts['user'] = $userinfo[0]; 1924 if (isset($userinfo[1])) $parts['pass'] = $userinfo[1]; 1925 $parts['auth'] = substr($parts['auth'], $at_pos+1); 1926 } 1927 // get port number 1928 if ($port_pos = strrpos($parts['auth'], ':')) { 1929 $parts['host'] = substr($parts['auth'], 0, $port_pos); 1930 $parts['port'] = (int)substr($parts['auth'], $port_pos+1); 1931 if ($parts['port'] < 1) $parts['port'] = null; 1932 } else { 1933 $parts['host'] = $parts['auth']; 1934 } 1935 } 1936 unset($parts['auth']); 1937 $parts['path'] = $match[5]; 1938 if (isset($match[6]) && ($match[6] != '')) $parts['query'] = $match[7]; 1939 if (isset($match[8]) && ($match[8] != '')) $parts['fragment'] = $match[9]; 1940 return $parts; 826 1941 } 827 $epoch = $epoch + $offset; 828 return $epoch; 829 } 830 else { 831 return -1; 832 } 1942 // shouldn't reach here 1943 return array('path'=>''); 1944 } 1945 1946 function _encode($string) 1947 { 1948 static $replace = array(); 1949 if (!count($replace)) { 1950 $find = array(32, 34, 60, 62, 123, 124, 125, 91, 92, 93, 94, 96, 127); 1951 $find = array_merge(range(0, 31), $find); 1952 $find = array_map('chr', $find); 1953 foreach ($find as $char) { 1954 $replace[$char] = '%'.bin2hex($char); 1955 } 1956 } 1957 // escape control characters and a few other characters 1958 $encoded = strtr($string, $replace); 1959 // remove any character outside the hex range: 21 - 7E (see www.asciitable.com) 1960 return preg_replace('/[^\x21-\x7e]/', '', $encoded); 1961 } 1962 } // class Relative_URI 833 1963 } 834 endif;835 1964 836 if ( !function_exists('wp_rss') ) : 837 function wp_rss( $url, $num_items = -1 ) { 838 if ( $rss = fetch_rss( $url ) ) { 839 echo '<ul>'; 1965 ################################################################################ 1966 ## WordPress: wp_rss(), get_rss() ############################################## 1967 ################################################################################ 840 1968 841 if ( $num_items !== -1 ) { 842 $rss->items = array_slice( $rss->items, 0, $num_items ); 843 } 844 845 foreach ( $rss->items as $item ) { 846 printf( 847 '<li><a href="%1$s" title="%2$s">%3$s</a></li>', 848 clean_url( $item['link'] ), 849 attribute_escape( strip_tags( $item['description'] ) ), 850 htmlentities( $item['title'] ) 851 ); 852 } 853 854 echo '</ul>'; 855 } else { 856 _e( 'An error has occurred, which probably means the feed is down. Try again later.' ); 1969 function wp_rss ($url, $num) { 1970 //ini_set("display_errors", false); uncomment to suppress php errors thrown if the feed is not returned. 1971 $num_items = $num; 1972 $rss = fetch_rss($url); 1973 if ( $rss ) { 1974 echo "<ul>"; 1975 $rss->items = array_slice($rss->items, 0, $num_items); 1976 foreach ($rss->items as $item ) { 1977 echo "<li>\n"; 1978 echo "<a href='$item[link]' title='$item[description]'>"; 1979 echo htmlentities($item['title']); 1980 echo "</a><br />\n"; 1981 echo "</li>\n"; 1982 } 1983 echo "</ul>"; 857 1984 } 1985 else { 1986 echo "an error has occured the feed is probably down, try again later."; 1987 } 858 1988 } 859 endif;860 1989 861 if ( !function_exists('get_rss') ) : 862 function get_rss ($url, $num_items = 5) { // Like get posts, but for RSS 1990 function get_rss ($uri, $num = 5) { // Like get posts, but for RSS 863 1991 $rss = fetch_rss($url); 864 1992 if ( $rss ) { 865 1993 $rss->items = array_slice($rss->items, 0, $num_items); … … 870 1998 echo "</a><br />\n"; 871 1999 echo "</li>\n"; 872 2000 } 2001 return $posts; 873 2002 } else { 874 2003 return false; 875 2004 } 876 2005 } 877 endif;878 879 2006 ?> 880 No newline at end of file