| 1 | <?php |
|---|
| 2 | /* |
|---|
| 3 | * atomlib.php - Atom Syndication Format PHP Library |
|---|
| 4 | * |
|---|
| 5 | * Project: http://code.google.com/p/phpatomlib/ |
|---|
| 6 | * |
|---|
| 7 | * Author: Elias Torres <elias@torrez.us> |
|---|
| 8 | * Version: 0.4 |
|---|
| 9 | * |
|---|
| 10 | */ |
|---|
| 11 | |
|---|
/**
 * Container for a parsed Atom feed.
 *
 * Besides the three collections declared here, AtomParser assigns one
 * property per recognised child element at runtime ($feed->title,
 * $feed->id, $feed->updated, ...). Those properties are not declared, so the
 * class opts in to dynamic properties: the attribute line below is an
 * ordinary "#" comment on PHP < 8 and silences the PHP 8.2+ deprecation.
 */
#[AllowDynamicProperties]
class AtomFeed {
    /** Attribute arrays of the feed's "link" elements, in document order. */
    public $links = array();

    /** Attribute arrays of the feed's "category" elements, in document order. */
    public $categories = array();

    /** AtomEntry objects, appended by the parser as each entry element closes. */
    public $entries = array();
}
|---|
| 18 | |
|---|
/**
 * Container for a single parsed Atom entry.
 *
 * AtomParser assigns one property per recognised child element at runtime
 * ($entry->title, $entry->content, $entry->id, ...). Those properties are not
 * declared, so the class opts in to dynamic properties: the attribute line
 * below is an ordinary "#" comment on PHP < 8 and silences the PHP 8.2+
 * deprecation.
 */
#[AllowDynamicProperties]
class AtomEntry {
    /** Attribute arrays of the entry's "link" elements, in document order. */
    public $links = array();

    /** Attribute arrays of the entry's "category" elements, in document order. */
    public $categories = array();
}
|---|
| 23 | |
|---|
/**
 * Event-driven Atom parser built on PHP's namespace-aware XML parser.
 *
 * parse() reads $FILE and fills $feed (an AtomFeed whose $entries are
 * AtomEntry objects). Text constructs (content, summary, title, subtitle,
 * rights) are stored as array(TYPE, serialized content); simple elements
 * (id, updated, published, draft) as plain strings; link and category
 * elements as their attribute arrays.
 */
class AtomParser {

    /** Atom namespace URI; qualified names reach the handlers as "<namespace URI>:<local name>". */
    public $NS = 'http://www.w3.org/2005/Atom';
    /** Local names stored as array(TYPE, content). */
    public $ATOM_CONTENT_ELEMENTS = array('content','summary','title','subtitle','rights');
    /** Local names stored as a plain string. */
    public $ATOM_SIMPLE_ELEMENTS = array('id','updated','published','draft');

    /** When true, _p() prints a trace and parse() keeps the raw input in $content. */
    public $debug = false;

    /** Current element nesting depth (0 outside the root element). */
    public $depth = 0;
    /** Spaces per nesting level in debug output. */
    public $indent = 2;
    /**
     * Buffer for the element being collected. Item 0 is array(tag, depth, TYPE);
     * later items are array(tag, depth, markup) records or raw character data.
     * Initialised to an array so count()/empty() are safe before any content is seen.
     */
    public $in_content = array();
    /** Stack (most recent first) of the namespace declarations of every open element. */
    public $ns_contexts = array();
    /** array(prefix, uri) pairs announced for the element about to start. */
    public $ns_decls = array();
    /** Namespace mappings that must be declared on the content element being serialized. */
    public $content_ns_decls = array();
    /** Namespace mappings already written into serialized content (most recent first). */
    public $content_ns_contexts = array();
    public $is_xhtml = false;
    public $is_html = false;
    public $is_text = true;
    public $skipped_div = false;

    /** Path or stream URI that parse() reads from. */
    public $FILE = "php://input";

    /** The AtomFeed being populated. */
    public $feed;
    /** Object (feed or entry) currently receiving parsed elements, or null. */
    public $current;

    /** Callback rendering one attribute as name="value". */
    public $map_attrs_func;
    /** Callback rendering one namespace mapping as an xmlns declaration. */
    public $map_xmlns_func;
    /** Text of the last PHP error captured while parse() was running. */
    public $error;
    /** Raw input read by parse(); only accumulated when $debug is on. */
    public $content;

    /**
     * Creates an empty feed and wires the serialization callbacks.
     *
     * The callbacks used to be built with create_function(), which was
     * removed in PHP 8.0; they are now static methods of this class.
     */
    public function __construct() {
        $this->feed = new AtomFeed();
        $this->current = null;
        $this->map_attrs_func = array(__CLASS__, 'map_attrs');
        $this->map_xmlns_func = array(__CLASS__, 'map_xmlns');
    }

    /**
     * PHP4-style constructor, kept so existing code calling it explicitly
     * (for example parent::AtomParser()) keeps working.
     */
    public function AtomParser() {
        $this->__construct();
    }

    /**
     * Renders one attribute.
     *
     * @param string $k Attribute name, already prefixed where needed.
     * @param string $v Attribute value, already XML-escaped.
     * @return string
     */
    public static function map_attrs($k, $v) {
        return "$k=\"$v\"";
    }

    /**
     * Renders one namespace mapping as an xmlns declaration.
     *
     * @param mixed $p Array key of the mapping (unused).
     * @param array $n array(prefix, uri); an empty prefix yields a default-namespace declaration.
     * @return string
     */
    public static function map_xmlns($p, $n) {
        $xd = "xmlns";
        if (strlen((string) $n[0]) > 0) {
            $xd .= ":{$n[0]}";
        }
        return "{$xd}=\"{$n[1]}\"";
    }

    /**
     * Prints a debug line indented by the current depth; no-op unless $debug is set.
     *
     * @param string $msg
     */
    public function _p($msg) {
        if ($this->debug) {
            print str_repeat(" ", $this->depth * $this->indent) . $msg . "\n";
        }
    }

    /**
     * Error handler installed for the duration of parse(); remembers the message in $error.
     */
    public function error_handler($log_level, $log_text, $error_file, $error_line) {
        $this->error = $log_text;
    }

    /**
     * Parses the document at $FILE into $feed.
     *
     * @return bool True on success; false when the XML extension is missing,
     *              the input cannot be opened, or the document is not
     *              well-formed. The failure text is left in $error.
     */
    public function parse() {

        set_error_handler(array($this, 'error_handler'));

        array_unshift($this->ns_contexts, array());

        if (!function_exists('xml_parser_create_ns')) {
            trigger_error("PHP's XML extension is not available.");
            restore_error_handler();
            return false;
        }

        $parser = xml_parser_create_ns();
        // Handlers are bound as array callables; xml_set_object() is deprecated
        // on recent PHP and its notice would be captured into $error.
        xml_set_element_handler($parser, array($this, 'start_element'), array($this, 'end_element'));
        xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
        xml_parser_set_option($parser, XML_OPTION_SKIP_WHITE, 0);
        xml_set_character_data_handler($parser, array($this, 'cdata'));
        xml_set_default_handler($parser, array($this, '_default'));
        xml_set_start_namespace_decl_handler($parser, array($this, 'start_ns'));
        xml_set_end_namespace_decl_handler($parser, array($this, 'end_ns'));

        $this->content = '';

        $ret = true;

        $fp = fopen($this->FILE, "r");
        if ($fp === false) {
            // fopen() has already raised a warning, which error_handler() stored in $error.
            $ret = false;
        } else {
            while ($data = fread($fp, 4096)) {
                if ($this->debug) {
                    $this->content .= $data;
                }

                if (!xml_parse($parser, $data, feof($fp))) {
                    // __() is only available when a translation layer (e.g. WordPress) is loaded.
                    $format = function_exists('__')
                        ? __('XML error: %s at line %d')
                        : 'XML error: %s at line %d';
                    trigger_error(sprintf($format . "\n",
                        xml_error_string(xml_get_error_code($parser)),
                        xml_get_current_line_number($parser)));
                    $ret = false;
                    break;
                }
            }
            fclose($fp);
        }

        // Only needed on PHP < 8, where parsers are resources; calling it on
        // newer versions is unnecessary and may raise a deprecation notice.
        if (version_compare(PHP_VERSION, '8.0.0', '<')) {
            xml_parser_free($parser);
        }
        unset($parser);

        restore_error_handler();

        return $ret;
    }

    /**
     * Element-start handler.
     *
     * Outside collected content it selects the current target (feed/entry),
     * starts collecting a content or simple element, or records link/category
     * attributes. Inside collected content it serializes the start tag.
     *
     * @param mixed  $parser XML parser handle (unused).
     * @param string $name   Qualified name, "<namespace URI>:<local name>".
     * @param array  $attrs  Attribute name => value.
     */
    public function start_element($parser, $name, $attrs) {

        $name_parts = explode(":", $name);
        $tag = array_pop($name_parts);

        switch ($name) {
            case $this->NS . ':feed':
                $this->current = $this->feed;
                break;
            case $this->NS . ':entry':
                $this->current = new AtomEntry();
                break;
        }

        $this->_p("start_element('$name')");

        array_unshift($this->ns_contexts, $this->ns_decls);

        $this->depth++;

        if (!empty($this->in_content)) {

            $this->content_ns_decls = array();

            if ($this->is_html || $this->is_text) {
                trigger_error("Invalid content in element found. Content must not be of type text or html if it contains markup.");
            }

            $attrs_prefix = array();

            // resolve prefixes for attributes
            foreach ($attrs as $key => $value) {
                $with_prefix = $this->ns_to_prefix($key, true);
                $attrs_prefix[$with_prefix[1]] = $this->xml_escape($value);
            }

            $attrs_str = join(' ', array_map($this->map_attrs_func, array_keys($attrs_prefix), array_values($attrs_prefix)));
            if (strlen($attrs_str) > 0) {
                $attrs_str = " " . $attrs_str;
            }

            $with_prefix = $this->ns_to_prefix($name);

            // A null mapping means the name is in no known namespace: nothing to declare.
            if ($with_prefix[0] !== null && !$this->is_declared_content_ns($with_prefix[0])) {
                array_push($this->content_ns_decls, $with_prefix[0]);
            }

            $xmlns_str = '';
            if (count($this->content_ns_decls) > 0) {
                array_unshift($this->content_ns_contexts, $this->content_ns_decls);
                $xmlns_str .= join(' ', array_map($this->map_xmlns_func, array_keys($this->content_ns_contexts[0]), array_values($this->content_ns_contexts[0])));
                if (strlen($xmlns_str) > 0) {
                    $xmlns_str = " " . $xmlns_str;
                }
            }

            array_push($this->in_content, array($tag, $this->depth, "<" . $with_prefix[1] . "{$xmlns_str}{$attrs_str}" . ">"));

        } else if (in_array($tag, $this->ATOM_CONTENT_ELEMENTS, true) || in_array($tag, $this->ATOM_SIMPLE_ELEMENTS, true)) {
            $this->in_content = array();

            $has_type = array_key_exists('type', $attrs);
            $declared = $has_type ? $attrs['type'] : null;

            $this->is_xhtml = $declared == 'xhtml';
            $this->is_html = $declared == 'html' || $declared == 'text/html';
            $this->is_text = !$has_type || $declared == 'text';
            $type = $this->is_xhtml ? 'XHTML' : ($this->is_html ? 'HTML' : ($this->is_text ? 'TEXT' : $declared));

            if (array_key_exists('src', $attrs)) {
                // Out-of-line content: keep the attributes instead of collecting a body.
                if ($this->current !== null) {
                    $this->current->$tag = $attrs;
                }
            } else {
                array_push($this->in_content, array($tag, $this->depth, $type));
            }
        } else if ($tag == 'link') {
            if ($this->current !== null) {
                array_push($this->current->links, $attrs);
            }
        } else if ($tag == 'category') {
            if ($this->current !== null) {
                array_push($this->current->categories, $attrs);
            }
        }

        $this->ns_decls = array();
    }

    /**
     * Element-end handler.
     *
     * Finalizes the collected element when its own end tag is reached,
     * otherwise serializes the end tag (or collapses an empty element to
     * "<x/>"). Closing an Atom entry appends it to the feed.
     *
     * @param mixed  $parser XML parser handle (unused).
     * @param string $name   Qualified name, "<namespace URI>:<local name>".
     */
    public function end_element($parser, $name) {

        $name_parts = explode(":", $name);
        $tag = array_pop($name_parts);

        # if we are *in* content, then let's proceed to serialize it
        if (!empty($this->in_content)) {
            $ccount = count($this->in_content);
            $first = $this->in_content[0];
            $last = $this->in_content[$ccount - 1];

            # if we are ending the original content element
            # then let's finalize the content
            if (is_array($first) && $first[0] == $tag && $first[1] == $this->depth) {
                $origtype = $first[2];
                array_shift($this->in_content);
                $newcontent = array();
                foreach ($this->in_content as $c) {
                    if (is_array($c)) {
                        // serialized tag record: array(tag, depth, markup)
                        array_push($newcontent, $c[2]);
                    } else if ($this->is_xhtml) {
                        // character data inside XHTML must be re-escaped
                        array_push($newcontent, $this->xml_escape($c));
                    } else {
                        array_push($newcontent, $c);
                    }
                }
                if (in_array($tag, $this->ATOM_CONTENT_ELEMENTS, true)) {
                    $value = array($origtype, join('', $newcontent));
                } else {
                    $value = join('', $newcontent);
                }
                if ($this->current !== null) {
                    $this->current->$tag = $value;
                }
                $this->in_content = array();
            } else if (is_array($last) && $last[0] == $tag && $last[1] == $this->depth) {
                # the start tag was the last thing seen: collapse to an empty-element tag
                $this->in_content[$ccount - 1][2] = substr($last[2], 0, -1) . "/>";
            } else {
                # else, just finalize the current element's content
                $endtag = $this->ns_to_prefix($name);
                array_push($this->in_content, array($tag, $this->depth, "</{$endtag[1]}>"));
            }
        }

        array_shift($this->ns_contexts);

        $this->depth--;

        if ($name == ($this->NS . ':entry')) {
            array_push($this->feed->entries, $this->current);
            // Inside an enclosing element, later siblings (feed-level metadata
            // after an entry) belong to the feed again; a stand-alone entry
            // document leaves no current target.
            $this->current = ($this->depth > 0) ? $this->feed : null;
        }

        $this->_p("end_element('$name')");
    }

    /**
     * Namespace-declaration start handler; queues the mapping for the element about to start.
     */
    public function start_ns($parser, $prefix, $uri) {
        $this->_p("starting: " . $prefix . ":" . $uri);
        array_push($this->ns_decls, array($prefix, $uri));
    }

    /**
     * Namespace-declaration end handler; only traces.
     */
    public function end_ns($parser, $prefix) {
        $this->_p("ending: #" . $prefix . "#");
    }

    /**
     * Character-data handler; appends the text to the element being collected, if any.
     */
    public function cdata($parser, $data) {
        $this->_p("data: #" . str_replace(array("\n"), array("\\n"), trim($data)) . "#");
        if (!empty($this->in_content)) {
            array_push($this->in_content, $data);
        }
    }

    /**
     * Default handler; intentionally ignores whatever it is given.
     */
    public function _default($parser, $data) {
        # when does this gets called?
    }

    /**
     * Translates a "<namespace URI>:<local name>" qualified name into its
     * prefixed form using the namespace declarations currently in scope.
     *
     * @param string $qname Qualified name as delivered by the parser.
     * @param bool   $attr  True for attribute names, which never take the default namespace.
     * @return array array(mapping, name): mapping is the array(prefix, uri)
     *               pair used, or null when none applies; name is
     *               "prefix:local" or the bare local name.
     */
    public function ns_to_prefix($qname, $attr = false) {
        # split 'http://www.w3.org/1999/xhtml:div' into ('http','//www.w3.org/1999/xhtml','div')
        $components = explode(":", $qname);

        # grab the last one (e.g 'div')
        $name = array_pop($components);

        if (!empty($components)) {
            # re-join back the namespace component
            $ns = join(":", $components);
            foreach ($this->ns_contexts as $context) {
                foreach ($context as $mapping) {
                    if ($mapping[1] == $ns && strlen((string) $mapping[0]) > 0) {
                        return array($mapping, "$mapping[0]:$name");
                    }
                }
            }
        }

        if (!$attr) {
            # no prefixed mapping: fall back to the nearest default namespace
            foreach ($this->ns_contexts as $context) {
                foreach ($context as $mapping) {
                    if (strlen((string) $mapping[0]) == 0) {
                        return array($mapping, $name);
                    }
                }
            }
        }

        # Attributes, and elements with no namespace in scope, keep their bare
        # name (previously elements fell through and returned null).
        return array(null, $name);
    }

    /**
     * Tells whether a namespace mapping has already been written into the serialized content.
     *
     * @param array $new_mapping array(prefix, uri).
     * @return bool
     */
    public function is_declared_content_ns($new_mapping) {
        foreach ($this->content_ns_contexts as $context) {
            foreach ($context as $mapping) {
                if ($new_mapping == $mapping) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Escapes the five XML special characters.
     *
     * "&" is replaced first so the entities produced for the other
     * characters are not escaped a second time.
     *
     * @param string $string
     * @return string
     */
    public function xml_escape($string)
    {
        return str_replace(array('&', '"', "'", '<', '>'),
            array('&amp;', '&quot;', '&apos;', '&lt;', '&gt;'),
            $string);
    }
}
|---|
| 312 | |
|---|
| 313 | ?> |
|---|