/**
 * Plain data holder for one parsed Atom entry.
 *
 * Besides the two declared lists, AtomParser::end_element() stores the text
 * of each recognised element on this object under a property named after the
 * element's local tag (for example "title" or "content"). Those properties
 * are created at runtime, so the class opts in to dynamic properties; without
 * the attribute below PHP 8.2+ raises a deprecation for every such write.
 * On PHP versions before 8.0 the attribute line is parsed as a "#" comment.
 */
#[AllowDynamicProperties]
class AtomEntry {
    /** List of attribute maps, one per <link> element, in document order. */
    var $links = array();

    /** List of "term" attribute values, one per <category> element. */
    var $categories = array();
}
883 | | |
/**
 * Event-driven parser that fills an AtomEntry from an Atom entry document.
 *
 * The text of every element listed in $ATOM_CONTENT_ELEMENTS or
 * $ATOM_SIMPLE_ELEMENTS is stored on $entry under the element's local name.
 * Markup nested inside such an element is re-serialised (with text and
 * attribute values XML-escaped) rather than interpreted. <link> attributes
 * and <category term="..."> values are collected into $entry->links and
 * $entry->categories.
 */
class AtomParser {

    /** Local names of elements whose body may carry text or markup. */
    var $ATOM_CONTENT_ELEMENTS = array('content','summary','title','subtitle','rights');
    /** Local names of the remaining elements that are captured the same way. */
    var $ATOM_SIMPLE_ELEMENTS = array('id','updated','published','draft','author');

    /** Current element nesting depth (1 for the document element). */
    var $depth = 0;
    var $indent = 2;
    /**
     * Capture buffer. Empty while nothing is being captured; otherwise
     * element 0 is array(local tag, depth) of the element being captured and
     * the remaining elements are output fragments.
     */
    var $in_content;
    /** Stack of namespace declaration lists, innermost element first. */
    var $ns_contexts = array();
    /** array(prefix, uri) pairs declared on the element about to start. */
    var $ns_decls = array();
    /** True while the captured element was declared with type="xhtml". */
    var $is_xhtml = false;
    var $skipped_div = false;

    /** The AtomEntry being populated. */
    var $entry;

    /** Callback rendering one attribute; see map_attrs(). */
    var $map_attrs_func;
    /** Callback rendering one namespace declaration; see map_xmlns(). */
    var $map_xmlns_func;
    /** True while the last buffered fragment is a start tag still missing its ">" or "/>". */
    var $open_tag_pending = false;

    /**
     * Creates the entry object and the attribute/namespace render callbacks.
     */
    function __construct() {
        $this->entry = new AtomEntry();
        // Plain method callbacks replace create_function(), which no longer exists in PHP 8.
        $this->map_attrs_func = array($this, 'map_attrs');
        $this->map_xmlns_func = array($this, 'map_xmlns');
    }

    /**
     * Legacy PHP 4-style constructor name, kept so existing explicit calls
     * (for example from subclasses) keep working.
     */
    function AtomParser() {
        $this->__construct();
    }

    /**
     * Renders one attribute as name="value".
     *
     * @param string $k Attribute name, already prefixed.
     * @param string $v Attribute value, already XML-escaped.
     * @return string
     */
    function map_attrs($k, $v) {
        return "$k=\"$v\"";
    }

    /**
     * Renders one namespace declaration as xmlns="uri" or xmlns:prefix="uri".
     *
     * @param int   $p Position of the declaration (unused).
     * @param array $n array(prefix, uri); the prefix is empty or false for the default namespace.
     * @return string
     */
    function map_xmlns($p, $n) {
        $xd = 'xmlns';
        $prefix = (string) $n[0];
        if ($prefix !== '') {
            $xd .= ":{$prefix}";
        }
        // The URI lands inside a quoted attribute value, so it must be escaped.
        return $xd . '="' . $this->xml_escape($n[1]) . '"';
    }

    /**
     * Parses a complete Atom document and populates $this->entry.
     *
     * @param string $xml The whole document.
     * @return bool True when the document was well-formed, false when the
     *              XML parser reported an error ($entry may then be partially filled).
     */
    function parse($xml) {
        // Start from a clean transient state so an earlier failed parse cannot leak into this one.
        $this->depth = 0;
        $this->in_content = array();
        $this->ns_decls = array();
        $this->ns_contexts = array(array());
        $this->open_tag_pending = false;

        $parser = xml_parser_create_ns();
        xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
        xml_parser_set_option($parser, XML_OPTION_SKIP_WHITE, 0);
        // Array callables work on every PHP version and avoid xml_set_object().
        xml_set_element_handler($parser, array($this, 'start_element'), array($this, 'end_element'));
        xml_set_character_data_handler($parser, array($this, 'cdata'));
        xml_set_default_handler($parser, array($this, '_default'));
        xml_set_start_namespace_decl_handler($parser, array($this, 'start_ns'));
        xml_set_end_namespace_decl_handler($parser, array($this, 'end_ns'));

        // The whole document is supplied at once, so flag it as the final chunk;
        // otherwise truncated input is not reported as an error.
        $ok = (xml_parse($parser, $xml, true) == 1);

        // Only legacy resource handles need freeing; PHP 8 parser objects are released automatically.
        if (is_resource($parser)) {
            xml_parser_free($parser);
        }

        return $ok;
    }

    /**
     * Start-tag handler.
     *
     * @param mixed  $parser XML parser handle (unused).
     * @param string $name   Element name, "namespace-uri:local" when namespaced.
     * @param array  $attrs  Attribute name => value map.
     */
    function start_element($parser, $name, $attrs) {
        $name_parts = explode(':', $name);
        $tag = array_pop($name_parts);

        array_unshift($this->ns_contexts, $this->ns_decls);

        $this->depth++;

        if (!empty($this->in_content)) {
            // Nested markup inside a captured element: re-serialise the start tag.
            $attrs_prefix = array();
            foreach ($attrs as $key => $value) {
                $attrs_prefix[$this->ns_to_prefix($key)] = $this->xml_escape($value);
            }
            $attrs_str = join(' ', array_map($this->map_attrs_func, array_keys($attrs_prefix), array_values($attrs_prefix)));
            if (strlen($attrs_str) > 0) {
                $attrs_str = ' ' . $attrs_str;
            }

            $xmlns_str = join(' ', array_map($this->map_xmlns_func, array_keys($this->ns_contexts[0]), array_values($this->ns_contexts[0])));
            if (strlen($xmlns_str) > 0) {
                $xmlns_str = ' ' . $xmlns_str;
            }

            // A child starts right after its parent's start tag: close the parent's tag first.
            if ($this->open_tag_pending) {
                array_push($this->in_content, '>');
            }

            // The tag is left open so end_element() can turn it into "/>" when it has no body.
            array_push($this->in_content, '<' . $this->ns_to_prefix($name) . $xmlns_str . $attrs_str);
            $this->open_tag_pending = true;
        } else if (in_array($tag, $this->ATOM_CONTENT_ELEMENTS, true) || in_array($tag, $this->ATOM_SIMPLE_ELEMENTS, true)) {
            // Begin capturing; element 0 remembers which end tag finishes the capture.
            $this->in_content = array(array($tag, $this->depth));
            $this->is_xhtml = isset($attrs['type']) && $attrs['type'] === 'xhtml';
            $this->open_tag_pending = false;
        } else if ($tag == 'link') {
            array_push($this->entry->links, $attrs);
        } else if ($tag == 'category') {
            // A category without a term is still recorded, as null.
            array_push($this->entry->categories, isset($attrs['term']) ? $attrs['term'] : null);
        }

        $this->ns_decls = array();
    }

    /**
     * End-tag handler.
     *
     * @param mixed  $parser XML parser handle (unused).
     * @param string $name   Element name, "namespace-uri:local" when namespaced.
     */
    function end_element($parser, $name) {
        $name_parts = explode(':', $name);
        $tag = array_pop($name_parts);

        if (!empty($this->in_content)) {
            if ($this->in_content[0][0] === $tag && $this->in_content[0][1] == $this->depth) {
                // The captured element itself is closing: store its body on the entry.
                array_shift($this->in_content);
                if ($this->is_xhtml) {
                    // Drop the wrapper div: its start tag, that tag's ">" and its end tag.
                    $this->in_content = array_slice($this->in_content, 2, count($this->in_content) - 3);
                }
                $this->entry->$tag = join('', $this->in_content);
                $this->in_content = array();
                $this->open_tag_pending = false;
            } else if ($this->open_tag_pending) {
                // Nothing followed the start tag, so emit the element as self-closing.
                array_push($this->in_content, '/>');
                $this->open_tag_pending = false;
            } else {
                $endtag = $this->ns_to_prefix($name);
                array_push($this->in_content, "</$endtag>");
            }
        }

        array_shift($this->ns_contexts);

        $this->depth--;
    }

    /**
     * Namespace-declaration handler; invoked before the start tag that carries the declaration.
     *
     * @param mixed  $parser XML parser handle (unused).
     * @param mixed  $prefix Declared prefix; empty or false for the default namespace.
     * @param string $uri    Namespace URI.
     */
    function start_ns($parser, $prefix, $uri) {
        array_push($this->ns_decls, array($prefix, $uri));
    }

    /**
     * End-of-namespace-scope handler; intentionally does nothing.
     */
    function end_ns($parser, $prefix) {
    }

    /**
     * Character-data handler: appends escaped text to the capture buffer.
     *
     * @param mixed  $parser XML parser handle (unused).
     * @param string $data   A chunk of text; one text node may arrive in several chunks.
     */
    function cdata($parser, $data) {
        if (empty($this->in_content)) {
            return;
        }

        // Whitespace directly under an XHTML element (around its wrapper div) is
        // formatting only; buffering it would misalign the div-stripping slice
        // in end_element().
        if ($this->is_xhtml && $this->depth == $this->in_content[0][1] && trim($data) === '') {
            return;
        }

        // Text follows a start tag: that tag has a body, so close it with ">".
        if ($this->open_tag_pending) {
            array_push($this->in_content, '>');
            $this->open_tag_pending = false;
        }
        array_push($this->in_content, $this->xml_escape($data));
    }

    /**
     * Default handler for data no other handler claims; intentionally does nothing.
     */
    function _default($parser, $data) {
    }

    /**
     * Converts a parser-qualified name ("namespace-uri:local") into
     * "prefix:local" using the prefixes currently in scope.
     *
     * @param string $qname Qualified name as delivered by the XML parser.
     * @return string "prefix:local", or just the local name when the name has
     *                no namespace or its namespace has no non-empty prefix in scope.
     */
    function ns_to_prefix($qname) {
        $components = explode(':', $qname);
        $name = array_pop($components);

        if (!empty($components)) {
            // The URI itself may contain colons, so glue the remaining parts back together.
            $ns = join(':', $components);
            foreach ($this->ns_contexts as $context) {
                foreach ($context as $mapping) {
                    if ($mapping[1] == $ns && (string) $mapping[0] !== '') {
                        return "$mapping[0]:$name";
                    }
                }
            }
        }
        return $name;
    }

    /**
     * Escapes the five XML special characters.
     *
     * @param string $string Raw text.
     * @return string Text safe for element content and quoted attribute values.
     */
    function xml_escape($string)
    {
        return strtr($string, array(
            '&' => '&amp;',
            '"' => '&quot;',
            "'" => '&apos;',
            '<' => '&lt;',
            '>' => '&gt;',
        ));
    }
}
1053 | | |