Ticket #7652: 7652-blogger.diff
File 7652-blogger.diff, 18.3 KB (added by , 14 years ago) |
---|
-
blogger-importer.php
15 15 16 16 // Load Importer API 17 17 require_once ABSPATH . 'wp-admin/includes/import.php'; 18 require_once ABSPATH . 'wp-includes/class-feed.php'; 18 19 19 20 if ( !class_exists( 'WP_Importer' ) ) { 20 21 $class_wp_importer = ABSPATH . 'wp-admin/includes/class-wp-importer.php'; … … 152 153 if ( ! $sock ) return; 153 154 $response = $this->_txrx( $sock, $request ); 154 155 155 // Quick and dirty XML mining.156 156 list( $headers, $xml ) = explode( "\r\n\r\n", $response ); 157 $ p = xml_parser_create();158 xml_parse_into_struct($p, $xml, $vals, $index);159 xml_parser_free($p);157 $feed = new SimplePie(); 158 $feed->set_raw_data($xml); 159 $feed->init(); 160 160 161 $this->title = $ vals[$index['TITLE'][0]]['value'];161 $this->title = $feed->get_title(); 162 162 163 163 // Give it a few retries... this step often flakes out the first time. 164 if ( empty( $index['ENTRY']) ) {164 if ( $feed->error() ) { 165 165 if ( $iter < 3 ) { 166 166 return $this->show_blogs($iter + 1); 167 167 } else { … … 174 174 } 175 175 } 176 176 177 foreach ( $ index['ENTRY'] as $i) {177 foreach ( $feed->get_items() as $item ) { 178 178 $blog = array(); 179 while ( ( $tag = $vals[$i] ) && ! ( $tag['tag'] == 'ENTRY' && $tag['type'] == 'close' ) ) {180 if ( $tag['tag'] == 'TITLE' ) {181 $blog['title'] = $tag['value'];182 } elseif ( $tag['tag'] == 'SUMMARY') {183 $blog['summary'] = $tag['value'];184 } elseif ( $tag['tag'] == 'LINK') {185 if ( $tag['attributes']['REL'] == 'alternate' && $tag['attributes']['TYPE'] == 'text/html' ) {186 $parts = parse_url( $ tag['attributes']['HREF']);179 $blog['title'] = $item->get_title(); 180 $blog['summary'] = $item->get_description(); 181 // Messy, but SimplePie doesn't let us get a link by "type" 182 foreach ((array) $item->get_item_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'link') as $link) { 183 $attribs = $link['attribs']['']; 184 if (isset($attribs['href'])) { 185 if ( ( !isset($attribs['rel']) || $attribs['rel'] == 'alternate' ) && isset($attribs['type']) && $attribs['type'] == 'text/html' ) { 186 $parts = parse_url( $item->sanitize($attribs['href'], SIMPLEPIE_CONSTRUCT_IRI, $item->get_base($link)) ); 187 187 $blog['host'] = $parts['host']; 188 } elseif ( $tag['attributes']['REL'] == 'edit' ) 189 $blog['gateway'] = $tag['attributes']['HREF']; 188 } elseif ( $attribs['rel'] == 'edit' ) { 189 $blog['gateway'] = $item->sanitize($attribs['href'], SIMPLEPIE_CONSTRUCT_IRI, $item->get_base($link)); 190 } 190 191 } 191 ++$i;192 192 } 193 193 if ( ! empty ( $blog ) ) { 194 194 $blog['total_posts'] = $this->get_total_results('posts', $blog['host']); … … 197 197 $this->blogs[] = $blog; 198 198 } 199 199 } 200 unset($feed); 200 201 201 202 if ( empty( $this->blogs ) ) { 202 203 $this->uh_oh( … … 382 383 if ( ! $sock ) return; 383 384 $response = $this->_txrx( $sock, $request ); 384 385 $response = $this->parse_response( $response ); 385 $parser = xml_parser_create(); 386 xml_parse_into_struct($parser, $response['body'], $struct, $index); 387 xml_parser_free($parser); 388 $total_results = $struct[$index['OPENSEARCH:TOTALRESULTS'][0]]['value']; 386 $feed = new SimplePie(); 387 $feed->set_raw_data($response['body']); 388 $feed->init(); 389 $results = $feed->get_channel_tags('http://a9.com/-/spec/opensearchrss/1.0/', 'totalResults'); 390 $total_results = $results[0]['data']; 391 unset($feed); 389 392 return (int) $total_results; 390 393 } 391 394 … … 436 439 437 440 $response = $this->parse_response( $response ); 438 441 442 $feed = new SimplePie(); 443 $feed->set_item_class('WP_SimplePieAtomPub_Item'); 444 $feed->set_raw_data($response['body']); 445 $feed->init(); 446 439 447 // Extract the entries and send for insertion 440 preg_match_all( '/<entry[^>]*>.*?<\/entry>/s', $response['body'], $matches ); 441 if ( count( $matches[0] ) ) { 442 $entries = array_reverse($matches[0]); 443 foreach ( $entries as $entry ) { 444 $entry = "<feed>$entry</feed>"; 445 $AtomParser = new AtomParser(); 446 $AtomParser->parse( $entry ); 447 $result = $this->import_post($AtomParser->entry); 448 if ( $feed->get_item_quantity() > 0 ) { 449 foreach ( $feed->get_items() as $item ) { 450 $result = $this->import_post($item); 448 451 if ( is_wp_error( $result ) ) 449 452 return $result; 450 unset($ AtomParser);453 unset($item); 451 454 } 452 455 } else break; 453 456 454 457 // Get the 'previous' query string which we'll use on the next iteration 455 $query = ''; 456 $links = preg_match_all('/<link([^>]*)>/', $response['body'], $matches); 457 if ( count( $matches[1] ) ) 458 foreach ( $matches[1] as $match ) 459 if ( preg_match('/rel=.previous./', $match) ) 460 $query = @html_entity_decode( preg_replace('/^.*href=[\'"].*\?(.+)[\'"].*$/', '$1', $match), ENT_COMPAT, get_option('blog_charset') ); 458 $query = $feed->get_link(0, 'previous'); 459 unset($feed); 461 460 462 461 if ( $query ) { 463 462 parse_str($query, $q); … … 496 495 497 496 $response = $this->parse_response( $response ); 498 497 498 $feed = new SimplePie(); 499 $feed->set_raw_data($response['body']); 500 $feed->init(); 501 499 502 // Extract the comments and send for insertion 500 preg_match_all( '/<entry[^>]*>.*?<\/entry>/s', $response['body'], $matches ); 501 if ( count( $matches[0] ) ) { 502 $entries = array_reverse( $matches[0] ); 503 foreach ( $entries as $entry ) { 504 $entry = "<feed>$entry</feed>"; 505 $AtomParser = new AtomParser(); 506 $AtomParser->parse( $entry ); 507 $this->import_comment($AtomParser->entry); 508 unset($AtomParser); 503 if ( $feed->get_item_quantity() < 1 ) { 504 foreach ( $feed->get_items() as $item ) { 505 $this->import_comment($item); 506 unset($item); 509 507 } 510 508 } 511 509 512 510 // Get the 'previous' query string which we'll use on the next iteration 513 $query = ''; 514 $links = preg_match_all('/<link([^>]*)>/', $response['body'], $matches); 515 if ( count( $matches[1] ) ) 516 foreach ( $matches[1] as $match ) 517 if ( preg_match('/rel=.previous./', $match) ) 518 $query = @html_entity_decode( preg_replace('/^.*href=[\'"].*\?(.+)[\'"].*$/', '$1', $match), ENT_COMPAT, get_option('blog_charset') ); 511 $query = $feed->get_link(0, 'previous'); 512 unset($feed); 519 513 520 514 parse_str($query, $q); 521 515 … … 531 525 die('done'); 532 526 } 533 527 534 function convert_date( $date ) { 535 preg_match('#([0-9]{4})-([0-9]{2})-([0-9]{2})T([0-9]{2}):([0-9]{2}):([0-9]{2})(?:\.[0-9]+)?(Z|[\+|\-][0-9]{2,4}){0,1}#', $date, $date_bits); 536 $offset = iso8601_timezone_to_offset( $date_bits[7] ); 537 $timestamp = gmmktime($date_bits[4], $date_bits[5], $date_bits[6], $date_bits[2], $date_bits[3], $date_bits[1]); 538 $timestamp -= $offset; // Convert from Blogger local time to GMT 539 $timestamp += get_option('gmt_offset') * 3600; // Convert from GMT to WP local time 540 return gmdate('Y-m-d H:i:s', $timestamp); 528 function convert_category( $category ) { 529 $cat = $category->get_label(); 530 return addslashes($cat); 541 531 } 542 532 543 function no_apos( $string ) { 544 return str_replace( ''', "'", $string); 545 } 546 547 function min_whitespace( $string ) { 548 return preg_replace( '|\s+|', ' ', $string ); 549 } 550 551 function _normalize_tag( $matches ) { 552 return '<' . strtolower( $matches[1] ); 553 } 554 555 function import_post( $entry ) { 533 function import_post( &$entry ) { 556 534 global $importing_blog; 557 535 558 536 // The old permalink is all Blogger gives us to link comments to their posts. 559 if ( isset( $entry->draft) )537 if ( $entry->get_draft_status() ) 560 538 $rel = 'self'; 561 539 else 562 540 $rel = 'alternate'; 563 foreach ( $entry->links as $link ) { 564 if ( $link['rel'] == $rel ) { 565 $parts = parse_url( $link['href'] ); 566 $entry->old_permalink = $parts['path']; 567 break; 568 } 569 } 541 $parts = parse_url( $entry->get_link(0, $rel) ); 542 $old_permalink = $parts['path']; 570 543 571 $post_date = $this->convert_date( $entry->published);572 $post_content = trim( addslashes( $this->no_apos( @html_entity_decode( $entry->content, ENT_COMPAT, get_option('blog_charset') ) )) );573 $post_title = trim( addslashes( $this->no_apos( $this->min_whitespace( $entry->title ) )) );574 $post_status = isset( $entry->draft) ? 'draft' : 'publish';544 $post_date = gmdate('Y-m-d H:i:s', $entry->get_gmdate('U') + get_option('gmt_offset') * 3600); 545 $post_content = addslashes( $entry->get_content() ); 546 $post_title = addslashes( $entry->get_title() ); 547 $post_status = $entry->get_draft_status() ? 'draft' : 'publish'; 575 548 576 // Clean up content577 $post_content = preg_replace_callback('|<(/?[A-Z]+)|', array( &$this, '_normalize_tag' ), $post_content);578 $post_content = str_replace('<br>', '<br />', $post_content);579 $post_content = str_replace('<hr>', '<hr />', $post_content);580 581 549 // Checks for duplicates 582 if ( isset( $this->blogs[$importing_blog]['posts'][$ entry->old_permalink] ) ) {550 if ( isset( $this->blogs[$importing_blog]['posts'][$old_permalink] ) ) { 583 551 ++$this->blogs[$importing_blog]['posts_skipped']; 584 552 } elseif ( $post_id = post_exists( $post_title, $post_content, $post_date ) ) { 585 $this->blogs[$importing_blog]['posts'][$ entry->old_permalink] = $post_id;553 $this->blogs[$importing_blog]['posts'][$old_permalink] = $post_id; 586 554 ++$this->blogs[$importing_blog]['posts_skipped']; 587 555 } else { 588 556 $post = compact('post_date', 'post_content', 'post_title', 'post_status'); … … 591 559 if ( is_wp_error( $post_id ) ) 592 560 return $post_id; 593 561 594 wp_create_categories( array_map( 'addslashes', $entry->categories), $post_id );562 wp_create_categories( array_map( array(&$this, 'convert_category'), (array) $entry->get_categories() ), $post_id ); 595 563 596 $author = $ this->no_apos( strip_tags( $entry->author ));564 $author = $entry->get_author(); 597 565 598 566 add_post_meta( $post_id, 'blogger_blog', $this->blogs[$importing_blog]['host'], true ); 599 add_post_meta( $post_id, 'blogger_author', $author, true ); 600 add_post_meta( $post_id, 'blogger_permalink', $entry->old_permalink, true ); 567 add_post_meta( $post_id, 'blogger_author', $author->get_name(), true ); 568 add_post_meta( $post_id, 'blogger_authoruri', $author->get_link(), true ); 569 add_post_meta( $post_id, 'blogger_permalink', $old_permalink, true ); 601 570 602 $this->blogs[$importing_blog]['posts'][$ entry->old_permalink] = $post_id;571 $this->blogs[$importing_blog]['posts'][$old_permalink] = $post_id; 603 572 ++$this->blogs[$importing_blog]['posts_done']; 604 573 } 605 574 $this->save_vars(); 606 575 return; 607 576 } 608 577 609 function import_comment( $entry ) {578 function import_comment( &$entry ) { 610 579 global $importing_blog; 611 580 612 581 // Drop the #fragment and we have the comment's old post permalink. 613 foreach ( $entry->links as $link ) { 614 if ( $link['rel'] == 'alternate' ) { 615 $parts = parse_url( $link['href'] ); 616 $entry->old_permalink = $parts['fragment']; 617 $entry->old_post_permalink = $parts['path']; 618 break; 619 } 620 } 582 $parts = parse_url( $entry->get_link(0, 'alternate') ); 583 $old_permalink = $parts['fragment']; 584 $old_post_permalink = $parts['path']; 621 585 622 $comment_post_ID = (int) $this->blogs[$importing_blog]['posts'][$ entry->old_post_permalink];623 preg_match('#<name>(.+?)</name>.*(?:\<uri>(.+?)</uri>)?#', $entry->author, $matches);624 $comment_author = addslashes( $ this->no_apos( strip_tags( (string) $matches[1] )) );625 $comment_author_url = addslashes( $ this->no_apos( strip_tags( (string) $matches[2] )) );626 $comment_date = $this->convert_date( $entry->updated);627 $comment_content = addslashes( $ this->no_apos( @html_entity_decode( $entry->content, ENT_COMPAT, get_option('blog_charset') )) );586 $comment_post_ID = (int) $this->blogs[$importing_blog]['posts'][$old_post_permalink]; 587 $author = $entry->get_author(); 588 $comment_author = addslashes( $author->get_name() ); 589 $comment_author_url = addslashes( $author->get_uri() ); 590 $comment_date = gmdate('Y-m-d H:i:s', $entry->get_gmupdated('U') + get_option('gmt_offset') * 3600); 591 $comment_content = addslashes( $entry->get_content() ); 628 592 629 // Clean up content630 $comment_content = preg_replace_callback('|<(/?[A-Z]+)|', array( &$this, '_normalize_tag' ), $comment_content);631 $comment_content = str_replace('<br>', '<br />', $comment_content);632 $comment_content = str_replace('<hr>', '<hr />', $comment_content);633 634 593 // Checks for duplicates 635 594 if ( 636 isset( $this->blogs[$importing_blog]['comments'][$ entry->old_permalink] ) ||595 isset( $this->blogs[$importing_blog]['comments'][$old_permalink] ) || 637 596 comment_exists( $comment_author, $comment_date ) 638 597 ) { 639 598 ++$this->blogs[$importing_blog]['comments_skipped']; … … 643 602 $comment = wp_filter_comment($comment); 644 603 $comment_id = wp_insert_comment($comment); 645 604 646 $this->blogs[$importing_blog]['comments'][$ entry->old_permalink] = $comment_id;605 $this->blogs[$importing_blog]['comments'][$old_permalink] = $comment_id; 647 606 648 607 ++$this->blogs[$importing_blog]['comments_done']; 649 608 } … … 907 866 $blogger_import = new Blogger_Import(); 908 867 909 868 register_importer('blogger', __('Blogger', 'blogger-importer'), __('Import posts, comments, and users from a Blogger blog.', 'blogger-importer'), array ($blogger_import, 'start')); 910 911 class AtomEntry {912 var $links = array();913 var $categories = array();914 }915 916 class AtomParser {917 918 var $ATOM_CONTENT_ELEMENTS = array('content','summary','title','subtitle','rights');919 var $ATOM_SIMPLE_ELEMENTS = array('id','updated','published','draft','author');920 921 var $depth = 0;922 var $indent = 2;923 var $in_content;924 var $ns_contexts = array();925 var $ns_decls = array();926 var $is_xhtml = false;927 var $skipped_div = false;928 929 var $entry;930 931 function AtomParser() {932 $this->entry = new AtomEntry();933 }934 935 function _map_attrs_func( $k, $v ) {936 return "$k=\"$v\"";937 }938 939 function _map_xmlns_func( $p, $n ) {940 $xd = "xmlns";941 if ( strlen( $n[0] ) > 0 )942 $xd .= ":{$n[0]}";943 944 return "{$xd}=\"{$n[1]}\"";945 }946 947 function parse($xml) {948 949 global $app_logging;950 array_unshift($this->ns_contexts, array());951 952 $parser = xml_parser_create_ns();953 xml_set_object($parser, $this);954 xml_set_element_handler($parser, "start_element", "end_element");955 xml_parser_set_option($parser,XML_OPTION_CASE_FOLDING,0);956 xml_parser_set_option($parser,XML_OPTION_SKIP_WHITE,0);957 xml_set_character_data_handler($parser, "cdata");958 xml_set_default_handler($parser, "_default");959 xml_set_start_namespace_decl_handler($parser, "start_ns");960 xml_set_end_namespace_decl_handler($parser, "end_ns");961 962 $contents = "";963 964 xml_parse($parser, $xml);965 966 xml_parser_free($parser);967 968 return true;969 }970 971 function start_element($parser, $name, $attrs) {972 973 $tag = array_pop(split(":", $name));974 975 array_unshift($this->ns_contexts, $this->ns_decls);976 977 $this->depth++;978 979 if (!empty($this->in_content)) {980 $attrs_prefix = array();981 982 // resolve prefixes for attributes983 foreach($attrs as $key => $value) {984 $attrs_prefix[$this->ns_to_prefix($key)] = $this->xml_escape($value);985 }986 $attrs_str = join(' ', array_map( array( &$this, '_map_attrs_func' ), array_keys($attrs_prefix), array_values($attrs_prefix)));987 if (strlen($attrs_str) > 0) {988 $attrs_str = " " . $attrs_str;989 }990 991 $xmlns_str = join(' ', array_map( array( &$this, '_map_xmlns_func' ), array_keys($this->ns_contexts[0]), array_values($this->ns_contexts[0])));992 if (strlen($xmlns_str) > 0) {993 $xmlns_str = " " . $xmlns_str;994 }995 996 // handle self-closing tags (case: a new child found right-away, no text node)997 if (count($this->in_content) == 2) {998 array_push($this->in_content, ">");999 }1000 1001 array_push($this->in_content, "<". $this->ns_to_prefix($name) ."{$xmlns_str}{$attrs_str}");1002 } else if (in_array($tag, $this->ATOM_CONTENT_ELEMENTS) || in_array($tag, $this->ATOM_SIMPLE_ELEMENTS)) {1003 $this->in_content = array();1004 $this->is_xhtml = $attrs['type'] == 'xhtml';1005 array_push($this->in_content, array($tag,$this->depth));1006 } else if ($tag == 'link') {1007 array_push($this->entry->links, $attrs);1008 } else if ($tag == 'category') {1009 array_push($this->entry->categories, $attrs['term']);1010 }1011 1012 $this->ns_decls = array();1013 }1014 1015 function end_element($parser, $name) {1016 1017 $tag = array_pop(split(":", $name));1018 1019 if (!empty($this->in_content)) {1020 if ($this->in_content[0][0] == $tag &&1021 $this->in_content[0][1] == $this->depth) {1022 array_shift($this->in_content);1023 if ($this->is_xhtml) {1024 $this->in_content = array_slice($this->in_content, 2, count($this->in_content)-3);1025 }1026 $this->entry->$tag = join('',$this->in_content);1027 $this->in_content = array();1028 } else {1029 $endtag = $this->ns_to_prefix($name);1030 if (strpos($this->in_content[count($this->in_content)-1], '<' . $endtag) !== false) {1031 array_push($this->in_content, "/>");1032 } else {1033 array_push($this->in_content, "</$endtag>");1034 }1035 }1036 }1037 1038 array_shift($this->ns_contexts);1039 1040 #print str_repeat(" ", $this->depth * $this->indent) . "end_element('$name')" ."\n";1041 1042 $this->depth--;1043 }1044 1045 function start_ns($parser, $prefix, $uri) {1046 #print str_repeat(" ", $this->depth * $this->indent) . "starting: " . $prefix . ":" . $uri . "\n";1047 array_push($this->ns_decls, array($prefix,$uri));1048 }1049 1050 function end_ns($parser, $prefix) {1051 #print str_repeat(" ", $this->depth * $this->indent) . "ending: #" . $prefix . "#\n";1052 }1053 1054 function cdata($parser, $data) {1055 #print str_repeat(" ", $this->depth * $this->indent) . "data: #" . $data . "#\n";1056 if (!empty($this->in_content)) {1057 // handle self-closing tags (case: text node found, need to close element started)1058 if (strpos($this->in_content[count($this->in_content)-1], '<') !== false) {1059 array_push($this->in_content, ">");1060 }1061 array_push($this->in_content, $this->xml_escape($data));1062 }1063 }1064 1065 function _default($parser, $data) {1066 # when does this gets called?1067 }1068 1069 1070 function ns_to_prefix($qname) {1071 $components = split(":", $qname);1072 $name = array_pop($components);1073 1074 if (!empty($components)) {1075 $ns = join(":",$components);1076 foreach ($this->ns_contexts as $context) {1077 foreach ($context as $mapping) {1078 if ($mapping[1] == $ns && strlen($mapping[0]) > 0) {1079 return "$mapping[0]:$name";1080 }1081 }1082 }1083 }1084 return $name;1085 }1086 1087 function xml_escape($string)1088 {1089 return str_replace(array('&','"',"'",'<','>'),1090 array('&','"',''','<','>'),1091 $string );1092 }1093 }1094 869 } // class_exists( 'WP_Importer' ) 1095 870 1096 871 function blogger_importer_init() {