Ticket #15197: 15197-import.diff
File 15197-import.diff, 75.2 KB (added by , 15 years ago) |
---|
-
trunk/parsers.php
<?php
/**
 * WordPress eXtended RSS file parser implementations
 *
 * Every parser in this file exposes parse( $file ) and returns either a
 * WP_Error or an array with the keys 'authors', 'posts', 'categories',
 * 'tags', 'terms' and 'base_url'.
 *
 * @package WordPress
 * @subpackage Importer
 */

/**
 * WordPress Importer class for managing parsing of WXR files
 *
 * Chooses a concrete parser based on the loaded PHP extensions:
 * SimpleXML first, then the XML Parser extension, then the regex fallback.
 */
class WXR_Parser {
	/**
	 * Parse a WXR file with the best parser available.
	 *
	 * @param string $file Path to the WXR file.
	 * @return array|WP_Error Whatever the selected parser's parse() returns.
	 */
	function parse( $file ) {
		if ( extension_loaded( 'simplexml' ) )
			$parser = new WXR_Parser_SimpleXML;
		else if ( extension_loaded( 'xml' ) )
			$parser = new WXR_Parser_XML;
		else
			$parser = new WXR_Parser_Regex;

		return $parser->parse( $file );
	}
}

/**
 * WXR Parser that makes use of the SimpleXML PHP extension
 */
class WXR_Parser_SimpleXML {
	/**
	 * Parse a WXR file.
	 *
	 * libxml error reporting is switched to "internal" only for the duration
	 * of the parse; the previous setting is restored before returning so the
	 * global libxml state is not changed for the rest of the request.
	 *
	 * @param string $file Path to the WXR file.
	 * @return array|WP_Error Parsed data, or WP_Error( 'WXR_parse_error' ).
	 */
	function parse( $file ) {
		$internal_errors = libxml_use_internal_errors( true );

		$result = $this->_parse( $file );

		libxml_clear_errors();
		libxml_use_internal_errors( $internal_errors );

		return $result;
	}

	/**
	 * Run an XPath query and always hand back an array.
	 *
	 * SimpleXMLElement::xpath() does not return an array when the query
	 * fails, which would otherwise break foreach and index access.
	 *
	 * @param SimpleXMLElement $xml  Document root.
	 * @param string           $path XPath expression.
	 * @return array Matching nodes, possibly empty.
	 */
	function _xpath( $xml, $path ) {
		$nodes = $xml->xpath( $path );
		return is_array( $nodes ) ? $nodes : array();
	}

	/**
	 * Does the actual parsing; see parse() for the contract.
	 *
	 * @param string $file Path to the WXR file.
	 * @return array|WP_Error
	 */
	function _parse( $file ) {
		$authors = $posts = $categories = $tags = $terms = array();

		$xml = simplexml_load_file( $file );
		// halt if loading produces an error
		if ( ! $xml )
			return new WP_Error( 'WXR_parse_error', __( 'There was an error when reading this WXR file', 'wordpress-importer' ) );

		$wxr_version = $xml->xpath( '/rss/channel/wp:wxr_version' );
		if ( ! $wxr_version )
			return new WP_Error( 'WXR_parse_error', __( 'This does not appear to be a WXR file, missing/invalid WXR version number', 'wordpress-importer' ) );

		$wxr_version = trim( (string) $wxr_version[0] );
		// confirm that we are dealing with the correct file format
		if ( ! preg_match( '/^\d\.\d$/', $wxr_version ) )
			return new WP_Error( 'WXR_parse_error', __( 'This does not appear to be a WXR file, missing/invalid WXR version number', 'wordpress-importer' ) );

		// the base site URL is optional: fall back to an empty string
		$base_url = $this->_xpath( $xml, '/rss/channel/wp:base_site_url' );
		$base_url = $base_url ? trim( (string) $base_url[0] ) : '';

		$namespaces = $xml->getDocNamespaces();
		if ( ! isset( $namespaces['wp'] ) )
			$namespaces['wp'] = 'http://wordpress.org/export/1.1/';
		if ( ! isset( $namespaces['excerpt'] ) )
			$namespaces['excerpt'] = 'http://wordpress.org/export/1.1/excerpt/';

		// grab authors
		foreach ( $this->_xpath( $xml, '/rss/channel/wp:author' ) as $author_arr ) {
			$a = $author_arr->children( $namespaces['wp'] );
			$login = (string) $a->author_login;
			$authors[$login] = array(
				'author_login' => $login,
				'author_email' => (string) $a->author_email,
				'author_display_name' => (string) $a->author_display_name,
				'author_first_name' => (string) $a->author_first_name,
				'author_last_name' => (string) $a->author_last_name
			);
		}

		// grab cats, tags and terms
		foreach ( $this->_xpath( $xml, '/rss/channel/wp:category' ) as $term_arr ) {
			$t = $term_arr->children( $namespaces['wp'] );
			$categories[] = array(
				'term_id' => (int) $t->term_id,
				'category_nicename' => (string) $t->category_nicename,
				'category_parent' => (string) $t->category_parent,
				'cat_name' => (string) $t->cat_name,
				'category_description' => (string) $t->category_description
			);
		}

		foreach ( $this->_xpath( $xml, '/rss/channel/wp:tag' ) as $term_arr ) {
			$t = $term_arr->children( $namespaces['wp'] );
			$tags[] = array(
				'term_id' => (int) $t->term_id,
				'tag_slug' => (string) $t->tag_slug,
				'tag_name' => (string) $t->tag_name,
				'tag_description' => (string) $t->tag_description
			);
		}

		foreach ( $this->_xpath( $xml, '/rss/channel/wp:term' ) as $term_arr ) {
			$t = $term_arr->children( $namespaces['wp'] );
			$terms[] = array(
				'term_id' => (int) $t->term_id,
				'term_taxonomy' => (string) $t->term_taxonomy,
				'slug' => (string) $t->term_slug,
				'term_parent' => (string) $t->term_parent,
				'term_name' => (string) $t->term_name,
				'term_description' => (string) $t->term_description
			);
		}

		// grab posts
		foreach ( $xml->channel->item as $item ) {
			$post = array(
				'post_title' => (string) $item->title,
				'guid' => (string) $item->guid,
			);

			$dc = $item->children( 'http://purl.org/dc/elements/1.1/' );
			$post['post_author'] = (string) $dc->creator;

			$content = $item->children( 'http://purl.org/rss/1.0/modules/content/' );
			$excerpt = $item->children( $namespaces['excerpt'] );
			$post['post_content'] = (string) $content->encoded;
			$post['post_excerpt'] = (string) $excerpt->encoded;

			$wp = $item->children( $namespaces['wp'] );
			$post['post_id'] = (int) $wp->post_id;
			$post['post_date'] = (string) $wp->post_date;
			$post['post_date_gmt'] = (string) $wp->post_date_gmt;
			$post['comment_status'] = (string) $wp->comment_status;
			$post['ping_status'] = (string) $wp->ping_status;
			$post['post_name'] = (string) $wp->post_name;
			$post['status'] = (string) $wp->status;
			$post['post_parent'] = (int) $wp->post_parent;
			$post['menu_order'] = (int) $wp->menu_order;
			$post['post_type'] = (string) $wp->post_type;
			$post['post_password'] = (string) $wp->post_password;
			$post['is_sticky'] = (int) $wp->is_sticky;

			// only <category> elements carrying a nicename attribute are kept
			foreach ( $item->category as $c ) {
				$att = $c->attributes();
				if ( isset( $att['nicename'] ) )
					$post['terms'][] = array(
						'name' => (string) $c,
						'slug' => (string) $att['nicename'],
						'domain' => (string) $att['domain']
					);
			}

			foreach ( $wp->postmeta as $meta ) {
				$post['postmeta'][] = array(
					'key' => (string) $meta->meta_key,
					'value' => (string) $meta->meta_value,
				);
			}

			foreach ( $wp->comment as $comment ) {
				$post['comments'][] = array(
					'comment_id' => (int) $comment->comment_id,
					'comment_author' => (string) $comment->comment_author,
					'comment_author_email' => (string) $comment->comment_author_email,
					'comment_author_IP' => (string) $comment->comment_author_IP,
					'comment_author_url' => (string) $comment->comment_author_url,
					'comment_date' => (string) $comment->comment_date,
					'comment_date_gmt' => (string) $comment->comment_date_gmt,
					'comment_content' => (string) $comment->comment_content,
					'comment_approved' => (string) $comment->comment_approved,
					'comment_type' => (string) $comment->comment_type,
					'comment_parent' => (string) $comment->comment_parent,
					'comment_user_id' => (int) $comment->comment_user_id,
				);
			}

			$posts[] = $post;
		}

		return array(
			'authors' => $authors,
			'posts' => $posts,
			'categories' => $categories,
			'tags' => $tags,
			'terms' => $terms,
			'base_url' => $base_url
		);
	}
}

/**
 * WXR Parser that makes use of the XML Parser PHP extension
 *
 * Event-driven: tag_open() records which field the next text belongs to,
 * cdata() accumulates the text, and tag_close() stores it.
 *
 * @todo wxr checking
 */
class WXR_Parser_XML {
	// elements whose text is stored on the current item/term/author, keyed by the name without "wp:"
	var $wp_tags = array(
		'wp:post_id', 'wp:post_date', 'wp:post_date_gmt', 'wp:comment_status', 'wp:ping_status',
		'wp:status', 'wp:post_name', 'wp:post_parent', 'wp:menu_order', 'wp:post_type', 'wp:post_password',
		'wp:is_sticky', 'wp:term_id', 'wp:category_nicename', 'wp:category_parent', 'wp:cat_name', 'wp:category_description',
		'wp:tag_slug', 'wp:tag_name', 'wp:tag_description', 'wp:term_taxonomy', 'wp:term_parent',
		'wp:term_name', 'wp:term_description', 'wp:author_login', 'wp:author_email', 'wp:author_display_name',
		'wp:author_first_name', 'wp:author_last_name',
	);
	// elements whose text is stored on the current comment, keyed by the name without "wp:"
	var $wp_sub_tags = array(
		'wp:comment_id', 'wp:comment_author', 'wp:comment_author_email', 'wp:comment_author_url',
		'wp:comment_author_IP', 'wp:comment_date', 'wp:comment_date_gmt', 'wp:comment_content',
		'wp:comment_approved', 'wp:comment_type', 'wp:comment_parent', 'wp:comment_user_id',
	);

	// parser state, reset at the start of every parse()
	var $is_wxr_file = false;
	var $in_post = false;
	var $in_tag = false;
	var $in_sub_tag = false;
	var $cdata = false;
	var $data = array();
	var $sub_data = array();

	// collected results
	var $authors = array();
	var $posts = array();
	var $term = array();
	var $category = array();
	var $tag = array();
	var $base_url = '';

	/**
	 * Parse a WXR file.
	 *
	 * @param string $file Path to the WXR file.
	 * @return array|WP_Error Parsed data, or WP_Error( 'WXR_parse_error' ). When the
	 *                        XML itself is malformed the error data holds
	 *                        array( error code, error string ).
	 */
	function parse( $file ) {
		$this->is_wxr_file = $this->in_post = $this->cdata = $this->in_tag = $this->in_sub_tag = false;
		// arrays rather than false: these are written to with array syntax later on
		$this->data = $this->sub_data = array();
		$this->authors = $this->posts = $this->term = $this->category = $this->tag = array();
		$this->base_url = '';

		$contents = file_get_contents( $file );
		if ( false === $contents )
			return new WP_Error( 'WXR_parse_error', __( 'There was an error when reading this WXR file', 'wordpress-importer' ) );

		$xml = xml_parser_create( 'UTF-8' );
		xml_parser_set_option( $xml, XML_OPTION_SKIP_WHITE, 1 );
		xml_parser_set_option( $xml, XML_OPTION_CASE_FOLDING, 0 );
		// array callables bind the handlers to this object without xml_set_object()
		xml_set_character_data_handler( $xml, array( $this, 'cdata' ) );
		xml_set_element_handler( $xml, array( $this, 'tag_open' ), array( $this, 'tag_close' ) );

		$parsed = xml_parse( $xml, $contents, true );
		$error_code = $parsed ? 0 : xml_get_error_code( $xml );

		// release the parser on success AND failure; from PHP 8 it is an object freed by unset()
		if ( version_compare( PHP_VERSION, '8.0', '<' ) )
			xml_parser_free( $xml );
		unset( $xml );

		if ( ! $parsed ) {
			$error_string = xml_error_string( $error_code );
			return new WP_Error( 'WXR_parse_error', __( 'There was an error when reading this WXR file', 'wordpress-importer' ), array( $error_code, $error_string ) );
		}

		if ( ! $this->is_wxr_file )
			return new WP_Error( 'WXR_parse_error', __( 'This does not appear to be a WXR file, missing/invalid WXR version number', 'wordpress-importer' ) );

		return array(
			'authors' => $this->authors,
			'posts' => $this->posts,
			'categories' => $this->category,
			'tags' => $this->tag,
			'terms' => $this->term,
			'base_url' => $this->base_url
		);
	}

	/**
	 * Element start handler: remembers which field the upcoming text fills.
	 *
	 * @param mixed  $parse XML parser handle (unused).
	 * @param string $tag   Element name, including any prefix such as "wp:".
	 * @param array  $attr  Element attributes.
	 */
	function tag_open( $parse, $tag, $attr ) {
		if ( in_array( $tag, $this->wp_tags, true ) ) {
			$this->in_tag = substr( $tag, 3 );
			return;
		}

		if ( in_array( $tag, $this->wp_sub_tags, true ) ) {
			$this->in_sub_tag = substr( $tag, 3 );
			return;
		}

		switch ( $tag ) {
			case 'category':
				if ( isset( $attr['domain'], $attr['nicename'] ) ) {
					$this->sub_data['domain'] = $attr['domain'];
					$this->sub_data['slug'] = $attr['nicename'];
				}
				break;
			// explicit break: <item> itself holds no title text, so it must not arm the post_title field
			case 'item': $this->in_post = true; break;
			// <title> also exists at channel level; only titles inside an item are post titles
			case 'title': if ( $this->in_post ) $this->in_tag = 'post_title'; break;
			case 'guid': $this->in_tag = 'guid'; break;
			case 'dc:creator': $this->in_tag = 'post_author'; break;
			case 'content:encoded': $this->in_tag = 'post_content'; break;
			case 'excerpt:encoded': $this->in_tag = 'post_excerpt'; break;

			case 'wp:term_slug': $this->in_tag = 'slug'; break;
			case 'wp:meta_key': $this->in_sub_tag = 'key'; break;
			case 'wp:meta_value': $this->in_sub_tag = 'value'; break;
		}
	}

	/**
	 * Character data handler: accumulates trimmed text for the open element.
	 *
	 * Whitespace-only chunks are ignored. The check is against the empty
	 * string, not truthiness, so a literal "0" is kept.
	 *
	 * @param mixed  $parser XML parser handle (unused).
	 * @param string $cdata  Text chunk.
	 */
	function cdata( $parser, $cdata ) {
		$cdata = trim( $cdata );
		if ( '' === $cdata )
			return;

		$this->cdata .= $cdata;
	}

	/**
	 * Element end handler: stores the accumulated text or finished record.
	 *
	 * @param mixed  $parser XML parser handle (unused).
	 * @param string $tag    Element name, including any prefix such as "wp:".
	 */
	function tag_close( $parser, $tag ) {
		switch ( $tag ) {
			case 'wp:comment':
				if ( ! empty( $this->sub_data ) )
					$this->data['comments'][] = $this->sub_data;
				$this->sub_data = array();
				break;
			case 'category':
				// sub_data is only populated when the element had domain + nicename attributes
				if ( ! empty( $this->sub_data ) ) {
					$this->sub_data['name'] = $this->cdata;
					$this->data['terms'][] = $this->sub_data;
				}
				$this->sub_data = array();
				break;
			case 'wp:postmeta':
				if ( ! empty( $this->sub_data ) )
					$this->data['postmeta'][] = $this->sub_data;
				$this->sub_data = array();
				break;
			case 'item':
				$this->posts[] = $this->data;
				$this->data = array();
				$this->in_post = false;
				break;
			case 'wp:category':
			case 'wp:tag':
			case 'wp:term':
				// "wp:category" -> $this->category, "wp:tag" -> $this->tag, "wp:term" -> $this->term
				$n = substr( $tag, 3 );
				array_push( $this->$n, $this->data );
				$this->data = array();
				break;
			case 'wp:author':
				if ( ! empty( $this->data['author_login'] ) )
					$this->authors[$this->data['author_login']] = $this->data;
				$this->data = array();
				break;
			case 'wp:base_site_url':
				$this->base_url = (string) $this->cdata;
				break;
			case 'wp:wxr_version':
				$this->is_wxr_file = (bool) preg_match( '/\d+\.\d+/', (string) $this->cdata );
				break;

			default:
				// cdata is false when the element had no text; anything else (including "0") is real content
				$value = ( false !== $this->cdata ) ? $this->cdata : '';
				if ( $this->in_sub_tag ) {
					$this->sub_data[$this->in_sub_tag] = $value;
					$this->in_sub_tag = false;
				} else if ( $this->in_tag ) {
					$this->data[$this->in_tag] = $value;
					$this->in_tag = false;
				}
		}

		$this->cdata = false;
	}
}

/**
 * WXR Parser that uses regular expressions. Fallback for installs with
 * neither the SimpleXML nor the XML Parser extension.
 *
 * Reads the file line by line, so channel-level elements (category, tag,
 * term, author, base_site_url) are only recognised when the whole element
 * sits on a single line.
 */
class WXR_Parser_Regex {
	var $has_gzip = false;

	var $authors = array();
	var $posts = array();
	var $categories = array();
	var $tags = array();
	var $terms = array();
	var $base_url = '';

	// PHP 4 style constructor, kept for backwards compatibility
	function WXR_Parser_Regex() {
		$this->__construct();
	}

	function __construct() {
		$this->has_gzip = is_callable( 'gzopen' );
	}

	/**
	 * Parse a WXR file.
	 *
	 * @param string $file Path to the WXR file.
	 * @return array|WP_Error Parsed data, or WP_Error( 'WXR_parse_error' ) when no
	 *                        wp:wxr_version element was seen (this includes the
	 *                        case where the file could not be opened).
	 */
	function parse( $file ) {
		// reset results so every key in the returned array is always defined
		$this->authors = $this->posts = $this->categories = $this->tags = $this->terms = array();
		$this->base_url = '';

		$is_wxr = $in_post = false;
		$post = '';

		$fp = $this->fopen( $file, 'r' );
		if ( $fp ) {
			while ( ! $this->feof( $fp ) ) {
				$importline = rtrim( $this->fgets( $fp ) );

				if ( ! $is_wxr && preg_match( '|<wp:wxr_version>\d+\.\d+</wp:wxr_version>|', $importline ) )
					$is_wxr = true;

				if ( false !== strpos( $importline, '<wp:base_site_url>' ) ) {
					if ( preg_match( '|<wp:base_site_url>(.*?)</wp:base_site_url>|is', $importline, $url ) )
						$this->base_url = $url[1]; //esc_url (?)
					continue;
				}
				if ( false !== strpos( $importline, '<wp:category>' ) ) {
					if ( preg_match( '|<wp:category>(.*?)</wp:category>|is', $importline, $category ) )
						$this->categories[] = $this->process_category( $category[1] );
					continue;
				}
				if ( false !== strpos( $importline, '<wp:tag>' ) ) {
					if ( preg_match( '|<wp:tag>(.*?)</wp:tag>|is', $importline, $tag ) )
						$this->tags[] = $this->process_tag( $tag[1] );
					continue;
				}
				if ( false !== strpos( $importline, '<wp:term>' ) ) {
					if ( preg_match( '|<wp:term>(.*?)</wp:term>|is', $importline, $term ) )
						$this->terms[] = $this->process_term( $term[1] );
					continue;
				}
				if ( false !== strpos( $importline, '<wp:author>' ) ) {
					if ( preg_match( '|<wp:author>(.*?)</wp:author>|is', $importline, $author ) ) {
						$a = $this->process_author( $author[1] );
						$this->authors[$a['author_login']] = $a;
					}
					continue;
				}
				if ( false !== strpos( $importline, '<item>' ) ) {
					$post = '';
					$in_post = true;
					continue;
				}
				if ( false !== strpos( $importline, '</item>' ) ) {
					$in_post = false;
					$this->posts[] = $this->process_post( $post );
					continue;
				}
				if ( $in_post ) {
					$post .= $importline . "\n";
				}
			}

			$this->fclose( $fp );
		}

		if ( ! $is_wxr )
			return new WP_Error( 'WXR_parse_error', __( 'This does not appear to be a WXR file, missing/invalid WXR version number', 'wordpress-importer' ) );

		return array(
			'authors' => $this->authors,
			'posts' => $this->posts,
			'categories' => $this->categories,
			'tags' => $this->tags,
			'terms' => $this->terms,
			'base_url' => $this->base_url
		);
	}

	/**
	 * Extract the text of the first <$tag> element found in $string.
	 *
	 * A surrounding CDATA wrapper is removed and the value is trimmed and
	 * passed through $wpdb->escape().
	 * NOTE(review): the SimpleXML and XML parsers return unescaped values;
	 * confirm which form the importer expects.
	 *
	 * @param string $string XML fragment to search.
	 * @param string $tag    Element name, e.g. 'wp:post_id'.
	 * @return string Element text, or '' when the element is not present.
	 */
	function get_tag( $string, $tag ) {
		global $wpdb;
		// the name must be followed by ">" or by whitespace + attributes, so that
		// e.g. wp:comment_author cannot match <wp:comment_author_email>
		preg_match( "|<{$tag}(?:\s[^>]*)?>(.*?)</{$tag}>|is", $string, $return );
		if ( isset( $return[1] ) ) {
			$return = preg_replace( '|^<!\[CDATA\[(.*)\]\]>$|s', '$1', $return[1] );
			$return = $wpdb->escape( trim( $return ) );
		} else {
			$return = '';
		}
		return $return;
	}

	function process_category( $c ) {
		return array(
			'term_id' => $this->get_tag( $c, 'wp:term_id' ),
			'cat_name' => $this->get_tag( $c, 'wp:cat_name' ),
			'category_nicename' => $this->get_tag( $c, 'wp:category_nicename' ),
			'category_parent' => $this->get_tag( $c, 'wp:category_parent' ),
			'category_description' => $this->get_tag( $c, 'wp:category_description' ),
		);
	}

	function process_tag( $t ) {
		return array(
			'term_id' => $this->get_tag( $t, 'wp:term_id' ),
			'tag_name' => $this->get_tag( $t, 'wp:tag_name' ),
			'tag_slug' => $this->get_tag( $t, 'wp:tag_slug' ),
			'tag_description' => $this->get_tag( $t, 'wp:tag_description' ),
		);
	}

	function process_term( $t ) {
		return array(
			'term_id' => $this->get_tag( $t, 'wp:term_id' ),
			'term_taxonomy' => $this->get_tag( $t, 'wp:term_taxonomy' ),
			'slug' => $this->get_tag( $t, 'wp:term_slug' ),
			'term_parent' => $this->get_tag( $t, 'wp:term_parent' ),
			'term_name' => $this->get_tag( $t, 'wp:term_name' ),
			'term_description' => $this->get_tag( $t, 'wp:term_description' ),
		);
	}

	function process_author( $a ) {
		return array(
			'author_login' => $this->get_tag( $a, 'wp:author_login' ),
			'author_email' => $this->get_tag( $a, 'wp:author_email' ),
			'author_display_name' => $this->get_tag( $a, 'wp:author_display_name' ),
			'author_first_name' => $this->get_tag( $a, 'wp:author_first_name' ),
			'author_last_name' => $this->get_tag( $a, 'wp:author_last_name' ),
		);
	}

	/**
	 * Turn the raw text between <item> and </item> into a post array.
	 *
	 * @param string $post Inner XML of one item.
	 * @return array Post fields; 'terms', 'comments' and 'postmeta' are only
	 *               present when the item contains at least one of them.
	 */
	function process_post( $post ) {
		$post_id        = $this->get_tag( $post, 'wp:post_id' );
		$post_title     = $this->get_tag( $post, 'title' );
		$post_date      = $this->get_tag( $post, 'wp:post_date' );
		$post_date_gmt  = $this->get_tag( $post, 'wp:post_date_gmt' );
		$comment_status = $this->get_tag( $post, 'wp:comment_status' );
		$ping_status    = $this->get_tag( $post, 'wp:ping_status' );
		$status         = $this->get_tag( $post, 'wp:status' );
		$post_name      = $this->get_tag( $post, 'wp:post_name' );
		$post_parent    = $this->get_tag( $post, 'wp:post_parent' );
		$menu_order     = $this->get_tag( $post, 'wp:menu_order' );
		$post_type      = $this->get_tag( $post, 'wp:post_type' );
		$post_password  = $this->get_tag( $post, 'wp:post_password' );
		$is_sticky      = $this->get_tag( $post, 'wp:is_sticky' );
		$guid           = $this->get_tag( $post, 'guid' );
		$post_author    = $this->get_tag( $post, 'dc:creator' );

		// lowercase upper-case HTML tag names and self-close <br>/<hr>
		$post_excerpt = $this->get_tag( $post, 'excerpt:encoded' );
		$post_excerpt = preg_replace_callback( '|<(/?[A-Z]+)|', array( $this, '_normalize_tag' ), $post_excerpt );
		$post_excerpt = str_replace( '<br>', '<br />', $post_excerpt );
		$post_excerpt = str_replace( '<hr>', '<hr />', $post_excerpt );

		$post_content = $this->get_tag( $post, 'content:encoded' );
		$post_content = preg_replace_callback( '|<(/?[A-Z]+)|', array( $this, '_normalize_tag' ), $post_content );
		$post_content = str_replace( '<br>', '<br />', $post_content );
		$post_content = str_replace( '<hr>', '<hr />', $post_content );

		$postdata = compact( 'post_id', 'post_author', 'post_date', 'post_date_gmt', 'post_content', 'post_excerpt',
			'post_title', 'status', 'post_name', 'comment_status', 'ping_status', 'guid', 'post_parent',
			'menu_order', 'post_type', 'post_password', 'is_sticky'
		);

		$post_terms = $post_comments = $post_postmeta = array();

		preg_match_all( '|<category domain="([^"]+?)" nicename="([^"]+?)">(.+?)</category>|is', $post, $terms, PREG_SET_ORDER );
		foreach ( $terms as $t ) {
			$post_terms[] = array(
				'slug' => $t[2],
				'domain' => $t[1],
				'name' => str_replace( array( '<![CDATA[', ']]>' ), '', $t[3] ),
			);
		}
		if ( ! empty( $post_terms ) ) $postdata['terms'] = $post_terms;

		preg_match_all( '|<wp:comment>(.+?)</wp:comment>|is', $post, $comments );
		foreach ( $comments[1] as $comment ) {
			$post_comments[] = array(
				'comment_id' => $this->get_tag( $comment, 'wp:comment_id' ),
				'comment_author' => $this->get_tag( $comment, 'wp:comment_author' ),
				'comment_author_email' => $this->get_tag( $comment, 'wp:comment_author_email' ),
				'comment_author_IP' => $this->get_tag( $comment, 'wp:comment_author_IP' ),
				'comment_author_url' => $this->get_tag( $comment, 'wp:comment_author_url' ),
				'comment_date' => $this->get_tag( $comment, 'wp:comment_date' ),
				'comment_date_gmt' => $this->get_tag( $comment, 'wp:comment_date_gmt' ),
				'comment_content' => $this->get_tag( $comment, 'wp:comment_content' ),
				'comment_approved' => $this->get_tag( $comment, 'wp:comment_approved' ),
				'comment_type' => $this->get_tag( $comment, 'wp:comment_type' ),
				'comment_parent' => $this->get_tag( $comment, 'wp:comment_parent' ),
				// returned by the SimpleXML and XML parsers as well
				'comment_user_id' => $this->get_tag( $comment, 'wp:comment_user_id' ),
			);
		}
		if ( ! empty( $post_comments ) ) $postdata['comments'] = $post_comments;

		preg_match_all( '|<wp:postmeta>(.+?)</wp:postmeta>|is', $post, $postmeta );
		foreach ( $postmeta[1] as $p ) {
			$post_postmeta[] = array(
				'key' => $this->get_tag( $p, 'wp:meta_key' ),
				'value' => $this->get_tag( $p, 'wp:meta_value' ),
			);
		}
		if ( ! empty( $post_postmeta ) ) $postdata['postmeta'] = $post_postmeta;

		return $postdata;
	}

	// preg_replace_callback helper: lowercases the matched tag name
	function _normalize_tag( $matches ) {
		return '<' . strtolower( $matches[1] );
	}

	// The wrappers below use the zlib stream functions when available and
	// the plain file functions otherwise.

	function fopen( $filename, $mode = 'r' ) {
		if ( $this->has_gzip )
			return gzopen( $filename, $mode );
		return fopen( $filename, $mode );
	}

	function feof( $fp ) {
		if ( $this->has_gzip )
			return gzeof( $fp );
		return feof( $fp );
	}

	function fgets( $fp, $len = 8192 ) {
		if ( $this->has_gzip )
			return gzgets( $fp, $len );
		return fgets( $fp, $len );
	}

	function fclose( $fp ) {
		if ( $this->has_gzip )
			return gzclose( $fp );
		return fclose( $fp );
	}
}
trunk/readme.txt
3 3 Donate link: 4 4 Tags: importer, wordpress 5 5 Requires at least: 3.0 6 Tested up to: 3.0 6 Tested up to: 3.0.1 7 7 Stable tag: 0.2 8 8 9 Import posts, pages, comments, custom fields, categories, and tagsfrom a WordPress export file.9 Import posts, pages, comments, custom fields, categories, tags and more from a WordPress export file. 10 10 11 11 == Description == 12 12 13 Import posts, pages, comments, custom fields, categories, and tagsfrom a WordPress export file.13 Import posts, pages, comments, custom fields, categories, tags and more from a WordPress export file. 14 14 15 15 == Installation == 16 16 17 17 1. Upload the `wordpress-importer` folder to the `/wp-content/plugins/` directory 18 18 1. Activate the plugin through the 'Plugins' menu in WordPress 19 1. Go to the Tools -> Import screen, Click on WordPress19 1. Go to the Tools -> Import screen, click on WordPress 20 20 21 == Frequently Asked Questions==21 == Changelog == 22 22 23 == Screenshots == 23 = 0.3 = 24 * Use an XML Parser if possible 25 * Proper import support for nav menus 26 * ... and more 24 27 25 == Changelog ==26 27 28 = 0.1 = 28 29 * Initial release 30 31 == Upgrade Notice == 32 33 = 0.3 = 34 Upgrade for a more robust and reliable experience when importing a WordPress export file. 35 36 == Filters == 37 38 The importer has a couple of filters to allow you to completely enable/block certain features: 39 * `import_allow_create_users`: return false if you only want to allow mapping to existing users 40 * `import_allow_fetch_attachments`: return false if you do not wish to allow importing and downloading of attachments -
trunk/wordpress-importer.php
2 2 /* 3 3 Plugin Name: WordPress Importer 4 4 Plugin URI: http://wordpress.org/extend/plugins/wordpress-importer/ 5 Description: Import posts, pages, comments, custom fields, categories, and tagsfrom a WordPress export file.5 Description: Import posts, pages, comments, custom fields, categories, tags and more from a WordPress export file. 6 6 Author: wordpressdotorg 7 7 Author URI: http://wordpress.org/ 8 Version: 0.2 9 Stable tag: 0.2 8 Version: 0.3 10 9 License: GPL v2 - http://www.gnu.org/licenses/old-licenses/gpl-2.0.html 11 10 */ 12 11 13 if ( ! defined('WP_LOAD_IMPORTERS') )12 if ( ! defined( 'WP_LOAD_IMPORTERS' ) ) 14 13 return; 15 14 16 15 // Load Importer API 17 16 require_once ABSPATH . 'wp-admin/includes/import.php'; 18 17 19 if ( ! class_exists( 'WP_Importer' ) ) {18 if ( ! class_exists( 'WP_Importer' ) ) { 20 19 $class_wp_importer = ABSPATH . 'wp-admin/includes/class-wp-importer.php'; 21 20 if ( file_exists( $class_wp_importer ) ) 22 require _once$class_wp_importer;21 require $class_wp_importer; 23 22 } 24 23 24 // include WXR file parsers 25 require dirname( __FILE__ ) . 
'/parsers.php'; 26 25 27 /** 26 * WordPress Importer 28 * WordPress Importer class for managing the import process of a WXR file 27 29 * 28 30 * @package WordPress 29 31 * @subpackage Importer 30 32 */ 31 33 if ( class_exists( 'WP_Importer' ) ) { 32 34 class WP_Import extends WP_Importer { 33 34 var $post_ids_processed = array ();35 var $orphans = array ();36 var $file;37 35 var $id; 38 var $mtnames = array ();39 var $newauthornames = array ();40 var $allauthornames = array ();41 36 42 var $author_ids = array (); 43 var $tags = array (); 44 var $categories = array (); 45 var $terms = array (); 46 var $authors = array (); 37 var $authors = array(); 38 var $posts = array(); 39 var $terms = array(); 40 var $categories = array(); 41 var $tags = array(); 42 var $base_url = ''; 47 43 48 var $j = -1; 44 var $processed_authors = array(); 45 var $processed_terms = array(); 46 var $processed_posts = array(); 47 var $post_orphans = array(); 48 var $processed_menu_items = array(); 49 var $menu_item_orphans = array(); 50 var $missing_menu_items = array(); 51 49 52 var $fetch_attachments = false; 50 var $url_remap = array 53 var $url_remap = array(); 51 54 52 function header() { 53 echo '<div class="wrap">'; 54 screen_icon(); 55 echo '<h2>'.__('Import WordPress', 'wordpress-importer').'</h2>'; 56 } 55 function WP_Import() { /* nothing */ } 57 56 58 function footer() { 59 echo '</div>'; 57 function dispatch() { 58 $this->header(); 59 60 $step = empty( $_GET['step'] ) ? 0 : (int) $_GET['step']; 61 switch ( $step ) { 62 case 0: 63 $this->greet(); 64 break; 65 case 1: 66 check_admin_referer( 'import-upload' ); 67 if ( $this->handle_upload() ) 68 $this->import_options(); 69 break; 70 case 2: 71 check_admin_referer( 'import-wordpress' ); 72 $this->fetch_attachments = ( ! 
empty( $_POST['fetch_attachments'] ) && $this->allow_fetch_attachments() ); 73 $this->id = (int) $_POST['import_id']; 74 $file = get_attached_file( $this->id ); 75 $this->import( $file ); 76 break; 77 } 78 79 $this->footer(); 60 80 } 61 81 62 function greet() { 63 echo '<div class="narrow">'; 64 echo '<p>'.__('Howdy! Upload your WordPress eXtended RSS (WXR) file and we’ll import the posts, pages, comments, custom fields, categories, and tags into this site.', 'wordpress-importer').'</p>'; 65 echo '<p>'.__('Choose a WordPress WXR file to upload, then click Upload file and import.', 'wordpress-importer').'</p>'; 66 wp_import_upload_form("admin.php?import=wordpress&step=1"); 67 echo '</div>'; 68 } 82 function import( $file ) { 83 add_filter( 'import_post_meta_key', array( $this, 'is_valid_meta_key' ) ); 69 84 70 function get_tag( $string, $tag ) { 71 global $wpdb; 72 preg_match("|<$tag.*?>(.*?)</$tag>|is", $string, $return); 73 if ( isset($return[1]) ) { 74 $return = preg_replace('|^<!\[CDATA\[(.*)\]\]>$|s', '$1', $return[1]); 75 $return = $wpdb->escape( trim( $return ) ); 76 } else { 77 $return = ''; 78 } 79 return $return; 80 } 85 $this->import_start( $file ); 81 86 82 function has_gzip() { 83 return is_callable('gzopen'); 84 } 87 $this->get_author_mapping(); 85 88 86 function fopen($filename, $mode='r') { 87 if ( $this->has_gzip() ) 88 return gzopen($filename, $mode); 89 return fopen($filename, $mode); 90 } 89 wp_suspend_cache_invalidation( true ); 90 $this->process_categories(); 91 $this->process_tags(); 92 $this->process_terms(); 93 $this->process_posts(); 94 wp_suspend_cache_invalidation( false ); 91 95 92 function feof($fp) { 93 if ( $this->has_gzip() ) 94 return gzeof($fp); 95 return feof($fp); 96 } 96 // update items with missing/incorrect parent IDs 97 $this->backfill_parents(); 98 // update attachment references within posts and postmeta 99 $this->backfill_attachment_urls(); 97 100 98 function fgets($fp, $len=8192) { 99 if ( $this->has_gzip() ) 100 return 
gzgets($fp, $len); 101 return fgets($fp, $len); 101 $this->import_end(); 102 102 } 103 103 104 function fclose($fp) { 105 if ( $this->has_gzip() ) 106 return gzclose($fp); 107 return fclose($fp); 108 } 109 110 function get_entries($process_post_func=NULL) { 111 set_magic_quotes_runtime(0); 112 113 $doing_entry = false; 114 $is_wxr_file = false; 115 116 $fp = $this->fopen($this->file, 'r'); 117 if ($fp) { 118 while ( !$this->feof($fp) ) { 119 $importline = rtrim($this->fgets($fp)); 120 121 // this doesn't check that the file is perfectly valid but will at least confirm that it's not the wrong format altogether 122 if ( !$is_wxr_file && preg_match('|xmlns:wp="http://wordpress[.]org/export/\d+[.]\d+/"|', $importline) ) 123 $is_wxr_file = true; 124 125 if ( false !== strpos($importline, '<wp:base_site_url>') ) { 126 preg_match('|<wp:base_site_url>(.*?)</wp:base_site_url>|is', $importline, $url); 127 $this->base_url = $url[1]; 128 continue; 129 } 130 if ( false !== strpos($importline, '<wp:category>') ) { 131 preg_match('|<wp:category>(.*?)</wp:category>|is', $importline, $category); 132 $this->categories[] = $category[1]; 133 continue; 134 } 135 if ( false !== strpos($importline, '<wp:tag>') ) { 136 preg_match('|<wp:tag>(.*?)</wp:tag>|is', $importline, $tag); 137 $this->tags[] = $tag[1]; 138 continue; 139 } 140 if ( false !== strpos($importline, '<wp:term>') ) { 141 preg_match('|<wp:term>(.*?)</wp:term>|is', $importline, $term); 142 $this->terms[] = $term[1]; 143 continue; 144 } 145 if ( false !== strpos($importline, '<wp:author>') ) { 146 preg_match('|<wp:author>(.*?)</wp:author>|is', $importline, $author); 147 $this->authors[] = $author[1]; 148 continue; 149 } 150 if ( false !== strpos($importline, '<item>') ) { 151 $this->post = ''; 152 $doing_entry = true; 153 continue; 154 } 155 if ( false !== strpos($importline, '</item>') ) { 156 $doing_entry = false; 157 if ($process_post_func) 158 call_user_func($process_post_func, $this->post); 159 continue; 160 } 161 if ( 
$doing_entry ) { 162 $this->post .= $importline . "\n"; 163 } 164 } 165 166 $this->fclose($fp); 104 function import_start( $file ) { 105 $import_arr = $this->parse( $file ); 106 107 if ( is_wp_error( $import_arr ) ) { 108 echo '<p><strong>' . __( 'Sorry, there has been an error.', 'wordpress-importer' ) . '</strong></p>'; 109 echo '<p>' . esc_html( $import_arr->get_error_message() ) . '</p>'; 110 $this->footer(); 111 die(); 167 112 } 113 114 $this->authors = $import_arr['authors']; 115 $this->posts = $import_arr['posts']; 116 $this->terms = $import_arr['terms']; 117 $this->categories = $import_arr['categories']; 118 $this->tags = $import_arr['tags']; 119 $this->base_url = esc_url( $import_arr['base_url'] ); 168 120 169 return $is_wxr_file; 121 wp_defer_term_counting( true ); 122 wp_defer_comment_counting( true ); 170 123 124 do_action( 'import_start' ); 171 125 } 172 126 173 function get_wp_authors() { 174 // We need to find unique values of author names, while preserving the order, so this function emulates the unique_value(); php function, without the sorting. 175 $temp = $this->allauthornames; 176 $authors[0] = array_shift($temp); 177 $y = count($temp) + 1; 178 for ($x = 1; $x < $y; $x ++) { 179 $next = array_shift($temp); 180 if (!(in_array($next, $authors))) 181 array_push($authors, $next); 127 function import_end() { 128 wp_import_cleanup( $this->id ); 129 130 wp_cache_flush(); 131 foreach ( get_taxonomies() as $tax ) { 132 delete_option( "{$tax}_children" ); 133 _get_term_hierarchy( $tax ); 182 134 } 135 136 wp_defer_term_counting( false ); 137 wp_defer_comment_counting( false ); 138 139 echo '<p>' . __( 'All done.' ) . ' <a href="' . admin_url() . '">' . __( 'Have fun!' ) . '</a>' . 
'</p>'; 183 140 184 return $authors;141 do_action( 'import_end' ); 185 142 } 186 143 187 function get_authors_from_post() {188 global $current_user;144 function handle_upload() { 145 $file = wp_import_handle_upload(); 189 146 190 // this will populate $this->author_ids with a list of author_names => user_ids 147 if ( isset( $file['error'] ) ) { 148 echo '<p><strong>' . __( 'Sorry, there has been an error.', 'wordpress-importer' ) . '</strong></p>'; 149 echo '<p>' . esc_html( $file['error'] ) . '</p>'; 150 return false; 151 } 191 152 192 foreach ( (array) $_POST['author_in'] as $i => $in_author_name ) { 193 194 if ( !empty($_POST['user_select'][$i]) ) { 195 // an existing user was selected in the dropdown list 196 $user = get_userdata( intval($_POST['user_select'][$i]) ); 197 if ( isset($user->ID) ) 198 $this->author_ids[$in_author_name] = $user->ID; 199 } 200 elseif ( $this->allow_create_users() ) { 201 // nothing was selected in the dropdown list, so we'll use the name in the text field 202 203 $new_author_name = trim($_POST['user_create'][$i]); 204 // if the user didn't enter a name, assume they want to use the same name as in the import file 205 if ( empty($new_author_name) ) 206 $new_author_name = $in_author_name; 207 208 $user_id = username_exists($new_author_name); 209 if ( !$user_id ) { 210 $user_id = wp_create_user($new_author_name, wp_generate_password()); 211 } 212 213 if ( !is_wp_error( $user_id ) ) { 214 $this->author_ids[$in_author_name] = $user_id; 215 } 216 } 217 218 // failsafe: if the user_id was invalid, default to the current user 219 if ( empty($this->author_ids[$in_author_name]) ) { 220 $this->author_ids[$in_author_name] = intval($current_user->ID); 221 } 153 $this->id = (int) $file['id']; 154 $import_data = $this->parse( $file['file'] ); 155 if ( is_wp_error( $import_data ) ) { 156 echo '<p><strong>' . __( 'Sorry, there has been an error.', 'wordpress-importer' ) . '</strong></p>'; 157 echo '<p>' . 
esc_html( $import_data->get_error_message() ) . '</p>'; 158 return false; 222 159 } 160 $this->authors = $import_data['authors']; 223 161 162 return true; 224 163 } 225 164 226 function wp_authors_form() { 165 function import_options() { 166 $j = 0; 227 167 ?> 228 <h2><?php _e('Assign Authors', 'wordpress-importer'); ?></h2> 229 <p><?php _e('To make it easier for you to edit and save the imported posts and drafts, you may want to change the name of the author of the posts. For example, you may want to import all the entries as <code>admin</code>s entries.', 'wordpress-importer'); ?></p> 230 <?php 231 if ( $this->allow_create_users() ) { 232 echo '<p>'.__('If a new user is created by WordPress, a password will be randomly generated. Manually change the user’s details if necessary.', 'wordpress-importer')."</p>\n"; 233 } 168 <form action="<?php echo admin_url( 'admin.php?import=wordpress&step=2' ); ?>" method="post"> 169 <?php wp_nonce_field( 'import-wordpress' ); ?> 170 <input type="hidden" name="import_id" value="<?php echo $this->id; ?>" /> 234 171 172 <?php if ( ! empty( $this->authors ) ) : ?> 173 <h3><?php _e('Assign Authors', 'wordpress-importer'); ?></h3> 174 <p><?php _e( 'To make it easier for you to edit and save the imported content, you may want to reassign the author of the imported item to an existing user of this site. For example, you may want to import all the entries as <code>admin</code>s entries.', 'wordpress-importer' ); ?></p> 175 <?php if ( $this->allow_create_users() ) : ?> 176 <p><?php printf( __( 'If a new user is created by WordPress, a new password will be randomly generated and the new user’s role will be set as %s. 
Manually changing the new user’s details will be necessary.', 'wordpress-importer' ), esc_html( get_option('default_role') ) ); ?></p> 177 <?php endif; ?> 178 <ol id="authors"> 179 <?php foreach ( $this->authors as $author ) : ?> 180 <li><?php $this->author_select( $j++, $author ); ?></li> 181 <?php endforeach; ?> 182 </ol> 183 <?php endif; ?> 235 184 236 $authors = $this->get_wp_authors(); 237 echo '<form action="?import=wordpress&step=2&id=' . $this->id . '" method="post">'; 238 wp_nonce_field('import-wordpress'); 239 ?> 240 <ol id="authors"> 241 <?php 242 $j = -1; 243 foreach ($authors as $author) { 244 ++ $j; 245 echo '<li>'.__('Import author:', 'wordpress-importer').' <strong>'.$author.'</strong><br />'; 246 $this->users_form($j, $author); 247 echo '</li>'; 248 } 185 <?php if ( $this->allow_fetch_attachments() ) : ?> 186 <h3><?php _e('Import Attachments', 'wordpress-importer'); ?></h3> 187 <p> 188 <input type="checkbox" value="1" name="fetch_attachments" id="import-attachments" /> 189 <label for="import-attachments"><?php _e( 'Download and import file attachments', 'wordpress-importer' ); ?></label> 190 </p> 191 <?php endif; ?> 249 192 250 if ( $this->allow_fetch_attachments() ) { 251 ?> 252 </ol> 253 <h2><?php _e('Import Attachments', 'wordpress-importer'); ?></h2> 254 <p> 255 <input type="checkbox" value="1" name="attachments" id="import-attachments" /> 256 <label for="import-attachments"><?php _e('Download and import file attachments', 'wordpress-importer') ?></label> 257 </p> 258 193 <p class="submit"><input type="submit" class="button" value="<?php esc_attr_e( 'Submit', 'wordpress-importer' ); ?>" /></p> 194 </form> 259 195 <?php 260 196 } 261 197 262 echo '<p class="submit">'; 263 echo '<input type="submit" class="button" value="'. 
esc_attr__('Submit', 'wordpress-importer') .'" />'.'<br />'; 264 echo '</p>'; 265 echo '</form>'; 198 function author_select( $n, $author ) { 199 if ( $this->allow_create_users() ) 200 printf( __( 'Import author %1$s or map to existing user', 'wordpress-importer' ), '<strong>' . esc_html( $author['author_display_name'] ) . '</strong>' ); 201 else 202 printf( __( 'Map author %1$s to existing user', 'wordpress-importer' ), '<strong>' . esc_html( $author['author_display_name'] ) . '</strong>' ); 266 203 204 $users = get_users_of_blog(); ?> 205 <input type="hidden" name="imported_authors[<?php echo $n; ?>]" value="<?php echo esc_attr( $author['author_login'] ); ?>" /> 206 <select name="user_map[<?php echo $n; ?>]"> 207 <option value="0"><?php _e( '- Select -', 'wordpress-importer' ); ?></option> 208 <?php foreach ( $users as $user ) : ?> 209 <option value="<?php echo intval($user->ID); ?>"><?php echo esc_html( $user->display_name ); ?></option> 210 <?php endforeach; ?> 211 </select> 212 <?php 267 213 } 268 214 269 function users_form($n, $author) { 215 function get_author_mapping() { 216 if ( ! isset( $_POST['imported_authors'] ) ) 217 return; 270 218 271 if ( $this->allow_create_users() ) { 272 printf('<label>'.__('Create user %1$s or map to existing', 'wordpress-importer'), ' <input type="text" value="'. esc_attr($author) .'" name="'.'user_create['.intval($n).']'.'" maxlength="30" /></label> <br />'); 273 } 274 else { 275 echo __('Map to existing', 'wordpress-importer').'<br />'; 276 } 219 foreach ( (array) $_POST['imported_authors'] as $i => $login ) { 220 $login = sanitize_user( $login, true ); 277 221 278 // keep track of $n => $author name 279 echo '<input type="hidden" name="author_in['.intval($n).']" value="' . esc_attr($author).'" />'; 222 if ( ! 
empty( $_POST['user_map'][$i] ) ) { 223 $user = get_userdata( intval($_POST['user_map'][$i]) ); 224 if ( isset( $user->ID ) ) 225 $this->processed_authors[$login] = $user->ID; 226 } else if ( $this->allow_create_users() ) { 227 $user_id = username_exists( $login ); 228 if ( ! $user_id ) { 229 $user_data = array( 230 'user_login' => $login, 231 'user_pass' => wp_generate_password(), 232 'user_email' => $this->authors[$login]['author_email'], 233 'display_name' => $this->authors[$login]['author_display_name'], 234 'first_name' => $this->authors[$login]['author_first_name'], 235 'last_name' => $this->authors[$login]['author_last_name'], 236 ); 237 $user_id = wp_insert_user( $user_data ); 238 } 280 239 281 $users = get_users_of_blog(); 282 ?><select name="user_select[<?php echo $n; ?>]"> 283 <option value="0"><?php _e('- Select -', 'wordpress-importer'); ?></option> 284 <?php 285 foreach ($users as $user) { 286 echo '<option value="'.$user->user_id.'">'.$user->user_login.'</option>'; 287 } 288 ?> 289 </select> 290 <?php 291 } 240 if ( ! 
is_wp_error( $user_id ) ) 241 $this->processed_authors[$login] = $user_id; 242 } 292 243 293 function select_authors() { 294 $is_wxr_file = $this->get_entries(array(&$this, 'process_author')); 295 if ( $is_wxr_file ) { 296 $this->wp_authors_form(); 244 // failsafe: if the user_id was invalid, default to the current user 245 if ( empty( $this->processed_authors[$login] ) ) 246 $this->processed_authors[$login] = (int) get_current_user_id(); 297 247 } 298 else {299 echo '<h2>'.__('Invalid file', 'wordpress-importer').'</h2>';300 echo '<p>'.__('Please upload a valid WXR (WordPress eXtended RSS) export file.', 'wordpress-importer').'</p>';301 }302 248 } 303 249 304 // fetch the user ID for a given author name, respecting the mapping preferences305 function checkauthor($author) {306 global $current_user;307 308 if ( !empty($this->author_ids[$author]) )309 return $this->author_ids[$author];310 311 // failsafe: map to the current user312 return $current_user->ID;313 }314 315 316 317 250 function process_categories() { 318 global $wpdb; 251 if ( empty( $this->categories ) ) 252 return; 319 253 320 $cat_names = (array) get_terms('category', array('fields' => 'names')); 321 322 while ( $c = array_shift($this->categories) ) { 323 $cat_name = trim($this->get_tag( $c, 'wp:cat_name' )); 324 325 // If the category exists we leave it alone 326 if ( in_array($cat_name, $cat_names) ) 254 foreach ( $this->categories as $cat ) { 255 // if the category already exists leave it alone 256 $term_id = term_exists( $cat['category_nicename'], 'category' ); 257 if ( $term_id ) { 258 if ( is_array($term_id) ) $term_id = $term_id['term_id']; 259 $this->processed_terms[intval($cat['term_id'])] = (int) $term_id; 327 260 continue; 261 } 328 262 329 $category_nicename = $this->get_tag( $c, 'wp:category_nicename' ); 330 $category_description = $this->get_tag( $c, 'wp:category_description' ); 331 $posts_private = (int) $this->get_tag( $c, 'wp:posts_private' ); 332 $links_private = (int) $this->get_tag( 
$c, 'wp:links_private' ); 263 $category_parent = empty( $cat['category_parent'] ) ? 0 : category_exists( $cat['category_parent'] ); 264 $category_description = isset( $cat['category_description'] ) ? $cat['category_description'] : ''; 265 $catarr = array( 266 'category_nicename' => $cat['category_nicename'], 267 'category_parent' => $category_parent, 268 'cat_name' => $cat['cat_name'], 269 'category_description' => $category_description 270 ); 333 271 334 $parent = $this->get_tag( $c, 'wp:category_parent' ); 335 336 if ( empty($parent) ) 337 $category_parent = '0'; 338 else 339 $category_parent = category_exists($parent); 340 341 $catarr = compact('category_nicename', 'category_parent', 'posts_private', 'links_private', 'posts_private', 'cat_name', 'category_description'); 342 343 print '<em>' . sprintf( __( 'Importing category <em>%s</em>…' , 'wordpress-importer'), esc_html($cat_name) ) . '</em><br />' . "\n"; 344 $cat_ID = wp_insert_category($catarr); 272 $id = wp_insert_category( $catarr ); 273 if ( ! is_wp_error( $id ) ) { 274 $this->processed_terms[intval($cat['term_id'])] = $id; 275 } else { 276 echo 'Error importing category: ' . $id->get_error_message() . 
'<br />'; 277 continue; 278 } 345 279 } 346 280 } 347 281 348 282 function process_tags() { 349 global $wpdb; 283 if ( empty( $this->tags ) ) 284 return; 350 285 351 $tag_names = (array) get_terms('post_tag', array('fields' => 'names')); 352 353 while ( $c = array_shift($this->tags) ) { 354 $tag_name = trim($this->get_tag( $c, 'wp:tag_name' )); 355 356 // If the category exists we leave it alone 357 if ( in_array($tag_name, $tag_names) ) 286 foreach ( $this->tags as $tag ) { 287 // if the tag already exists leave it alone 288 $term_id = term_exists( $tag['tag_slug'], 'post_tag' ); 289 if ( $term_id ) { 290 if ( is_array($term_id) ) $term_id = $term_id['term_id']; 291 $this->processed_terms[intval($tag['term_id'])] = (int) $term_id; 358 292 continue; 293 } 359 294 360 $ slug = $this->get_tag( $c, 'wp:tag_slug' );361 $ description = $this->get_tag( $c, 'wp:tag_description');295 $tag_desc = isset( $tag['tag_description'] ) ? $tag['tag_description'] : ''; 296 $tagarr = array( 'slug' => $tag['tag_slug'], 'description' => $tag_desc ); 362 297 363 $tagarr = compact('slug', 'description'); 364 365 print '<em>' . sprintf( __( 'Importing tag <em>%s</em>…' , 'wordpress-importer'), esc_html($tag_name) ) . '</em><br />' . "\n"; 366 $tag_ID = wp_insert_term($tag_name, 'post_tag', $tagarr); 298 $id = wp_insert_term( $tag['tag_name'], 'post_tag', $tagarr ); 299 if ( ! is_wp_error( $id ) ) { 300 $this->processed_terms[intval($tag['term_id'])] = $id['term_id']; 301 } else { 302 echo 'Error importing post tag: ' . $id->get_error_message() . 
'<br />'; 303 continue; 304 } 367 305 } 368 306 } 369 307 370 308 function process_terms() { 371 global $wpdb, $wp_taxonomies; 309 if ( empty( $this->terms ) ) 310 return; 372 311 373 $custom_taxonomies = $wp_taxonomies; 374 // get rid of the standard taxonomies 375 unset( $custom_taxonomies['category'] ); 376 unset( $custom_taxonomies['post_tag'] ); 377 unset( $custom_taxonomies['link_category'] ); 312 foreach ( $this->terms as $term ) { 313 // if the term already exists in the correct taxonomy leave it alone 314 $term_id = term_exists( $term['slug'], $term['term_taxonomy'] ); 315 if ( $term_id ) { 316 if ( is_array($term_id) ) $term_id = $term_id['term_id']; 317 $this->processed_terms[intval($term['term_id'])] = (int) $term_id; 318 continue; 319 } 378 320 379 $custom_taxonomies = array_keys( $custom_taxonomies ); 380 $current_terms = (array) get_terms( $custom_taxonomies, array('get' => 'all') ); 381 $taxonomies = array(); 382 foreach ( $current_terms as $term ) { 383 if ( isset( $_terms[$term->taxonomy] ) ) { 384 $taxonomies[$term->taxonomy] = array_merge( $taxonomies[$term->taxonomy], array($term->name) ); 321 if ( empty( $term['term_parent'] ) ) { 322 $parent = 0; 385 323 } else { 386 $taxonomies[$term->taxonomy] = array($term->name); 324 $parent = term_exists( $term['term_parent'], $term['term_taxonomy'] ); 325 if ( is_array( $parent ) ) $parent = $parent['term_id']; 387 326 } 388 } 327 $description = isset( $term['term_description'] ) ? 
$term['term_description'] : ''; 328 $termarr = array( 'slug' => $term['slug'], 'description' => $description, 'parent' => intval($parent) ); 389 329 390 while ( $c = array_shift($this->terms) ) { 391 $term_name = trim($this->get_tag( $c, 'wp:term_name' )); 392 $term_taxonomy = trim($this->get_tag( $c, 'wp:term_taxonomy' )); 393 394 // If the term exists in the taxonomy we leave it alone 395 if ( isset($taxonomies[$term_taxonomy] ) && in_array( $term_name, $taxonomies[$term_taxonomy] ) ) 330 $id = wp_insert_term( $term['term_name'], $term['term_taxonomy'], $termarr ); 331 if ( ! is_wp_error( $id ) ) { 332 $this->processed_terms[intval($term['term_id'])] = $id['term_id']; 333 } else { 334 echo 'Error importing term: ' . $id->get_error_message() . '<br />'; 396 335 continue; 397 398 $slug = $this->get_tag( $c, 'wp:term_slug' ); 399 $description = $this->get_tag( $c, 'wp:term_description' ); 400 401 $termarr = compact('slug', 'description'); 402 403 print '<em>' . sprintf( __( 'Importing <em>%s</em>…' , 'wordpress-importer'), esc_html($term_name) ) . '</em><br />' . "\n"; 404 $term_ID = wp_insert_term($term_name, $this->get_tag( $c, 'wp:term_taxonomy' ), $termarr); 336 } 405 337 } 406 338 } 407 339 408 function process_author($post) { 409 $author = $this->get_tag( $post, 'dc:creator' ); 410 if ($author) 411 $this->allauthornames[] = $author; 412 } 413 340 // extract and compact (?) 
414 341 function process_posts() { 415 echo '<ol>'; 342 foreach ( $this->posts as $post ) { 343 if ( isset( $this->processed_posts[$post['post_id']] ) ) 344 continue; 416 345 417 $this->get_entries(array(&$this, 'process_post')); 346 if ( 'nav_menu_item' == $post['post_type'] ) { 347 $this->process_menu_item( $post ); 348 continue; 349 } 418 350 419 echo '</ol>'; 351 $post_exists = post_exists( $post['post_title'], '', $post['post_date'] ); 352 if ( $post_exists ) { 353 $comment_post_ID = $post_id = $post_exists; 354 } else { 355 $post_parent = (int) $post['post_parent']; 356 if ( $post_parent ) { 357 // if we already know the parent, map it to the new local ID 358 if ( isset( $this->processed_posts[$post_parent] ) ) { 359 $post_parent = $this->processed_posts[$post_parent]; 360 // otherwise record the parent for later 361 } else { 362 $this->post_orphans[intval($post['post_id'])] = $post_parent; 363 $post_parent = 0; 364 } 365 } 420 366 421 wp_import_cleanup($this->id); 422 do_action('import_done', 'wordpress'); 367 // map the post author 368 if ( isset( $this->processed_authors[$post['post_author']] ) ) 369 $author = $this->processed_authors[$post['post_author']]; 370 else 371 $author = (int) get_current_user_id(); 423 372 424 echo '<h3>'.sprintf(__('All done.', 'wordpress-importer').' 
<a href="%s">'.__('Have fun!', 'wordpress-importer').'</a>', get_option('home')).'</h3>'; 425 } 373 $postdata = array( 374 'import_id' => $post['post_id'], 'post_author' => $author, 'post_date' => $post['post_date'], 375 'post_date_gmt' => $post['post_date_gmt'], 'post_content' => $post['post_content'], 376 'post_excerpt' => $post['post_excerpt'], 'post_title' => $post['post_title'], 377 'post_status' => $post['status'], 'post_name' => $post['post_name'], 378 'comment_status' => $post['comment_status'], 'ping_status' => $post['ping_status'], 379 'guid' => $post['guid'], 'post_parent' => $post_parent, 'menu_order' => $post['menu_order'], 380 'post_type' => $post['post_type'], 'post_password' => $post['post_password'] 381 ); 382 383 if ( 'attachment' == $postdata['post_type'] ) { 384 $remote_url = ! empty($post['attachment_url']) ? $post['attachment_url'] : $post['guid']; 385 $comment_post_ID = $post_id = $this->process_attachment( $postdata, $remote_url ); 386 } else { 387 $comment_post_ID = $post_id = wp_insert_post( $postdata, true ); 388 } 426 389 427 function _normalize_tag( $matches ) { 428 return '<' . 
strtolower( $matches[1] ); 429 } 430 431 function process_post($post) { 432 global $wpdb; 433 434 $post_ID = (int) $this->get_tag( $post, 'wp:post_id' ); 435 if ( $post_ID && !empty($this->post_ids_processed[$post_ID]) ) // Processed already 436 return 0; 437 438 set_time_limit( 60 ); 439 440 // There are only ever one of these 441 $post_title = $this->get_tag( $post, 'title' ); 442 $post_date = $this->get_tag( $post, 'wp:post_date' ); 443 $post_date_gmt = $this->get_tag( $post, 'wp:post_date_gmt' ); 444 $comment_status = $this->get_tag( $post, 'wp:comment_status' ); 445 $ping_status = $this->get_tag( $post, 'wp:ping_status' ); 446 $post_status = $this->get_tag( $post, 'wp:status' ); 447 $post_name = $this->get_tag( $post, 'wp:post_name' ); 448 $post_parent = $this->get_tag( $post, 'wp:post_parent' ); 449 $menu_order = $this->get_tag( $post, 'wp:menu_order' ); 450 $post_type = $this->get_tag( $post, 'wp:post_type' ); 451 $post_password = $this->get_tag( $post, 'wp:post_password' ); 452 $is_sticky = $this->get_tag( $post, 'wp:is_sticky' ); 453 $guid = $this->get_tag( $post, 'guid' ); 454 $post_author = $this->get_tag( $post, 'dc:creator' ); 455 456 $post_excerpt = $this->get_tag( $post, 'excerpt:encoded' ); 457 $post_excerpt = preg_replace_callback('|<(/?[A-Z]+)|', array( &$this, '_normalize_tag' ), $post_excerpt); 458 $post_excerpt = str_replace('<br>', '<br />', $post_excerpt); 459 $post_excerpt = str_replace('<hr>', '<hr />', $post_excerpt); 460 461 $post_content = $this->get_tag( $post, 'content:encoded' ); 462 $post_content = preg_replace_callback('|<(/?[A-Z]+)|', array( &$this, '_normalize_tag' ), $post_content); 463 $post_content = str_replace('<br>', '<br />', $post_content); 464 $post_content = str_replace('<hr>', '<hr />', $post_content); 465 466 preg_match_all('|<category domain="tag">(.*?)</category>|is', $post, $tags); 467 $tags = $tags[1]; 468 469 $tag_index = 0; 470 foreach ($tags as $tag) { 471 $tags[$tag_index] = $wpdb->escape( html_entity_decode( 
str_replace(array( '<![CDATA[', ']]>' ), '', $tag ) ) ); 472 $tag_index++; 473 } 474 475 preg_match_all('|<category>(.*?)</category>|is', $post, $categories); 476 $categories = $categories[1]; 477 478 $cat_index = 0; 479 foreach ($categories as $category) { 480 $categories[$cat_index] = $wpdb->escape( html_entity_decode( str_replace( array( '<![CDATA[', ']]>' ), '', $category ) ) ); 481 $cat_index++; 482 } 483 484 $post_exists = post_exists($post_title, '', $post_date); 485 486 if ( $post_exists ) { 487 echo '<li>'; 488 printf(__('Post <em>%s</em> already exists.', 'wordpress-importer'), stripslashes($post_title)); 489 $comment_post_ID = $post_id = $post_exists; 490 } else { 491 492 // If it has parent, process parent first. 493 $post_parent = (int) $post_parent; 494 if ($post_parent) { 495 // if we already know the parent, map it to the local ID 496 if ( isset( $this->post_ids_processed[$post_parent] ) ) { 497 $post_parent = $this->post_ids_processed[$post_parent]; // new ID of the parent 390 if ( is_wp_error( $post_id ) ) { 391 echo 'Error importing post object: ' . $post_id->get_error_message() . 
'<br />'; 392 continue; 498 393 } 499 else {500 // record the parent for later501 $this->orphans[intval($post_ID)] = $post_parent;502 }503 }504 394 505 echo '<li>'; 506 507 $post_author = $this->checkauthor($post_author); //just so that if a post already exists, new users are not created by checkauthor 508 509 $postdata = compact('post_author', 'post_date', 'post_date_gmt', 'post_content', 'post_excerpt', 'post_title', 'post_status', 'post_name', 'comment_status', 'ping_status', 'guid', 'post_parent', 'menu_order', 'post_type', 'post_password'); 510 $postdata['import_id'] = $post_ID; 511 if ($post_type == 'attachment') { 512 $remote_url = $this->get_tag( $post, 'wp:attachment_url' ); 513 if ( !$remote_url ) 514 $remote_url = $guid; 515 516 $comment_post_ID = $post_id = $this->process_attachment($postdata, $remote_url); 517 if ( !$post_id or is_wp_error($post_id) ) 518 return $post_id; 519 } 520 else { 521 printf(__('Importing post <em>%s</em>...', 'wordpress-importer') . "\n", stripslashes($post_title)); 522 $comment_post_ID = $post_id = wp_insert_post($postdata); 523 if ( $post_id && $is_sticky == 1 ) 395 if ( $post['is_sticky'] == 1 ) 524 396 stick_post( $post_id ); 525 526 397 } 527 398 528 if ( is_wp_error( $post_id ) )529 return$post_id;399 // map pre-import ID to local ID 400 $this->processed_posts[intval($post['post_id'])] = (int) $post_id; 530 401 531 // Memorize old and new ID. 532 if ( $post_id && $post_ID ) { 533 $this->post_ids_processed[intval($post_ID)] = intval($post_id); 534 } 535 536 // Add categories. 537 if (count($categories) > 0) { 538 $post_cats = array(); 539 foreach ($categories as $category) { 540 if ( '' == $category ) 541 continue; 542 $slug = sanitize_term_field('slug', $category, 0, 'category', 'db'); 543 $cat = get_term_by('slug', $slug, 'category'); 544 $cat_ID = 0; 545 if ( ! 
empty($cat) ) 546 $cat_ID = $cat->term_id; 547 if ($cat_ID == 0) { 548 $category = $wpdb->escape($category); 549 $cat_ID = wp_insert_category(array('cat_name' => $category)); 550 if ( is_wp_error($cat_ID) ) 402 // add categories, tags and other terms 403 if ( ! empty( $post['terms'] ) ) { 404 foreach ( $post['terms'] as $term ) { 405 $term_exists = term_exists( $term['slug'], $term['domain'] ); 406 $term_id = is_array( $term_exists ) ? $term_exists['term_id'] : $term_exists; 407 if ( ! $term_id ) { 408 $t = wp_insert_term( $term['name'], $term['domain'], array( 'slug' => $term['slug'] ) ); 409 if ( ! is_wp_error( $t ) ) { 410 $term_id = $t['term_id']; 411 } else { 412 echo $term['name'] . ' :: ' . $t->get_error_message(); 551 413 continue; 414 } 552 415 } 553 $ post_cats[] = $cat_ID;416 $terms_to_set[$term['domain']][] = intval( $term_id ); 554 417 } 555 wp_set_post_categories($post_id, $post_cats); 418 419 foreach ( $terms_to_set as $tax => $ids ) { 420 $tt_ids = wp_set_post_terms( $post_id, $ids, $tax ); 421 } 422 unset( $post['terms'], $terms_to_set ); 556 423 } 557 424 558 // Add tags. 559 if (count($tags) > 0) { 560 $post_tags = array(); 561 foreach ($tags as $tag) { 562 if ( '' == $tag ) 563 continue; 564 $slug = sanitize_term_field('slug', $tag, 0, 'post_tag', 'db'); 565 $tag_obj = get_term_by('slug', $slug, 'post_tag'); 566 $tag_id = 0; 567 if ( ! empty($tag_obj) ) 568 $tag_id = $tag_obj->term_id; 569 if ( $tag_id == 0 ) { 570 $tag = $wpdb->escape($tag); 571 $tag_id = wp_insert_term($tag, 'post_tag'); 572 if ( is_wp_error($tag_id) ) 573 continue; 574 $tag_id = $tag_id['term_id']; 425 // add/update comments 426 if ( ! 
empty( $post['comments'] ) ) { 427 $num_comments = 0; 428 $inserted_comments = array(); 429 foreach ( $post['comments'] as $comment ) { 430 $comment_id = $comment['comment_id']; 431 $newcomments[$comment_id]['comment_post_ID'] = $comment_post_ID; 432 $newcomments[$comment_id]['comment_author'] = $comment['comment_author']; 433 $newcomments[$comment_id]['comment_author_email'] = $comment['comment_author_email']; 434 $newcomments[$comment_id]['comment_author_IP'] = $comment['comment_author_IP']; 435 $newcomments[$comment_id]['comment_author_url'] = $comment['comment_author_url']; 436 $newcomments[$comment_id]['comment_date'] = $comment['comment_date']; 437 $newcomments[$comment_id]['comment_date_gmt'] = $comment['comment_date_gmt']; 438 $newcomments[$comment_id]['comment_content'] = $comment['comment_content']; 439 $newcomments[$comment_id]['comment_approved'] = $comment['comment_approved']; 440 $newcomments[$comment_id]['comment_type'] = ! empty( $comment['comment_type'] ) ? $comment['comment_type'] : 'comment'; 441 $newcomments[$comment_id]['comment_parent'] = $comment['comment_parent']; 442 } 443 ksort( $newcomments ); 444 445 foreach ( $newcomments as $key => $comment ) { 446 // if this is a new post we can skip the comment_exists() check 447 if ( ! $post_exists || ! 
comment_exists( $comment['comment_author'], $comment['comment_date'] ) ) { 448 if ( isset( $inserted_comments[$comment['comment_parent']] ) ) 449 $comment['comment_parent'] = $inserted_comments[$comment['comment_parent']]; 450 $comment = wp_filter_comment( $comment ); 451 $inserted_comments[$key] = wp_insert_comment( $comment ); 452 $num_comments++; 575 453 } 576 $post_tags[] = intval($tag_id);577 454 } 578 wp_set_post_tags($post_id, $post_tags);455 unset( $newcomments, $inserted_comments, $post['comments'] ); 579 456 } 580 }581 457 582 // Now for comments 583 preg_match_all('|<wp:comment>(.*?)</wp:comment>|is', $post, $comments); 584 $comments = $comments[1]; 585 $num_comments = 0; 586 $inserted_comments = array(); 587 if ( $comments) { 588 foreach ($comments as $comment) { 589 $comment_id = $this->get_tag( $comment, 'wp:comment_id'); 590 $newcomments[$comment_id]['comment_post_ID'] = $comment_post_ID; 591 $newcomments[$comment_id]['comment_author'] = $this->get_tag( $comment, 'wp:comment_author'); 592 $newcomments[$comment_id]['comment_author_email'] = $this->get_tag( $comment, 'wp:comment_author_email'); 593 $newcomments[$comment_id]['comment_author_IP'] = $this->get_tag( $comment, 'wp:comment_author_IP'); 594 $newcomments[$comment_id]['comment_author_url'] = $this->get_tag( $comment, 'wp:comment_author_url'); 595 $newcomments[$comment_id]['comment_date'] = $this->get_tag( $comment, 'wp:comment_date'); 596 $newcomments[$comment_id]['comment_date_gmt'] = $this->get_tag( $comment, 'wp:comment_date_gmt'); 597 $newcomments[$comment_id]['comment_content'] = $this->get_tag( $comment, 'wp:comment_content'); 598 $newcomments[$comment_id]['comment_approved'] = $this->get_tag( $comment, 'wp:comment_approved'); 599 $newcomments[$comment_id]['comment_type'] = $this->get_tag( $comment, 'wp:comment_type'); 600 $newcomments[$comment_id]['comment_parent'] = $this->get_tag( $comment, 'wp:comment_parent'); 601 } 602 // Sort by comment ID, to make sure comment parents exist (if 
there at all) 603 ksort($newcomments); 604 foreach ($newcomments as $key => $comment) { 605 // if this is a new post we can skip the comment_exists() check 606 if ( !$post_exists || !comment_exists($comment['comment_author'], $comment['comment_date']) ) { 607 if (isset($inserted_comments[$comment['comment_parent']])) 608 $comment['comment_parent'] = $inserted_comments[$comment['comment_parent']]; 609 $comment = wp_filter_comment($comment); 610 $inserted_comments[$key] = wp_insert_comment($comment); 611 $num_comments++; 458 // add/update post meta 459 if ( isset( $post['postmeta'] ) ) { 460 foreach ( $post['postmeta'] as $meta ) { 461 $key = apply_filters( 'import_post_meta_key', $meta['key'] ); 462 if ( $key ) { 463 update_post_meta( $post_id, $key, $meta['value'] ); 464 do_action( 'import_post_meta', $post_id, $key, $meta['value'] ); 465 } 612 466 } 613 467 } 614 468 } 615 616 if ( $num_comments )617 printf(' '._n('(%s comment)', '(%s comments)', $num_comments, 'wordpress-importer'), $num_comments);618 619 // Now for post meta620 preg_match_all('|<wp:postmeta>(.*?)</wp:postmeta>|is', $post, $postmeta);621 $postmeta = $postmeta[1];622 if ( $postmeta) { foreach ($postmeta as $p) {623 $key = $this->get_tag( $p, 'wp:meta_key' );624 $value = $this->get_tag( $p, 'wp:meta_value' );625 626 $this->process_post_meta($post_id, $key, $value);627 628 } }629 630 do_action('import_post_added', $post_id);631 print "</li>\n";632 469 } 633 470 634 function process_post_meta($post_id, $key, $value) { 635 // the filter can return false to skip a particular metadata key 636 $_key = apply_filters('import_post_meta_key', $key); 637 if ( $_key ) { 638 add_post_meta( $post_id, $_key, $value ); 639 do_action('import_post_meta', $post_id, $_key, $value); 640 } 641 } 642 643 function process_attachment($postdata, $remote_url) { 644 if ($this->fetch_attachments and $remote_url) { 645 printf( __('Importing attachment <em>%s</em>... 
', 'wordpress-importer'), htmlspecialchars($remote_url) ); 646 647 // If the URL is absolute, but does not contain http, upload it assuming the base_site_url variable 648 if ( preg_match('/^\/[\w\W]+$/', $remote_url) ) 649 $remote_url = rtrim($this->base_url,'/').$remote_url; 650 651 $upload = $this->fetch_remote_file($postdata, $remote_url); 652 if ( is_wp_error($upload) ) { 653 printf( __('Remote file error: %s', 'wordpress-importer'), htmlspecialchars($upload->get_error_message()) ); 654 return $upload; 655 } 656 else { 657 print '('.size_format(filesize($upload['file'])).')'; 658 } 659 660 if ( 0 == filesize( $upload['file'] ) ) { 661 print __( "Zero length file, deleting" , 'wordpress-importer') . "\n"; 662 unlink( $upload['file'] ); 471 function process_menu_item( $item ) { 472 if ( isset( $item['terms'][0]['slug'] ) ) { 473 $menu_id = term_exists( $item['terms'][0]['slug'], 'nav_menu' ); 474 if ( ! $menu_id ) { 475 echo 'Menu item skipped due to invalid menu slug'; 663 476 return; 477 } else { 478 $menu_id = is_array( $menu_id ) ? 
$menu_id['term_id'] : $menu_id; 664 479 } 480 } else { 481 echo 'Menu item skipped due to missing menu slug'; 482 return; 483 } 665 484 666 if ( $info = wp_check_filetype($upload['file']) ) { 667 $postdata['post_mime_type'] = $info['type']; 668 } 669 else { 670 print __('Invalid file type', 'wordpress-importer'); 671 return; 672 } 485 foreach ( $item['postmeta'] as $meta ) 486 $$meta['key'] = $meta['value']; 673 487 674 $postdata['guid'] = $upload['url']; 488 if ( 'taxonomy' == $_menu_item_type && isset( $this->processed_terms[intval($_menu_item_object_id)] ) ) { 489 $_menu_item_object_id = $this->processed_terms[intval($_menu_item_object_id)]; 490 } else if ( 'post_type' == $_menu_item_type && isset( $this->processed_posts[intval($_menu_item_object_id)] ) ) { 491 $_menu_item_object_id = $this->processed_posts[intval($_menu_item_object_id)]; 492 } else if ( 'custom' != $_menu_item_type ) { 493 // associated object is missing or not imported yet, we'll retry later 494 $this->missing_menu_items[] = $item; 495 return; 496 } 675 497 676 // as per wp-admin/includes/upload.php 677 $post_id = wp_insert_attachment($postdata, $upload['file']); 678 wp_update_attachment_metadata( $post_id, wp_generate_attachment_metadata( $post_id, $upload['file'] ) ); 498 if ( isset( $this->processed_menu_items[intval($_menu_item_menu_item_parent)] ) ) { 499 $_menu_item_menu_item_parent = $this->processed_menu_items[intval($_menu_item_menu_item_parent)]; 500 } else if ( $_menu_item_menu_item_parent ) { 501 $this->menu_item_orphans[intval($item['post_id'])] = (int) $_menu_item_menu_item_parent; 502 $_menu_item_menu_item_parent = 0; 503 } 679 504 680 // remap the thumbnail url. this isn't perfect because we're just guessing the original url. 
681 if ( preg_match('@^image/@', $info['type']) && $thumb_url = wp_get_attachment_thumb_url($post_id) ) { 682 $parts = pathinfo($remote_url); 683 $ext = $parts['extension']; 684 $name = basename($parts['basename'], ".{$ext}"); 685 $this->url_remap[$parts['dirname'] . '/' . $name . '.thumbnail.' . $ext] = $thumb_url; 686 } 505 $args = array( 506 'menu-item-object-id' => $_menu_item_object_id, 507 'menu-item-object' => $_menu_item_object, 508 'menu-item-parent-id' => $_menu_item_menu_item_parent, 509 'menu-item-position' => intval( $item['menu_order'] ), 510 'menu-item-type' => $_menu_item_type, 511 'menu-item-title' => $item['post_title'], 512 'menu-item-url' => $_menu_item_url, 513 'menu-item-description' => $item['post_content'], 514 'menu-item-attr-title' => $item['post_excerpt'], 515 'menu-item-target' => $_menu_item_target, 516 'menu-item-classes' => $_menu_item_classes, 517 'menu-item-xfn' => $_menu_item_xfn, 518 'menu-item-status' => $item['status'] 519 ); 687 520 688 return $post_id; 689 } 690 else { 691 printf( __('Skipping attachment <em>%s</em>', 'wordpress-importer'), htmlspecialchars($remote_url) ); 692 } 521 $id = wp_update_nav_menu_item( $menu_id, 0, $args ); 522 if ( $id && ! is_wp_error( $id ) ) 523 $this->processed_menu_items[intval($item['post_id'])] = (int) $id; 693 524 } 525 526 function process_attachment( $post, $url ) { 527 if ( ! ( $this->fetch_attachments && $url ) ) 528 return new WP_Error( 'attachment_processing_error', 529 __( 'Fetching attachments is not allowed or an empty URL was provided', 'wordpress-importer' ) ); 530 531 // if the URL is absolute, but does not contain address, then upload it assuming base_site_url 532 if ( preg_match( '|^/[\w\W]+$|', $url ) ) 533 $url = rtrim( $this->base_url, '/' ) . 
$url; 534 535 $upload = $this->fetch_remote_file( $url, $post ); 536 if ( is_wp_error( $upload ) ) 537 return $upload; 538 539 if ( $info = wp_check_filetype( $upload['file'] ) ) 540 $post['post_mime_type'] = $info['type']; 541 else 542 return new WP_Error( 'attachment_processing_error', __('Invalid file type', 'wordpress-importer') ); 543 544 $post['guid'] = $upload['url']; 694 545 695 function fetch_remote_file( $post, $url ) { 696 add_filter( 'http_request_timeout', array( &$this, 'bump_request_timeout' ) ); 546 // as per wp-admin/includes/upload.php 547 $post_id = wp_insert_attachment( $post, $upload['file'] ); 548 wp_update_attachment_metadata( $post_id, wp_generate_attachment_metadata( $post_id, $upload['file'] ) ); 697 549 698 $upload = wp_upload_dir($post['post_date']); 550 // remap the thumbnail url. this isn't perfect because we're just guessing the original url. 551 if ( preg_match( '@^image/@', $info['type'] ) && $thumb_url = wp_get_attachment_thumb_url( $post_id ) ) { 552 $parts = pathinfo( $url ); 553 $ext = $parts['extension']; 554 $name = basename($parts['basename'], ".{$ext}"); 555 $this->url_remap[$parts['dirname'] . '/' . $name . '.thumbnail.' . 
$ext] = $thumb_url; 556 } 699 557 558 return $post_id; 559 } 560 561 function fetch_remote_file( $url, $post ) { 562 add_filter( 'http_request_timeout', array( &$this, 'bump_request_timeout' ) ); 563 700 564 // extract the file name and extension from the url 701 $file_name = basename($url); 702 703 // get placeholder file in the upload dir with a unique sanitized filename 704 $upload = wp_upload_bits( $file_name, 0, '', $post['post_date']); 705 if ( $upload['error'] ) { 706 echo $upload['error']; 565 $file_name = basename( $url ); 566 567 // get placeholder file in the upload dir with a unique, sanitized filename 568 $upload = wp_upload_bits( $file_name, 0, '', $post['post_date'] ); 569 if ( $upload['error'] ) 707 570 return new WP_Error( 'upload_dir_error', $upload['error'] ); 708 }709 571 710 572 // fetch the remote url and write it to the placeholder file 711 $headers = wp_get_http( $url, $upload['file']);573 $headers = wp_get_http( $url, $upload['file'] ); 712 574 713 // Request failed575 // request failed 714 576 if ( ! 
$headers ) { 715 @unlink( $upload['file']);577 @unlink( $upload['file'] ); 716 578 return new WP_Error( 'import_file_error', __('Remote server did not respond', 'wordpress-importer') ); 717 579 } 718 580 719 581 // make sure the fetch was successful 720 582 if ( $headers['response'] != '200' ) { 721 @unlink( $upload['file']);722 return new WP_Error( 'import_file_error', sprintf( __('Remote filereturned error response %1$d %2$s', 'wordpress-importer'), $headers['response'], get_status_header_desc($headers['response']) ) );583 @unlink( $upload['file'] ); 584 return new WP_Error( 'import_file_error', sprintf( __('Remote server returned error response %1$d %2$s', 'wordpress-importer'), $headers['response'], get_status_header_desc($headers['response']) ) ); 723 585 } 724 elseif ( isset($headers['content-length']) && filesize($upload['file']) != $headers['content-length'] ) { 725 @unlink($upload['file']); 586 587 $filesize = filesize( $upload['file'] ); 588 589 if ( isset( $headers['content-length'] ) && $filesize != $headers['content-length'] ) { 590 @unlink( $upload['file'] ); 726 591 return new WP_Error( 'import_file_error', __('Remote file is incorrect size', 'wordpress-importer') ); 727 592 } 593 594 if ( 0 == $filesize ) { 595 @unlink( $upload['file'] ); 596 return new WP_Error( 'import_file_error', __('Zero size file downloaded', 'wordpress-importer') ); 597 } 728 598 729 $max_size = $this->max_attachment_size();730 if ( ! empty($max_size) and filesize($upload['file'])> $max_size ) {731 @unlink( $upload['file']);599 $max_size = (int) $this->max_attachment_size(); 600 if ( ! 
empty( $max_size ) && $filesize > $max_size ) { 601 @unlink( $upload['file'] ); 732 602 return new WP_Error( 'import_file_error', sprintf(__('Remote file is too large, limit is %s', size_format($max_size), 'wordpress-importer')) ); 733 603 } 734 604 … … 736 606 $this->url_remap[$url] = $upload['url']; 737 607 $this->url_remap[$post['guid']] = $upload['url']; 738 608 // if the remote url is redirected somewhere else, keep track of the destination too 739 if ( $headers['x-final-location'] != $url )609 if ( isset($headers['x-final-location']) && $headers['x-final-location'] != $url ) 740 610 $this->url_remap[$headers['x-final-location']] = $upload['url']; 741 611 742 return $upload; 743 612 return $upload; 744 613 } 745 614 746 /** 747 * Bump up the request timeout for http requests 748 * 749 * @param int $val 750 * @return int 751 */ 752 function bump_request_timeout( $val ) { 753 return 60; 754 } 615 function backfill_parents() { 616 global $wpdb; 755 617 756 // sort by strlen, longest string first 757 function cmpr_strlen($a, $b) { 758 return strlen($b) - strlen($a); 618 foreach ( $this->post_orphans as $child_id => $parent_id ) { 619 $local_child_id = $local_parent_id = false; 620 if ( isset( $this->processed_posts[$child_id] ) ) 621 $local_child_id = $this->processed_posts[$child_id]; 622 if ( isset( $this->processed_posts[$parent_id] ) ) 623 $local_parent_id = $this->processed_posts[$parent_id]; 624 625 if ( $local_child_id && $local_parent_id ) 626 $wpdb->update( $wpdb->posts, array( 'post_parent' => $local_parent_id ), array( 'ID' => $local_child_id ), '%d', '%d' ); 627 } 628 629 // all other posts/terms are imported, retry menu items with missing associated object 630 $missing_menu_items = $this->missing_menu_items; 631 foreach ( $missing_menu_items as $item ) 632 $this->process_menu_item( $item ); 633 634 foreach ( $this->menu_item_orphans as $child_id => $parent_id ) { 635 $local_child_id = $local_parent_id = 0; 636 if ( isset( 
$this->processed_menu_items[$child_id] ) ) 637 $local_child_id = $this->processed_menu_items[$child_id]; 638 if ( isset( $this->processed_menu_items[$parent_id] ) ) 639 $local_parent_id = $this->processed_menu_items[$parent_id]; 640 641 if ( $local_child_id && $local_parent_id ) 642 update_post_meta( $local_child_id, '_menu_item_menu_item_parent', (int) $local_parent_id ); 643 } 759 644 } 760 645 761 // update url references in post bodies to point to the new local files762 646 function backfill_attachment_urls() { 763 647 global $wpdb; 648 764 649 // make sure we do the longest urls first, in case one is a substring of another 765 uksort( $this->url_remap, array(&$this, 'cmpr_strlen'));650 uksort( $this->url_remap, array(&$this, 'cmpr_strlen') ); 766 651 767 global $wpdb; 768 foreach ($this->url_remap as $from_url => $to_url) { 652 foreach ( $this->url_remap as $from_url => $to_url ) { 769 653 // remap urls in post_content 770 $wpdb->query( $wpdb->prepare("UPDATE {$wpdb->posts} SET post_content = REPLACE(post_content, '%s', '%s')", $from_url, $to_url) );654 $wpdb->query( $wpdb->prepare("UPDATE {$wpdb->posts} SET post_content = REPLACE(post_content, %s, %s)", $from_url, $to_url) ); 771 655 // remap enclosure urls 772 $result = $wpdb->query( $wpdb->prepare("UPDATE {$wpdb->postmeta} SET meta_value = REPLACE(meta_value, '%s', '%s') WHERE meta_key='enclosure'", $from_url, $to_url) );656 $result = $wpdb->query( $wpdb->prepare("UPDATE {$wpdb->postmeta} SET meta_value = REPLACE(meta_value, %s, %s) WHERE meta_key='enclosure'", $from_url, $to_url) ); 773 657 } 658 } 659 660 function parse( $file ) { 661 $parser = new WXR_Parser(); 662 return $parser->parse( $file ); 774 663 } 775 664 776 // update the post_parent of orphans now that we know the local id's of all parents 777 function backfill_parents() { 778 global $wpdb; 665 function header() { 666 echo '<div class="wrap">'; 667 screen_icon(); 668 echo '<h2>' . __( 'Import WordPress', 'wordpress-importer' ) . 
'</h2>'; 669 } 779 670 780 foreach ($this->orphans as $child_id => $parent_id) { 781 $local_child_id = $local_parent_id = false; 782 if ( isset( $this->post_ids_processed[$child_id] ) ) 783 $local_child_id = $this->post_ids_processed[$child_id]; 784 if ( isset( $this->post_ids_processed[$parent_id] ) ) 785 $local_parent_id = $this->post_ids_processed[$parent_id]; 671 function footer() { 672 echo '</div>'; 673 } 786 674 787 if ($local_child_id and $local_parent_id) { 788 $wpdb->update($wpdb->posts, array('post_parent' => $local_parent_id), array('ID' => $local_child_id) ); 789 } 790 } 675 function greet() { 676 echo '<div class="narrow">'; 677 echo '<p>'.__( 'Howdy! Upload your WordPress eXtended RSS (WXR) file and we’ll import the posts, pages, comments, custom fields, categories, and tags into this site.', 'wordpress-importer' ).'</p>'; 678 echo '<p>'.__( 'Choose a WXR file to upload, then click Upload file and import.', 'wordpress-importer' ).'</p>'; 679 wp_import_upload_form( 'admin.php?import=wordpress&step=1' ); 680 echo '</div>'; 791 681 } 792 682 793 function is_valid_meta_key( $key) {683 function is_valid_meta_key( $key ) { 794 684 // skip attachment metadata since we'll regenerate it from scratch 795 if ( $key == '_wp_attached_file' || $key == '_wp_attachment_metadata' ) 685 // skip _edit_lock and _edit_last 686 if ( in_array( $key, array( '_wp_attached_file', '_wp_attachment_metadata', '_edit_lock', '_edit_last' ) ) ) 796 687 return false; 797 688 return $key; 798 689 } 799 690 800 691 // give the user the option of creating new users to represent authors in the import file? 
801 692 function allow_create_users() { 802 return apply_filters( 'import_allow_create_users', true);693 return apply_filters( 'import_allow_create_users', true ); 803 694 } 804 695 805 696 // give the user the option of downloading and importing attached files 806 697 function allow_fetch_attachments() { 807 return apply_filters( 'import_allow_fetch_attachments', true);698 return apply_filters( 'import_allow_fetch_attachments', true ); 808 699 } 809 700 701 function bump_request_timeout() { 702 return 60; 703 } 704 810 705 function max_attachment_size() { 811 // can be overridden with a filter - 0 means no limit 812 return apply_filters('import_attachment_size_limit', 0); 706 return apply_filters( 'import_attachment_size_limit', 0 ); 813 707 } 814 815 function import_start() { 816 wp_defer_term_counting(true); 817 wp_defer_comment_counting(true); 818 do_action('import_start'); 819 } 820 821 function import_end() { 822 do_action('import_end'); 823 824 // clear the caches after backfilling 825 foreach ($this->post_ids_processed as $post_id) 826 clean_post_cache($post_id); 827 828 wp_defer_term_counting(false); 829 wp_defer_comment_counting(false); 830 } 831 832 function import($id, $fetch_attachments = false) { 833 $this->id = (int) $id; 834 $this->fetch_attachments = ($this->allow_fetch_attachments() && (bool) $fetch_attachments); 835 836 add_filter('import_post_meta_key', array($this, 'is_valid_meta_key')); 837 $file = get_attached_file($this->id); 838 $this->import_file($file); 839 } 840 841 function import_file($file) { 842 $this->file = $file; 843 844 $this->import_start(); 845 $this->get_authors_from_post(); 846 wp_suspend_cache_invalidation(true); 847 $this->get_entries(); 848 $this->process_categories(); 849 $this->process_tags(); 850 $this->process_terms(); 851 $result = $this->process_posts(); 852 wp_suspend_cache_invalidation(false); 853 $this->backfill_parents(); 854 $this->backfill_attachment_urls(); 855 $this->import_end(); 856 857 if ( is_wp_error( 
$result ) ) 858 return $result; 859 } 860 861 function handle_upload() { 862 $file = wp_import_handle_upload(); 863 if ( isset($file['error']) ) { 864 echo '<p>'.__('Sorry, there has been an error.', 'wordpress-importer').'</p>'; 865 echo '<p><strong>' . $file['error'] . '</strong></p>'; 866 return false; 867 } 868 $this->file = $file['file']; 869 $this->id = (int) $file['id']; 870 return true; 871 } 872 873 function dispatch() { 874 if (empty ($_GET['step'])) 875 $step = 0; 876 else 877 $step = (int) $_GET['step']; 878 879 $this->header(); 880 switch ($step) { 881 case 0 : 882 $this->greet(); 883 break; 884 case 1 : 885 check_admin_referer('import-upload'); 886 if ( $this->handle_upload() ) 887 $this->select_authors(); 888 break; 889 case 2: 890 check_admin_referer('import-wordpress'); 891 $fetch_attachments = ! empty( $_POST['attachments'] ); 892 $result = $this->import( $_GET['id'], $fetch_attachments); 893 if ( is_wp_error( $result ) ) 894 echo $result->get_error_message(); 895 break; 896 } 897 $this->footer(); 898 } 899 900 function WP_Import() { 901 // Nothing. 902 } 708 709 function cmpr_strlen( $a, $b ) { 710 return strlen($b) - strlen($a); 711 } 903 712 } 904 713 905 714 /**