Ticket #15197: 15197-import.002.diff
File 15197-import.002.diff, 76.8 KB (added by , 15 years ago) |
---|
-
trunk/parsers.php
<?php
/**
 * WordPress eXtended RSS file parser implementations
 *
 * Every parser in this file exposes parse( $file ) and returns either a
 * WP_Error or an array with the keys 'authors', 'posts', 'categories',
 * 'tags', 'terms' and 'base_url'.
 *
 * @package WordPress
 * @subpackage Importer
 */

/**
 * WordPress Importer class for managing parsing of WXR files.
 *
 * Chooses a concrete parser based on the PHP extensions that are loaded:
 * SimpleXML first, then the XML Parser extension, then the regex fallback.
 */
class WXR_Parser {
	/**
	 * Parse a WXR file with the first parser the PHP install supports.
	 *
	 * @param string $file Path to the WXR file.
	 * @return array|WP_Error Whatever the selected parser's parse() returns.
	 */
	function parse( $file ) {
		if ( extension_loaded( 'simplexml' ) )
			$parser = new WXR_Parser_SimpleXML;
		else if ( extension_loaded( 'xml' ) )
			$parser = new WXR_Parser_XML;
		else
			$parser = new WXR_Parser_Regex;

		return $parser->parse( $file );
	}
}

/**
 * WXR Parser that makes use of the SimpleXML PHP extension.
 */
class WXR_Parser_SimpleXML {
	/**
	 * Parse a WXR file.
	 *
	 * libxml errors are collected internally while the document is processed
	 * so that malformed files do not emit PHP warnings; the previous libxml
	 * error setting is restored (and the collected errors cleared) before
	 * returning, whatever the outcome.
	 *
	 * @param string $file Path to the WXR file.
	 * @return array|WP_Error Parsed data, or WP_Error if the file cannot be
	 *                        loaded or has no valid WXR version number.
	 */
	function parse( $file ) {
		$internal_errors = libxml_use_internal_errors( true );

		$result = $this->_parse_document( $file );

		// don't leak our libxml configuration or error buffer into the rest of the request
		libxml_clear_errors();
		libxml_use_internal_errors( $internal_errors );

		return $result;
	}

	/**
	 * Load the file with SimpleXML and extract authors, terms and posts.
	 *
	 * @param string $file Path to the WXR file.
	 * @return array|WP_Error
	 */
	function _parse_document( $file ) {
		$authors = $posts = $categories = $tags = $terms = array();

		$xml = simplexml_load_file( $file );
		// halt if loading produces an error
		if ( ! $xml )
			return new WP_Error( 'WXR_parse_error', __( 'There was an error when reading this WXR file', 'wordpress-importer' ) );

		$wxr_version = $xml->xpath('/rss/channel/wp:wxr_version');
		if ( ! $wxr_version )
			return new WP_Error( 'WXR_parse_error', __( 'This does not appear to be a WXR file, missing/invalid WXR version number', 'wordpress-importer' ) );

		$wxr_version = (string) trim( $wxr_version[0] );
		// confirm that we are dealing with the correct file format;
		// multi-digit components are accepted, as in the other parsers
		if ( ! preg_match( '/^\d+\.\d+$/', $wxr_version ) )
			return new WP_Error( 'WXR_parse_error', __( 'This does not appear to be a WXR file, missing/invalid WXR version number', 'wordpress-importer' ) );

		// the base site URL is optional: fall back to an empty string when absent
		$base_url = $xml->xpath('/rss/channel/wp:base_site_url');
		$base_url = ! empty( $base_url ) ? trim( (string) $base_url[0] ) : '';

		$namespaces = $xml->getDocNamespaces();
		if ( ! isset( $namespaces['wp'] ) )
			$namespaces['wp'] = 'http://wordpress.org/export/1.1/';
		if ( ! isset( $namespaces['excerpt'] ) )
			$namespaces['excerpt'] = 'http://wordpress.org/export/1.1/excerpt/';

		// grab authors, keyed by login
		foreach ( $xml->xpath('/rss/channel/wp:author') as $author_arr ) {
			$a = $author_arr->children( $namespaces['wp'] );
			$login = (string) $a->author_login;
			$authors[$login] = array(
				'author_login' => $login,
				'author_email' => (string) $a->author_email,
				'author_display_name' => (string) $a->author_display_name,
				'author_first_name' => (string) $a->author_first_name,
				'author_last_name' => (string) $a->author_last_name
			);
		}

		// grab cats, tags and terms
		foreach ( $xml->xpath('/rss/channel/wp:category') as $term_arr ) {
			$t = $term_arr->children( $namespaces['wp'] );
			$categories[] = array(
				'term_id' => (int) $t->term_id,
				'category_nicename' => (string) $t->category_nicename,
				'category_parent' => (string) $t->category_parent,
				'cat_name' => (string) $t->cat_name,
				'category_description' => (string) $t->category_description
			);
		}

		foreach ( $xml->xpath('/rss/channel/wp:tag') as $term_arr ) {
			$t = $term_arr->children( $namespaces['wp'] );
			$tags[] = array(
				'term_id' => (int) $t->term_id,
				'tag_slug' => (string) $t->tag_slug,
				'tag_name' => (string) $t->tag_name,
				'tag_description' => (string) $t->tag_description
			);
		}

		foreach ( $xml->xpath('/rss/channel/wp:term') as $term_arr ) {
			$t = $term_arr->children( $namespaces['wp'] );
			$terms[] = array(
				'term_id' => (int) $t->term_id,
				'term_taxonomy' => (string) $t->term_taxonomy,
				'slug' => (string) $t->term_slug,
				'term_parent' => (string) $t->term_parent,
				'term_name' => (string) $t->term_name,
				'term_description' => (string) $t->term_description
			);
		}

		// grab posts
		foreach ( $xml->channel->item as $item ) {
			$post = array(
				'post_title' => (string) $item->title,
				'guid' => (string) $item->guid,
			);

			$dc = $item->children( 'http://purl.org/dc/elements/1.1/' );
			$post['post_author'] = (string) $dc->creator;

			$content = $item->children( 'http://purl.org/rss/1.0/modules/content/' );
			$excerpt = $item->children( $namespaces['excerpt'] );
			$post['post_content'] = (string) $content->encoded;
			$post['post_excerpt'] = (string) $excerpt->encoded;

			$wp = $item->children( $namespaces['wp'] );
			$post['post_id'] = (int) $wp->post_id;
			$post['post_date'] = (string) $wp->post_date;
			$post['post_date_gmt'] = (string) $wp->post_date_gmt;
			$post['comment_status'] = (string) $wp->comment_status;
			$post['ping_status'] = (string) $wp->ping_status;
			$post['post_name'] = (string) $wp->post_name;
			$post['status'] = (string) $wp->status;
			$post['post_parent'] = (int) $wp->post_parent;
			$post['menu_order'] = (int) $wp->menu_order;
			$post['post_type'] = (string) $wp->post_type;
			$post['post_password'] = (string) $wp->post_password;
			$post['is_sticky'] = (int) $wp->is_sticky;

			// only <category> elements carrying a nicename attribute are recorded as terms
			foreach ( $item->category as $c ) {
				$att = $c->attributes();
				if ( isset( $att['nicename'] ) )
					$post['terms'][] = array(
						'name' => (string) $c,
						'slug' => (string) $att['nicename'],
						'domain' => (string) $att['domain']
					);
			}

			foreach ( $wp->postmeta as $meta ) {
				$post['postmeta'][] = array(
					'key' => (string) $meta->meta_key,
					'value' => (string) $meta->meta_value,
				);
			}

			foreach ( $wp->comment as $comment ) {
				$post['comments'][] = array(
					'comment_id' => (int) $comment->comment_id,
					'comment_author' => (string) $comment->comment_author,
					'comment_author_email' => (string) $comment->comment_author_email,
					'comment_author_IP' => (string) $comment->comment_author_IP,
					'comment_author_url' => (string) $comment->comment_author_url,
					'comment_date' => (string) $comment->comment_date,
					'comment_date_gmt' => (string) $comment->comment_date_gmt,
					'comment_content' => (string) $comment->comment_content,
					'comment_approved' => (string) $comment->comment_approved,
					'comment_type' => (string) $comment->comment_type,
					'comment_parent' => (string) $comment->comment_parent,
					'comment_user_id' => (int) $comment->comment_user_id,
				);
			}

			$posts[] = $post;
		}

		return array(
			'authors' => $authors,
			'posts' => $posts,
			'categories' => $categories,
			'tags' => $tags,
			'terms' => $terms,
			'base_url' => $base_url
		);
	}
}

/**
 * WXR Parser that makes use of the XML Parser PHP extension.
 *
 * Event-driven: tag_open() notes which field the current element maps to,
 * cdata() accumulates its text, and tag_close() stores the text and files
 * completed records (posts, terms, authors, comments, postmeta).
 */
class WXR_Parser_XML {
	// elements whose text is stored on the current top-level record under the tag name minus 'wp:'
	var $wp_tags = array(
		'wp:post_id', 'wp:post_date', 'wp:post_date_gmt', 'wp:comment_status', 'wp:ping_status',
		'wp:status', 'wp:post_name', 'wp:post_parent', 'wp:menu_order', 'wp:post_type', 'wp:post_password',
		'wp:is_sticky', 'wp:term_id', 'wp:category_nicename', 'wp:category_parent', 'wp:cat_name', 'wp:category_description',
		'wp:tag_slug', 'wp:tag_name', 'wp:tag_description', 'wp:term_taxonomy', 'wp:term_parent',
		'wp:term_name', 'wp:term_description', 'wp:author_login', 'wp:author_email', 'wp:author_display_name',
		'wp:author_first_name', 'wp:author_last_name',
	);
	// elements whose text is stored on the current nested record (a comment)
	var $wp_sub_tags = array(
		'wp:comment_id', 'wp:comment_author', 'wp:comment_author_email', 'wp:comment_author_url',
		'wp:comment_author_IP', 'wp:comment_date', 'wp:comment_date_gmt', 'wp:comment_content',
		'wp:comment_approved', 'wp:comment_type', 'wp:comment_parent', 'wp:comment_user_id',
	);

	// parser state, reset at the start of every parse()
	var $is_wxr_file = false;
	var $in_post = false;
	var $in_tag = false;
	var $in_sub_tag = false;
	var $cdata = false;
	var $data = array();
	var $sub_data = array();

	// results
	var $authors = array();
	var $posts = array();
	var $term = array();
	var $category = array();
	var $tag = array();
	var $base_url = '';

	/**
	 * Parse a WXR file.
	 *
	 * @param string $file Path to the WXR file.
	 * @return array|WP_Error Parsed data, or WP_Error when the file cannot be
	 *                        read, is not well-formed XML, or carries no
	 *                        valid wp:wxr_version element.
	 */
	function parse( $file ) {
		$this->is_wxr_file = $this->in_post = $this->cdata = $this->in_tag = $this->in_sub_tag = false;
		$this->data = $this->sub_data = array();
		$this->authors = $this->posts = $this->term = $this->category = $this->tag = array();
		$this->base_url = '';

		$contents = file_get_contents( $file );
		if ( false === $contents )
			return new WP_Error( 'WXR_parse_error', __( 'There was an error when reading this WXR file', 'wordpress-importer' ) );

		$xml = xml_parser_create( 'UTF-8' );
		xml_parser_set_option( $xml, XML_OPTION_SKIP_WHITE, 1 );
		xml_parser_set_option( $xml, XML_OPTION_CASE_FOLDING, 0 );
		xml_set_character_data_handler( $xml, array( $this, 'cdata' ) );
		xml_set_element_handler( $xml, array( $this, 'tag_open' ), array( $this, 'tag_close' ) );

		$parsed = xml_parse( $xml, $contents, true );
		// read the error code before releasing the parser
		$error_code = $parsed ? 0 : xml_get_error_code( $xml );

		// release the parser on success AND failure; only needed while parsers are resources
		if ( is_resource( $xml ) )
			xml_parser_free( $xml );

		if ( ! $parsed ) {
			$error_string = xml_error_string( $error_code );
			return new WP_Error( 'WXR_parse_error', __( 'There was an error when reading this WXR file', 'wordpress-importer' ), array( $error_code, $error_string ) );
		}

		if ( ! $this->is_wxr_file )
			return new WP_Error( 'WXR_parse_error', __( 'This does not appear to be a WXR file, missing/invalid WXR version number', 'wordpress-importer' ) );

		return array(
			'authors' => $this->authors,
			'posts' => $this->posts,
			'categories' => $this->category,
			'tags' => $this->tag,
			'terms' => $this->term,
			'base_url' => $this->base_url
		);
	}

	/**
	 * Element-start handler: remember which field the element's text belongs to.
	 *
	 * @param mixed  $parse Parser handle (unused).
	 * @param string $tag   Element name, including any namespace prefix.
	 * @param array  $attr  Element attributes.
	 */
	function tag_open( $parse, $tag, $attr ) {
		if ( in_array( $tag, $this->wp_tags, true ) ) {
			$this->in_tag = substr( $tag, 3 );
			return;
		}

		if ( in_array( $tag, $this->wp_sub_tags, true ) ) {
			$this->in_sub_tag = substr( $tag, 3 );
			return;
		}

		switch ( $tag ) {
			case 'category':
				if ( isset( $attr['domain'], $attr['nicename'] ) ) {
					$this->sub_data['domain'] = $attr['domain'];
					$this->sub_data['slug'] = $attr['nicename'];
				}
				break;
			case 'item':
				$this->in_post = true;
				// must not fall through: otherwise an item without a <title>
				// would take the text of its next unhandled child as its title
				break;
			case 'title':
				// only item titles are of interest, not the channel title
				if ( $this->in_post )
					$this->in_tag = 'post_title';
				break;
			case 'guid': $this->in_tag = 'guid'; break;
			case 'dc:creator': $this->in_tag = 'post_author'; break;
			case 'content:encoded': $this->in_tag = 'post_content'; break;
			case 'excerpt:encoded': $this->in_tag = 'post_excerpt'; break;

			case 'wp:term_slug': $this->in_tag = 'slug'; break;
			case 'wp:meta_key': $this->in_sub_tag = 'key'; break;
			case 'wp:meta_value': $this->in_sub_tag = 'value'; break;
		}
	}

	/**
	 * Character-data handler: accumulate the text of the current element.
	 *
	 * The parser may deliver one element's text in several chunks, so chunks
	 * are appended untouched and the whole value is trimmed once in
	 * tag_close(). Whitespace arriving before any real text is skipped.
	 *
	 * @param mixed  $parser Parser handle (unused).
	 * @param string $cdata  Chunk of character data.
	 */
	function cdata( $parser, $cdata ) {
		// compare against '' rather than testing truthiness so a literal "0" is kept
		if ( false === $this->cdata && '' === trim( $cdata ) )
			return;

		$this->cdata .= $cdata;
	}

	/**
	 * Element-end handler: store collected text and file completed records.
	 *
	 * @param mixed  $parser Parser handle (unused).
	 * @param string $tag    Element name, including any namespace prefix.
	 */
	function tag_close( $parser, $tag ) {
		$text = ( false === $this->cdata ) ? '' : trim( $this->cdata );

		switch ( $tag ) {
			case 'wp:comment':
				if ( ! empty( $this->sub_data ) )
					$this->data['comments'][] = $this->sub_data;
				$this->sub_data = array();
				break;
			case 'category':
				// sub_data is only populated when the element had domain + nicename attributes
				if ( ! empty( $this->sub_data ) ) {
					$this->sub_data['name'] = $text;
					$this->data['terms'][] = $this->sub_data;
				}
				$this->sub_data = array();
				break;
			case 'wp:postmeta':
				if ( ! empty( $this->sub_data ) )
					$this->data['postmeta'][] = $this->sub_data;
				$this->sub_data = array();
				break;
			case 'item':
				$this->posts[] = $this->data;
				$this->data = array();
				$this->in_post = false;
				break;
			case 'wp:category':
			case 'wp:tag':
			case 'wp:term':
				// 'wp:category' => $this->category, 'wp:tag' => $this->tag, 'wp:term' => $this->term
				$n = substr( $tag, 3 );
				array_push( $this->$n, $this->data );
				$this->data = array();
				break;
			case 'wp:author':
				if ( ! empty( $this->data['author_login'] ) )
					$this->authors[$this->data['author_login']] = $this->data;
				$this->data = array();
				break;
			case 'wp:base_site_url':
				$this->base_url = $text;
				break;
			case 'wp:wxr_version':
				$this->is_wxr_file = (bool) preg_match( '/^\d+\.\d+$/', $text );
				break;

			default:
				if ( $this->in_sub_tag ) {
					$this->sub_data[$this->in_sub_tag] = $text;
					$this->in_sub_tag = false;
				} else if ( $this->in_tag ) {
					$this->data[$this->in_tag] = $text;
					$this->in_tag = false;
				}
		}

		$this->cdata = false;
	}
}

/**
 * WXR Parser that uses regular expressions. Fallback for installs without an XML parser.
 *
 * Reads the file line by line, so it relies on each channel-level element
 * (category, tag, term, author, base site URL) sitting on a single line and
 * on <item> / </item> being on lines of their own.
 */
class WXR_Parser_Regex {
	var $authors = array();
	var $posts = array();
	var $categories = array();
	var $tags = array();
	var $terms = array();
	var $base_url = '';
	var $has_gzip = false;

	/**
	 * PHP 4 style constructor, kept for backwards compatibility.
	 */
	function WXR_Parser_Regex() {
		$this->__construct();
	}

	function __construct() {
		// gz* functions read plain files transparently, so prefer them when available
		$this->has_gzip = is_callable( 'gzopen' );
	}

	/**
	 * Parse a WXR file.
	 *
	 * @param string $file Path to the WXR file.
	 * @return array|WP_Error Parsed data, or WP_Error when no wp:wxr_version
	 *                        element was found (including when the file
	 *                        could not be opened).
	 */
	function parse( $file ) {
		$is_wxr = $in_post = false;
		$post = '';

		// start from a clean slate so every key of the result is always defined
		$this->authors = $this->posts = $this->categories = $this->tags = $this->terms = array();
		$this->base_url = '';

		$fp = $this->fopen( $file, 'r' );
		if ( $fp ) {
			while ( ! $this->feof( $fp ) ) {
				$importline = rtrim( $this->fgets( $fp ) );

				if ( ! $is_wxr && preg_match( '|<wp:wxr_version>\d+\.\d+</wp:wxr_version>|', $importline ) )
					$is_wxr = true;

				// each capture below is only used when the closing tag was found on the same line
				if ( false !== strpos( $importline, '<wp:base_site_url>' ) ) {
					if ( preg_match( '|<wp:base_site_url>(.*?)</wp:base_site_url>|is', $importline, $url ) )
						$this->base_url = $url[1];
					continue;
				}
				if ( false !== strpos( $importline, '<wp:category>' ) ) {
					if ( preg_match( '|<wp:category>(.*?)</wp:category>|is', $importline, $category ) )
						$this->categories[] = $this->process_category( $category[1] );
					continue;
				}
				if ( false !== strpos( $importline, '<wp:tag>' ) ) {
					if ( preg_match( '|<wp:tag>(.*?)</wp:tag>|is', $importline, $tag ) )
						$this->tags[] = $this->process_tag( $tag[1] );
					continue;
				}
				if ( false !== strpos( $importline, '<wp:term>' ) ) {
					if ( preg_match( '|<wp:term>(.*?)</wp:term>|is', $importline, $term ) )
						$this->terms[] = $this->process_term( $term[1] );
					continue;
				}
				if ( false !== strpos( $importline, '<wp:author>' ) ) {
					if ( preg_match( '|<wp:author>(.*?)</wp:author>|is', $importline, $author ) ) {
						$a = $this->process_author( $author[1] );
						$this->authors[$a['author_login']] = $a;
					}
					continue;
				}
				if ( false !== strpos( $importline, '<item>' ) ) {
					$post = '';
					$in_post = true;
					continue;
				}
				if ( false !== strpos( $importline, '</item>' ) ) {
					$in_post = false;
					$this->posts[] = $this->process_post( $post );
					continue;
				}
				if ( $in_post ) {
					$post .= $importline . "\n";
				}
			}

			$this->fclose($fp);
		}

		if ( ! $is_wxr )
			return new WP_Error( 'WXR_parse_error', __( 'This does not appear to be a WXR file, missing/invalid WXR version number', 'wordpress-importer' ) );

		return array(
			'authors' => $this->authors,
			'posts' => $this->posts,
			'categories' => $this->categories,
			'tags' => $this->tags,
			'terms' => $this->terms,
			'base_url' => $this->base_url
		);
	}

	/**
	 * Extract the contents of the first <$tag> element found in $string.
	 *
	 * A CDATA wrapper around the whole value is removed, the value is
	 * trimmed and then passed through $wpdb->escape().
	 *
	 * @param string $string XML fragment to search.
	 * @param string $tag    Element name, including any namespace prefix.
	 * @return string Escaped element contents, or '' when the element is absent.
	 */
	function get_tag( $string, $tag ) {
		global $wpdb;
		preg_match( "|<$tag.*?>(.*?)</$tag>|is", $string, $return );
		if ( isset( $return[1] ) ) {
			$return = preg_replace( '|^<!\[CDATA\[(.*)\]\]>$|s', '$1', $return[1] );
			$return = $wpdb->escape( trim( $return ) );
		} else {
			$return = '';
		}
		return $return;
	}

	/**
	 * @param string $c Inner XML of a <wp:category> element.
	 * @return array Category fields.
	 */
	function process_category( $c ) {
		return array(
			'term_id' => $this->get_tag( $c, 'wp:term_id' ),
			'cat_name' => $this->get_tag( $c, 'wp:cat_name' ),
			'category_nicename'	=> $this->get_tag( $c, 'wp:category_nicename' ),
			'category_parent' => $this->get_tag( $c, 'wp:category_parent' ),
			'category_description' => $this->get_tag( $c, 'wp:category_description' ),
		);
	}

	/**
	 * @param string $t Inner XML of a <wp:tag> element.
	 * @return array Tag fields.
	 */
	function process_tag( $t ) {
		return array(
			'term_id' => $this->get_tag( $t, 'wp:term_id' ),
			'tag_name' => $this->get_tag( $t, 'wp:tag_name' ),
			'tag_slug' => $this->get_tag( $t, 'wp:tag_slug' ),
			'tag_description' => $this->get_tag( $t, 'wp:tag_description' ),
		);
	}

	/**
	 * @param string $t Inner XML of a <wp:term> element.
	 * @return array Term fields.
	 */
	function process_term( $t ) {
		return array(
			'term_id' => $this->get_tag( $t, 'wp:term_id' ),
			'term_taxonomy' => $this->get_tag( $t, 'wp:term_taxonomy' ),
			'slug' => $this->get_tag( $t, 'wp:term_slug' ),
			'term_parent' => $this->get_tag( $t, 'wp:term_parent' ),
			'term_name' => $this->get_tag( $t, 'wp:term_name' ),
			'term_description' => $this->get_tag( $t, 'wp:term_description' ),
		);
	}

	/**
	 * @param string $a Inner XML of a <wp:author> element.
	 * @return array Author fields.
	 */
	function process_author( $a ) {
		return array(
			'author_login' => $this->get_tag( $a, 'wp:author_login' ),
			'author_email' => $this->get_tag( $a, 'wp:author_email' ),
			'author_display_name' => $this->get_tag( $a, 'wp:author_display_name' ),
			'author_first_name' => $this->get_tag( $a, 'wp:author_first_name' ),
			'author_last_name' => $this->get_tag( $a, 'wp:author_last_name' ),
		);
	}

	/**
	 * Build a post array from the inner XML of an <item> element.
	 *
	 * The 'terms', 'comments' and 'postmeta' keys are only present when the
	 * item contains at least one such entry.
	 *
	 * @param string $post Inner XML of the item.
	 * @return array Post fields.
	 */
	function process_post( $post ) {
		$post_terms = $post_comments = $post_postmeta = array();

		$post_id        = $this->get_tag( $post, 'wp:post_id' );
		$post_title     = $this->get_tag( $post, 'title' );
		$post_date      = $this->get_tag( $post, 'wp:post_date' );
		$post_date_gmt  = $this->get_tag( $post, 'wp:post_date_gmt' );
		$comment_status = $this->get_tag( $post, 'wp:comment_status' );
		$ping_status    = $this->get_tag( $post, 'wp:ping_status' );
		$status         = $this->get_tag( $post, 'wp:status' );
		$post_name      = $this->get_tag( $post, 'wp:post_name' );
		$post_parent    = $this->get_tag( $post, 'wp:post_parent' );
		$menu_order     = $this->get_tag( $post, 'wp:menu_order' );
		$post_type      = $this->get_tag( $post, 'wp:post_type' );
		$post_password  = $this->get_tag( $post, 'wp:post_password' );
		$is_sticky      = $this->get_tag( $post, 'wp:is_sticky' );
		$guid           = $this->get_tag( $post, 'guid' );
		$post_author    = $this->get_tag( $post, 'dc:creator' );

		// lower-case upper-case HTML tag names and self-close <br> / <hr>
		$post_excerpt = $this->get_tag( $post, 'excerpt:encoded' );
		$post_excerpt = preg_replace_callback( '|<(/?[A-Z]+)|', array( $this, '_normalize_tag' ), $post_excerpt );
		$post_excerpt = str_replace( '<br>', '<br />', $post_excerpt );
		$post_excerpt = str_replace( '<hr>', '<hr />', $post_excerpt );

		$post_content = $this->get_tag( $post, 'content:encoded' );
		$post_content = preg_replace_callback( '|<(/?[A-Z]+)|', array( $this, '_normalize_tag' ), $post_content );
		$post_content = str_replace( '<br>', '<br />', $post_content );
		$post_content = str_replace( '<hr>', '<hr />', $post_content );

		$postdata = compact( 'post_id', 'post_author', 'post_date', 'post_date_gmt', 'post_content', 'post_excerpt',
			'post_title', 'status', 'post_name', 'comment_status', 'ping_status', 'guid', 'post_parent',
			'menu_order', 'post_type', 'post_password', 'is_sticky'
		);

		preg_match_all( '|<category domain="([^"]+?)" nicename="([^"]+?)">(.+?)</category>|is', $post, $terms, PREG_SET_ORDER );
		foreach ( $terms as $t ) {
			$post_terms[] = array(
				'slug' => $t[2],
				'domain' => $t[1],
				'name' => str_replace( array( '<![CDATA[', ']]>' ), '', $t[3] ),
			);
		}
		if ( ! empty( $post_terms ) ) $postdata['terms'] = $post_terms;

		preg_match_all( '|<wp:comment>(.+?)</wp:comment>|is', $post, $comments );
		$comments = $comments[1];
		if ( $comments ) {
			foreach ( $comments as $comment ) {
				$post_comments[] = array(
					'comment_id' => $this->get_tag( $comment, 'wp:comment_id' ),
					'comment_author' => $this->get_tag( $comment, 'wp:comment_author' ),
					'comment_author_email' => $this->get_tag( $comment, 'wp:comment_author_email' ),
					'comment_author_IP' => $this->get_tag( $comment, 'wp:comment_author_IP' ),
					'comment_author_url' => $this->get_tag( $comment, 'wp:comment_author_url' ),
					'comment_date' => $this->get_tag( $comment, 'wp:comment_date' ),
					'comment_date_gmt' => $this->get_tag( $comment, 'wp:comment_date_gmt' ),
					'comment_content' => $this->get_tag( $comment, 'wp:comment_content' ),
					'comment_approved' => $this->get_tag( $comment, 'wp:comment_approved' ),
					'comment_type' => $this->get_tag( $comment, 'wp:comment_type' ),
					'comment_parent' => $this->get_tag( $comment, 'wp:comment_parent' ),
					// provided by the SimpleXML and XML parsers too
					'comment_user_id' => $this->get_tag( $comment, 'wp:comment_user_id' ),
				);
			}
		}
		if ( ! empty( $post_comments ) ) $postdata['comments'] = $post_comments;

		preg_match_all( '|<wp:postmeta>(.+?)</wp:postmeta>|is', $post, $postmeta );
		$postmeta = $postmeta[1];
		if ( $postmeta ) {
			foreach ( $postmeta as $p ) {
				$post_postmeta[] = array(
					'key' => $this->get_tag( $p, 'wp:meta_key' ),
					'value' => $this->get_tag( $p, 'wp:meta_value' ),
				);
			}
		}
		if ( ! empty( $post_postmeta ) ) $postdata['postmeta'] = $post_postmeta;

		return $postdata;
	}

	/**
	 * preg_replace_callback() helper: lower-case a matched HTML tag name.
	 *
	 * @param array $matches Regex matches; index 1 is the tag name with optional leading slash.
	 * @return string
	 */
	function _normalize_tag( $matches ) {
		return '<' . strtolower( $matches[1] );
	}

	// Thin wrappers that use the zlib stream functions when they are available.

	function fopen( $filename, $mode = 'r' ) {
		if ( $this->has_gzip )
			return gzopen( $filename, $mode );
		return fopen( $filename, $mode );
	}

	function feof( $fp ) {
		if ( $this->has_gzip )
			return gzeof( $fp );
		return feof( $fp );
	}

	function fgets( $fp, $len = 8192 ) {
		if ( $this->has_gzip )
			return gzgets( $fp, $len );
		return fgets( $fp, $len );
	}

	function fclose( $fp ) {
		if ( $this->has_gzip )
			return gzclose( $fp );
		return fclose( $fp );
	}
}
trunk/readme.txt
3 3 Donate link: 4 4 Tags: importer, wordpress 5 5 Requires at least: 3.0 6 Tested up to: 3.0 6 Tested up to: 3.0.1 7 7 Stable tag: 0.2 8 8 9 Import posts, pages, comments, custom fields, categories, and tags from a WordPress export file. 9 Import posts, pages, comments, custom fields, categories, tags and more from a WordPress export file. 10 10 11 11 == Description == 12 12 13 Import posts, pages, comments, custom fields, categories, and tags from a WordPress export file. 13 Import posts, pages, comments, custom fields, categories, tags and more from a WordPress export file. 14 14 15 15 == Installation == 16 16 17 17 1. Upload the `wordpress-importer` folder to the `/wp-content/plugins/` directory 18 18 1. Activate the plugin through the 'Plugins' menu in WordPress 19 1. Go to the Tools -> Import screen, Click on WordPress 19 1. Go to the Tools -> Import screen, click on WordPress 20 20 21 == Frequently Asked Questions == 21 == Changelog == 22 22 23 == Screenshots == 23 = 0.3 = 24 * Use an XML Parser if possible 25 * Proper import support for nav menus 26 * ... and more 24 27 25 == Changelog == 26 27 28 = 0.1 = 28 29 * Initial release 30 31 == Upgrade Notice == 32 33 = 0.3 = 34 Upgrade for a more robust and reliable experience when importing a WordPress export file. 35 36 == Filters == 37 38 The importer has a couple of filters to allow you to completely enable/block certain features: 39 * `import_allow_create_users`: return false if you only want to allow mapping to existing users 40 * `import_allow_fetch_attachments`: return false if you do not wish to allow importing and downloading of attachments 41 * `import_attachment_size_limit`: return an integer value for the maximum file size in bytes to save (default is 0, which is unlimited) -
trunk/wordpress-importer.php
2 2 /* 3 3 Plugin Name: WordPress Importer 4 4 Plugin URI: http://wordpress.org/extend/plugins/wordpress-importer/ 5 Description: Import posts, pages, comments, custom fields, categories, and tagsfrom a WordPress export file.5 Description: Import posts, pages, comments, custom fields, categories, tags and more from a WordPress export file. 6 6 Author: wordpressdotorg 7 7 Author URI: http://wordpress.org/ 8 Version: 0.2 9 Stable tag: 0.2 8 Version: 0.3 10 9 License: GPL v2 - http://www.gnu.org/licenses/old-licenses/gpl-2.0.html 11 10 */ 12 11 13 if ( ! defined('WP_LOAD_IMPORTERS') )12 if ( ! defined( 'WP_LOAD_IMPORTERS' ) ) 14 13 return; 15 14 16 15 // Load Importer API 17 16 require_once ABSPATH . 'wp-admin/includes/import.php'; 18 17 19 if ( ! class_exists( 'WP_Importer' ) ) {18 if ( ! class_exists( 'WP_Importer' ) ) { 20 19 $class_wp_importer = ABSPATH . 'wp-admin/includes/class-wp-importer.php'; 21 20 if ( file_exists( $class_wp_importer ) ) 22 require _once$class_wp_importer;21 require $class_wp_importer; 23 22 } 24 23 24 // include WXR file parsers 25 require dirname( __FILE__ ) . 
'/parsers.php'; 26 25 27 /** 26 * WordPress Importer 28 * WordPress Importer class for managing the import process of a WXR file 27 29 * 28 30 * @package WordPress 29 31 * @subpackage Importer 30 32 */ 31 33 if ( class_exists( 'WP_Importer' ) ) { 32 34 class WP_Import extends WP_Importer { 33 34 var $post_ids_processed = array ();35 var $orphans = array ();36 var $file;37 35 var $id; 38 var $mtnames = array ();39 var $newauthornames = array ();40 var $allauthornames = array ();41 36 42 var $author_ids = array (); 43 var $tags = array (); 44 var $categories = array (); 45 var $terms = array (); 46 var $authors = array (); 37 var $authors = array(); 38 var $posts = array(); 39 var $terms = array(); 40 var $categories = array(); 41 var $tags = array(); 42 var $base_url = ''; 47 43 48 var $j = -1; 44 var $processed_authors = array(); 45 var $processed_terms = array(); 46 var $processed_posts = array(); 47 var $post_orphans = array(); 48 var $processed_menu_items = array(); 49 var $menu_item_orphans = array(); 50 var $missing_menu_items = array(); 51 52 var $authors_from_posts = false; 49 53 var $fetch_attachments = false; 50 var $url_remap = array 54 var $url_remap = array(); 51 55 52 function header() { 53 echo '<div class="wrap">'; 54 screen_icon(); 55 echo '<h2>'.__('Import WordPress', 'wordpress-importer').'</h2>'; 56 } 56 function WP_Import() { /* nothing */ } 57 57 58 function footer() { 59 echo '</div>'; 60 } 58 function dispatch() { 59 $this->header(); 61 60 62 function greet() { 63 echo '<div class="narrow">'; 64 echo '<p>'.__('Howdy! 
Upload your WordPress eXtended RSS (WXR) file and we’ll import the posts, pages, comments, custom fields, categories, and tags into this site.', 'wordpress-importer').'</p>'; 65 echo '<p>'.__('Choose a WordPress WXR file to upload, then click Upload file and import.', 'wordpress-importer').'</p>'; 66 wp_import_upload_form("admin.php?import=wordpress&step=1"); 67 echo '</div>'; 68 } 69 70 function get_tag( $string, $tag ) { 71 global $wpdb; 72 preg_match("|<$tag.*?>(.*?)</$tag>|is", $string, $return); 73 if ( isset($return[1]) ) { 74 $return = preg_replace('|^<!\[CDATA\[(.*)\]\]>$|s', '$1', $return[1]); 75 $return = $wpdb->escape( trim( $return ) ); 76 } else { 77 $return = ''; 61 $step = empty( $_GET['step'] ) ? 0 : (int) $_GET['step']; 62 switch ( $step ) { 63 case 0: 64 $this->greet(); 65 break; 66 case 1: 67 check_admin_referer( 'import-upload' ); 68 if ( $this->handle_upload() ) 69 $this->import_options(); 70 break; 71 case 2: 72 check_admin_referer( 'import-wordpress' ); 73 $this->fetch_attachments = ( ! 
empty( $_POST['fetch_attachments'] ) && $this->allow_fetch_attachments() ); 74 $this->id = (int) $_POST['import_id']; 75 $file = get_attached_file( $this->id ); 76 $this->import( $file ); 77 break; 78 78 } 79 return $return;80 }81 79 82 function has_gzip() { 83 return is_callable('gzopen'); 80 $this->footer(); 84 81 } 85 82 86 function fopen($filename, $mode='r') { 87 if ( $this->has_gzip() ) 88 return gzopen($filename, $mode); 89 return fopen($filename, $mode); 90 } 83 function import( $file ) { 84 add_filter( 'import_post_meta_key', array( $this, 'is_valid_meta_key' ) ); 91 85 92 function feof($fp) { 93 if ( $this->has_gzip() ) 94 return gzeof($fp); 95 return feof($fp); 96 } 86 $this->import_start( $file ); 97 87 98 function fgets($fp, $len=8192) { 99 if ( $this->has_gzip() ) 100 return gzgets($fp, $len); 101 return fgets($fp, $len); 102 } 88 $this->get_author_mapping(); 103 89 104 function fclose($fp) { 105 if ( $this->has_gzip() ) 106 return gzclose($fp); 107 return fclose($fp); 108 } 90 wp_suspend_cache_invalidation( true ); 91 $this->process_categories(); 92 $this->process_tags(); 93 $this->process_terms(); 94 $this->process_posts(); 95 wp_suspend_cache_invalidation( false ); 109 96 110 function get_entries($process_post_func=NULL) { 111 set_magic_quotes_runtime(0); 97 // update items with missing/incorrect parent IDs 98 $this->backfill_parents(); 99 // update attachment references within posts and postmeta 100 $this->backfill_attachment_urls(); 112 101 113 $ doing_entry = false;114 $is_wxr_file = false;102 $this->import_end(); 103 } 115 104 116 $fp = $this->fopen($this->file, 'r'); 117 if ($fp) { 118 while ( !$this->feof($fp) ) { 119 $importline = rtrim($this->fgets($fp)); 105 function import_start( $file ) { 106 $import_arr = $this->parse( $file ); 120 107 121 // this doesn't check that the file is perfectly valid but will at least confirm that it's not the wrong format altogether 122 if ( !$is_wxr_file && 
preg_match('|xmlns:wp="http://wordpress[.]org/export/\d+[.]\d+/"|', $importline) ) 123 $is_wxr_file = true; 124 125 if ( false !== strpos($importline, '<wp:base_site_url>') ) { 126 preg_match('|<wp:base_site_url>(.*?)</wp:base_site_url>|is', $importline, $url); 127 $this->base_url = $url[1]; 128 continue; 129 } 130 if ( false !== strpos($importline, '<wp:category>') ) { 131 preg_match('|<wp:category>(.*?)</wp:category>|is', $importline, $category); 132 $this->categories[] = $category[1]; 133 continue; 134 } 135 if ( false !== strpos($importline, '<wp:tag>') ) { 136 preg_match('|<wp:tag>(.*?)</wp:tag>|is', $importline, $tag); 137 $this->tags[] = $tag[1]; 138 continue; 139 } 140 if ( false !== strpos($importline, '<wp:term>') ) { 141 preg_match('|<wp:term>(.*?)</wp:term>|is', $importline, $term); 142 $this->terms[] = $term[1]; 143 continue; 144 } 145 if ( false !== strpos($importline, '<wp:author>') ) { 146 preg_match('|<wp:author>(.*?)</wp:author>|is', $importline, $author); 147 $this->authors[] = $author[1]; 148 continue; 149 } 150 if ( false !== strpos($importline, '<item>') ) { 151 $this->post = ''; 152 $doing_entry = true; 153 continue; 154 } 155 if ( false !== strpos($importline, '</item>') ) { 156 $doing_entry = false; 157 if ($process_post_func) 158 call_user_func($process_post_func, $this->post); 159 continue; 160 } 161 if ( $doing_entry ) { 162 $this->post .= $importline . "\n"; 163 } 164 } 165 166 $this->fclose($fp); 108 if ( is_wp_error( $import_arr ) ) { 109 echo '<p><strong>' . __( 'Sorry, there has been an error.', 'wordpress-importer' ) . '</strong></p>'; 110 echo '<p>' . esc_html( $import_arr->get_error_message() ) . 
'</p>'; 111 $this->footer(); 112 die(); 167 113 } 168 114 169 return $is_wxr_file; 115 $this->get_authors_from_import( $import_arr ); 116 $this->posts = $import_arr['posts']; 117 $this->terms = $import_arr['terms']; 118 $this->categories = $import_arr['categories']; 119 $this->tags = $import_arr['tags']; 120 $this->base_url = esc_url( $import_arr['base_url'] ); 170 121 122 wp_defer_term_counting( true ); 123 wp_defer_comment_counting( true ); 124 125 do_action( 'import_start' ); 171 126 } 172 127 173 function get_wp_authors() { 174 // We need to find unique values of author names, while preserving the order, so this function emulates the unique_value(); php function, without the sorting. 175 $temp = $this->allauthornames; 176 $authors[0] = array_shift($temp); 177 $y = count($temp) + 1; 178 for ($x = 1; $x < $y; $x ++) { 179 $next = array_shift($temp); 180 if (!(in_array($next, $authors))) 181 array_push($authors, $next); 128 function import_end() { 129 wp_import_cleanup( $this->id ); 130 131 wp_cache_flush(); 132 foreach ( get_taxonomies() as $tax ) { 133 delete_option( "{$tax}_children" ); 134 _get_term_hierarchy( $tax ); 182 135 } 183 136 184 return $authors; 137 wp_defer_term_counting( false ); 138 wp_defer_comment_counting( false ); 139 140 echo '<p>' . __( 'All done.' ) . ' <a href="' . admin_url() . '">' . __( 'Have fun!' ) . '</a>' . '</p>'; 141 142 do_action( 'import_end' ); 185 143 } 186 144 187 function get_authors_from_post() {188 global $current_user;145 function handle_upload() { 146 $file = wp_import_handle_upload(); 189 147 190 // this will populate $this->author_ids with a list of author_names => user_ids 148 if ( isset( $file['error'] ) ) { 149 echo '<p><strong>' . __( 'Sorry, there has been an error.', 'wordpress-importer' ) . '</strong></p>'; 150 echo '<p>' . esc_html( $file['error'] ) . 
'</p>'; 151 return false; 152 } 191 153 192 foreach ( (array) $_POST['author_in'] as $i => $in_author_name ) { 154 $this->id = (int) $file['id']; 155 $import_data = $this->parse( $file['file'] ); 156 if ( is_wp_error( $import_data ) ) { 157 echo '<p><strong>' . __( 'Sorry, there has been an error.', 'wordpress-importer' ) . '</strong></p>'; 158 echo '<p>' . esc_html( $import_data->get_error_message() ) . '</p>'; 159 return false; 160 } 193 161 194 if ( !empty($_POST['user_select'][$i]) ) { 195 // an existing user was selected in the dropdown list 196 $user = get_userdata( intval($_POST['user_select'][$i]) ); 197 if ( isset($user->ID) ) 198 $this->author_ids[$in_author_name] = $user->ID; 199 } 200 elseif ( $this->allow_create_users() ) { 201 // nothing was selected in the dropdown list, so we'll use the name in the text field 162 $this->get_authors_from_import( $import_data ); 202 163 203 $new_author_name = trim($_POST['user_create'][$i]); 204 // if the user didn't enter a name, assume they want to use the same name as in the import file 205 if ( empty($new_author_name) ) 206 $new_author_name = $in_author_name; 164 return true; 165 } 207 166 208 $user_id = username_exists($new_author_name); 209 if ( !$user_id ) { 210 $user_id = wp_create_user($new_author_name, wp_generate_password()); 167 function get_authors_from_import( $import_data ) { 168 if ( ! empty( $import_data['authors'] ) ) { 169 $this->authors = $import_data['authors']; 170 // no author information, grab it from the posts 171 } else { 172 foreach ( $import_data['posts'] as $post ) { 173 $login = sanitize_user( $post['post_author'], true ); 174 if ( empty( $login ) ) { 175 _e( sprintf( 'Error importing author %s their posts will be attributed to the current user', esc_html( $post['post_author'] ) ) ); 176 echo '<br />'; 177 continue; 211 178 } 212 179 213 if ( !is_wp_error( $user_id ) ) { 214 $this->author_ids[$in_author_name] = $user_id; 215 } 180 if ( ! 
isset($this->authors[$login]) ) 181 $this->authors[$login] = array( 182 'author_login' => $login, 183 'author_display_name' => $post['post_author'] 184 ); 216 185 } 217 218 // failsafe: if the user_id was invalid, default to the current user 219 if ( empty($this->author_ids[$in_author_name]) ) { 220 $this->author_ids[$in_author_name] = intval($current_user->ID); 221 } 186 $this->authors_from_posts = true; 222 187 } 223 224 188 } 225 189 226 function wp_authors_form() { 190 function import_options() { 191 $j = 0; 227 192 ?> 228 <h2><?php _e('Assign Authors', 'wordpress-importer'); ?></h2> 229 <p><?php _e('To make it easier for you to edit and save the imported posts and drafts, you may want to change the name of the author of the posts. For example, you may want to import all the entries as <code>admin</code>s entries.', 'wordpress-importer'); ?></p> 230 <?php 231 if ( $this->allow_create_users() ) { 232 echo '<p>'.__('If a new user is created by WordPress, a password will be randomly generated. Manually change the user’s details if necessary.', 'wordpress-importer')."</p>\n"; 233 } 193 <form action="<?php echo admin_url( 'admin.php?import=wordpress&step=2' ); ?>" method="post"> 194 <?php wp_nonce_field( 'import-wordpress' ); ?> 195 <input type="hidden" name="import_id" value="<?php echo $this->id; ?>" /> 234 196 197 <?php if ( ! empty( $this->authors ) ) : ?> 198 <h3><?php _e('Assign Authors', 'wordpress-importer'); ?></h3> 199 <p><?php _e( 'To make it easier for you to edit and save the imported content, you may want to reassign the author of the imported item to an existing user of this site. For example, you may want to import all the entries as <code>admin</code>s entries.', 'wordpress-importer' ); ?></p> 200 <?php if ( ! $this->authors_from_posts && $this->allow_create_users() ) : ?> 201 <p><?php printf( __( 'If a new user is created by WordPress, a new password will be randomly generated and the new user’s role will be set as %s. 
Manually changing the new user’s details will be necessary.', 'wordpress-importer' ), esc_html( get_option('default_role') ) ); ?></p> 202 <?php endif; ?> 203 <ol id="authors"> 204 <?php foreach ( $this->authors as $author ) : ?> 205 <li><?php $this->author_select( $j++, $author ); ?></li> 206 <?php endforeach; ?> 207 </ol> 208 <?php endif; ?> 235 209 236 $authors = $this->get_wp_authors(); 237 echo '<form action="?import=wordpress&step=2&id=' . $this->id . '" method="post">'; 238 wp_nonce_field('import-wordpress'); 239 ?> 240 <ol id="authors"> 210 <?php if ( $this->allow_fetch_attachments() ) : ?> 211 <h3><?php _e('Import Attachments', 'wordpress-importer'); ?></h3> 212 <p> 213 <input type="checkbox" value="1" name="fetch_attachments" id="import-attachments" /> 214 <label for="import-attachments"><?php _e( 'Download and import file attachments', 'wordpress-importer' ); ?></label> 215 </p> 216 <?php endif; ?> 217 218 <p class="submit"><input type="submit" class="button" value="<?php esc_attr_e( 'Submit', 'wordpress-importer' ); ?>" /></p> 219 </form> 241 220 <?php 242 $j = -1; 243 foreach ($authors as $author) { 244 ++ $j; 245 echo '<li>'.__('Import author:', 'wordpress-importer').' <strong>'.$author.'</strong><br />'; 246 $this->users_form($j, $author); 247 echo '</li>'; 248 } 221 } 249 222 250 if ( $this->allow_fetch_attachments() ) { 223 function author_select( $n, $author ) { 224 if ( ! $this->authors_from_posts && $this->allow_create_users() ) 225 printf( __( 'Import author %1$s or map to existing user', 'wordpress-importer' ), '<strong>' . esc_html( $author['author_display_name'] ) . '</strong>' ); 226 else 227 printf( __( 'Map author %1$s to existing user', 'wordpress-importer' ), '<strong>' . esc_html( $author['author_display_name'] ) . 
'</strong>' ); 251 228 ?> 252 </ol> 253 <h2><?php _e('Import Attachments', 'wordpress-importer'); ?></h2> 254 <p> 255 <input type="checkbox" value="1" name="attachments" id="import-attachments" /> 256 <label for="import-attachments"><?php _e('Download and import file attachments', 'wordpress-importer') ?></label> 257 </p> 258 229 <input type="hidden" name="imported_authors[<?php echo $n; ?>]" value="<?php esc_attr_e( $author['author_login'] ); ?>" /> 230 <?php wp_dropdown_users( array( 'name' => "user_map[$n]", 'multi' => true, 'show_option_all' => __( '- Select -', 'wordpress-importer' ) ) ); ?> 259 231 <?php 260 }261 262 echo '<p class="submit">';263 echo '<input type="submit" class="button" value="'. esc_attr__('Submit', 'wordpress-importer') .'" />'.'<br />';264 echo '</p>';265 echo '</form>';266 267 232 } 268 233 269 function users_form($n, $author) { 234 function get_author_mapping() { 235 if ( ! isset( $_POST['imported_authors'] ) ) 236 return; 270 237 271 if ( $this->allow_create_users() ) { 272 printf('<label>'.__('Create user %1$s or map to existing', 'wordpress-importer'), ' <input type="text" value="'. esc_attr($author) .'" name="'.'user_create['.intval($n).']'.'" maxlength="30" /></label> <br />'); 273 } 274 else { 275 echo __('Map to existing', 'wordpress-importer').'<br />'; 276 } 238 foreach ( (array) $_POST['imported_authors'] as $i => $login ) { 239 $bad_login = $login; 240 $login = sanitize_user( $login, true ); 277 241 278 // keep track of $n => $author name 279 echo '<input type="hidden" name="author_in['.intval($n).']" value="' . esc_attr($author).'" />'; 242 if ( ! empty( $_POST['user_map'][$i] ) ) { 243 $user = get_userdata( intval($_POST['user_map'][$i]) ); 244 if ( isset( $user->ID ) ) 245 $this->processed_authors[$login] = $user->ID; 246 } else if ( ! $this->authors_from_posts && $this->allow_create_users() ) { 247 $user_id = username_exists( $login ); 248 if ( ! 
$user_id ) { 249 $user_data = array( 250 'user_login' => $login, 251 'user_pass' => wp_generate_password(), 252 'user_email' => $this->authors[$login]['author_email'], 253 'display_name' => $this->authors[$login]['author_display_name'], 254 'first_name' => $this->authors[$login]['author_first_name'], 255 'last_name' => $this->authors[$login]['author_last_name'], 256 ); 257 $user_id = wp_insert_user( $user_data ); 258 } 280 259 281 $users = get_users_of_blog(); 282 ?><select name="user_select[<?php echo $n; ?>]"> 283 <option value="0"><?php _e('- Select -', 'wordpress-importer'); ?></option> 284 <?php 285 foreach ($users as $user) { 286 echo '<option value="'.$user->user_id.'">'.$user->user_login.'</option>'; 287 } 288 ?> 289 </select> 290 <?php 291 } 260 if ( ! is_wp_error( $user_id ) ) 261 $this->processed_authors[$login] = $user_id; 262 else 263 _e( sprintf( 'Error importing author %s their posts will be attributed to the current user', esc_html( $post['post_author'] ) ) ); 264 } 292 265 293 function select_authors() { 294 $is_wxr_file = $this->get_entries(array(&$this, 'process_author')); 295 if ( $is_wxr_file ) { 296 $this->wp_authors_form(); 266 // failsafe: if the user_id was invalid, default to the current user 267 if ( empty( $this->processed_authors[$login] ) ) 268 $this->processed_authors[$login] = (int) get_current_user_id(); 297 269 } 298 else {299 echo '<h2>'.__('Invalid file', 'wordpress-importer').'</h2>';300 echo '<p>'.__('Please upload a valid WXR (WordPress eXtended RSS) export file.', 'wordpress-importer').'</p>';301 }302 270 } 303 271 304 // fetch the user ID for a given author name, respecting the mapping preferences305 function checkauthor($author) {306 global $current_user;307 308 if ( !empty($this->author_ids[$author]) )309 return $this->author_ids[$author];310 311 // failsafe: map to the current user312 return $current_user->ID;313 }314 315 316 317 272 function process_categories() { 318 global $wpdb; 273 if ( empty( $this->categories ) ) 
274 return; 319 275 320 $cat_names = (array) get_terms('category', array('fields' => 'names')); 321 322 while ( $c = array_shift($this->categories) ) { 323 $cat_name = trim($this->get_tag( $c, 'wp:cat_name' )); 324 325 // If the category exists we leave it alone 326 if ( in_array($cat_name, $cat_names) ) 276 foreach ( $this->categories as $cat ) { 277 // if the category already exists leave it alone 278 $term_id = term_exists( $cat['category_nicename'], 'category' ); 279 if ( $term_id ) { 280 if ( is_array($term_id) ) $term_id = $term_id['term_id']; 281 $this->processed_terms[intval($cat['term_id'])] = (int) $term_id; 327 282 continue; 283 } 328 284 329 $category_nicename = $this->get_tag( $c, 'wp:category_nicename' ); 330 $category_description = $this->get_tag( $c, 'wp:category_description' ); 331 $posts_private = (int) $this->get_tag( $c, 'wp:posts_private' ); 332 $links_private = (int) $this->get_tag( $c, 'wp:links_private' ); 285 $category_parent = empty( $cat['category_parent'] ) ? 0 : category_exists( $cat['category_parent'] ); 286 $category_description = isset( $cat['category_description'] ) ? $cat['category_description'] : ''; 287 $catarr = array( 288 'category_nicename' => $cat['category_nicename'], 289 'category_parent' => $category_parent, 290 'cat_name' => $cat['cat_name'], 291 'category_description' => $category_description 292 ); 333 293 334 $parent = $this->get_tag( $c, 'wp:category_parent' ); 335 336 if ( empty($parent) ) 337 $category_parent = '0'; 338 else 339 $category_parent = category_exists($parent); 340 341 $catarr = compact('category_nicename', 'category_parent', 'posts_private', 'links_private', 'posts_private', 'cat_name', 'category_description'); 342 343 print '<em>' . sprintf( __( 'Importing category <em>%s</em>…' , 'wordpress-importer'), esc_html($cat_name) ) . '</em><br />' . "\n"; 344 $cat_ID = wp_insert_category($catarr); 294 $id = wp_insert_category( $catarr ); 295 if ( ! 
is_wp_error( $id ) ) { 296 $this->processed_terms[intval($cat['term_id'])] = $id; 297 } else { 298 echo __( 'Error importing category:', 'wordpress-importer' ) . ' ' . esc_html( $id->get_error_message() ) . '<br />'; 299 continue; 300 } 345 301 } 346 302 } 347 303 348 304 function process_tags() { 349 global $wpdb; 305 if ( empty( $this->tags ) ) 306 return; 350 307 351 $tag_names = (array) get_terms('post_tag', array('fields' => 'names')); 352 353 while ( $c = array_shift($this->tags) ) { 354 $tag_name = trim($this->get_tag( $c, 'wp:tag_name' )); 355 356 // If the category exists we leave it alone 357 if ( in_array($tag_name, $tag_names) ) 308 foreach ( $this->tags as $tag ) { 309 // if the tag already exists leave it alone 310 $term_id = term_exists( $tag['tag_slug'], 'post_tag' ); 311 if ( $term_id ) { 312 if ( is_array($term_id) ) $term_id = $term_id['term_id']; 313 $this->processed_terms[intval($tag['term_id'])] = (int) $term_id; 358 314 continue; 315 } 359 316 360 $ slug = $this->get_tag( $c, 'wp:tag_slug' );361 $ description = $this->get_tag( $c, 'wp:tag_description');317 $tag_desc = isset( $tag['tag_description'] ) ? $tag['tag_description'] : ''; 318 $tagarr = array( 'slug' => $tag['tag_slug'], 'description' => $tag_desc ); 362 319 363 $tagarr = compact('slug', 'description'); 364 365 print '<em>' . sprintf( __( 'Importing tag <em>%s</em>…' , 'wordpress-importer'), esc_html($tag_name) ) . '</em><br />' . "\n"; 366 $tag_ID = wp_insert_term($tag_name, 'post_tag', $tagarr); 320 $id = wp_insert_term( $tag['tag_name'], 'post_tag', $tagarr ); 321 if ( ! is_wp_error( $id ) ) { 322 $this->processed_terms[intval($tag['term_id'])] = $id['term_id']; 323 } else { 324 echo __( 'Error importing post tag:', 'wordpress-importer' ) . ' ' . esc_html( $id->get_error_message() ) . 
'<br />'; 325 continue; 326 } 367 327 } 368 328 } 369 329 370 330 function process_terms() { 371 global $wpdb, $wp_taxonomies; 331 if ( empty( $this->terms ) ) 332 return; 372 333 373 $custom_taxonomies = $wp_taxonomies; 374 // get rid of the standard taxonomies 375 unset( $custom_taxonomies['category'] ); 376 unset( $custom_taxonomies['post_tag'] ); 377 unset( $custom_taxonomies['link_category'] ); 334 foreach ( $this->terms as $term ) { 335 // if the term already exists in the correct taxonomy leave it alone 336 $term_id = term_exists( $term['slug'], $term['term_taxonomy'] ); 337 if ( $term_id ) { 338 if ( is_array($term_id) ) $term_id = $term_id['term_id']; 339 $this->processed_terms[intval($term['term_id'])] = (int) $term_id; 340 continue; 341 } 378 342 379 $custom_taxonomies = array_keys( $custom_taxonomies ); 380 $current_terms = (array) get_terms( $custom_taxonomies, array('get' => 'all') ); 381 $taxonomies = array(); 382 foreach ( $current_terms as $term ) { 383 if ( isset( $_terms[$term->taxonomy] ) ) { 384 $taxonomies[$term->taxonomy] = array_merge( $taxonomies[$term->taxonomy], array($term->name) ); 343 if ( empty( $term['term_parent'] ) ) { 344 $parent = 0; 385 345 } else { 386 $taxonomies[$term->taxonomy] = array($term->name); 346 $parent = term_exists( $term['term_parent'], $term['term_taxonomy'] ); 347 if ( is_array( $parent ) ) $parent = $parent['term_id']; 387 348 } 388 } 349 $description = isset( $term['term_description'] ) ? 
$term['term_description'] : ''; 350 $termarr = array( 'slug' => $term['slug'], 'description' => $description, 'parent' => intval($parent) ); 389 351 390 while ( $c = array_shift($this->terms) ) { 391 $term_name = trim($this->get_tag( $c, 'wp:term_name' )); 392 $term_taxonomy = trim($this->get_tag( $c, 'wp:term_taxonomy' )); 393 394 // If the term exists in the taxonomy we leave it alone 395 if ( isset($taxonomies[$term_taxonomy] ) && in_array( $term_name, $taxonomies[$term_taxonomy] ) ) 352 $id = wp_insert_term( $term['term_name'], $term['term_taxonomy'], $termarr ); 353 if ( ! is_wp_error( $id ) ) { 354 $this->processed_terms[intval($term['term_id'])] = $id['term_id']; 355 } else { 356 echo __( 'Error importing term:', 'wordpress-importer' ) . ' ' . esc_html( $id->get_error_message() ) . '<br />'; 396 357 continue; 397 398 $slug = $this->get_tag( $c, 'wp:term_slug' ); 399 $description = $this->get_tag( $c, 'wp:term_description' ); 400 401 $termarr = compact('slug', 'description'); 402 403 print '<em>' . sprintf( __( 'Importing <em>%s</em>…' , 'wordpress-importer'), esc_html($term_name) ) . '</em><br />' . 
"\n"; 404 $term_ID = wp_insert_term($term_name, $this->get_tag( $c, 'wp:term_taxonomy' ), $termarr); 358 } 405 359 } 406 360 } 407 361 408 function process_author($post) {409 $author = $this->get_tag( $post, 'dc:creator' );410 if ($author)411 $this->allauthornames[] = $author;412 }413 414 362 function process_posts() { 415 echo '<ol>'; 363 foreach ( $this->posts as $post ) { 364 if ( isset( $this->processed_posts[$post['post_id']] ) ) 365 continue; 416 366 417 $this->get_entries(array(&$this, 'process_post')); 367 if ( 'nav_menu_item' == $post['post_type'] ) { 368 $this->process_menu_item( $post ); 369 continue; 370 } 418 371 419 echo '</ol>'; 372 $post_exists = post_exists( $post['post_title'], '', $post['post_date'] ); 373 if ( $post_exists ) { 374 $comment_post_ID = $post_id = $post_exists; 375 } else { 376 $post_parent = (int) $post['post_parent']; 377 if ( $post_parent ) { 378 // if we already know the parent, map it to the new local ID 379 if ( isset( $this->processed_posts[$post_parent] ) ) { 380 $post_parent = $this->processed_posts[$post_parent]; 381 // otherwise record the parent for later 382 } else { 383 $this->post_orphans[intval($post['post_id'])] = $post_parent; 384 $post_parent = 0; 385 } 386 } 420 387 421 wp_import_cleanup($this->id); 422 do_action('import_done', 'wordpress'); 388 // map the post author 389 $author = sanitize_user( $post['post_author'], true ); 390 if ( isset( $this->processed_authors[$author] ) ) 391 $author = $this->processed_authors[$author]; 392 else 393 $author = (int) get_current_user_id(); 423 394 424 echo '<h3>'.sprintf(__('All done.', 'wordpress-importer').' 
<a href="%s">'.__('Have fun!', 'wordpress-importer').'</a>', get_option('home')).'</h3>'; 425 } 395 $postdata = array( 396 'import_id' => $post['post_id'], 'post_author' => $author, 'post_date' => $post['post_date'], 397 'post_date_gmt' => $post['post_date_gmt'], 'post_content' => $post['post_content'], 398 'post_excerpt' => $post['post_excerpt'], 'post_title' => $post['post_title'], 399 'post_status' => $post['status'], 'post_name' => $post['post_name'], 400 'comment_status' => $post['comment_status'], 'ping_status' => $post['ping_status'], 401 'guid' => $post['guid'], 'post_parent' => $post_parent, 'menu_order' => $post['menu_order'], 402 'post_type' => $post['post_type'], 'post_password' => $post['post_password'] 403 ); 426 404 427 function _normalize_tag( $matches ) { 428 return '<' . strtolower( $matches[1] ); 429 } 405 if ( 'attachment' == $postdata['post_type'] ) { 406 $remote_url = ! empty($post['attachment_url']) ? $post['attachment_url'] : $post['guid']; 407 $comment_post_ID = $post_id = $this->process_attachment( $postdata, $remote_url ); 408 } else { 409 $comment_post_ID = $post_id = wp_insert_post( $postdata, true ); 410 } 430 411 431 function process_post($post) { 432 global $wpdb; 433 434 $post_ID = (int) $this->get_tag( $post, 'wp:post_id' ); 435 if ( $post_ID && !empty($this->post_ids_processed[$post_ID]) ) // Processed already 436 return 0; 437 438 set_time_limit( 60 ); 439 440 // There are only ever one of these 441 $post_title = $this->get_tag( $post, 'title' ); 442 $post_date = $this->get_tag( $post, 'wp:post_date' ); 443 $post_date_gmt = $this->get_tag( $post, 'wp:post_date_gmt' ); 444 $comment_status = $this->get_tag( $post, 'wp:comment_status' ); 445 $ping_status = $this->get_tag( $post, 'wp:ping_status' ); 446 $post_status = $this->get_tag( $post, 'wp:status' ); 447 $post_name = $this->get_tag( $post, 'wp:post_name' ); 448 $post_parent = $this->get_tag( $post, 'wp:post_parent' ); 449 $menu_order = $this->get_tag( $post, 'wp:menu_order' ); 
450 $post_type = $this->get_tag( $post, 'wp:post_type' ); 451 $post_password = $this->get_tag( $post, 'wp:post_password' ); 452 $is_sticky = $this->get_tag( $post, 'wp:is_sticky' ); 453 $guid = $this->get_tag( $post, 'guid' ); 454 $post_author = $this->get_tag( $post, 'dc:creator' ); 455 456 $post_excerpt = $this->get_tag( $post, 'excerpt:encoded' ); 457 $post_excerpt = preg_replace_callback('|<(/?[A-Z]+)|', array( &$this, '_normalize_tag' ), $post_excerpt); 458 $post_excerpt = str_replace('<br>', '<br />', $post_excerpt); 459 $post_excerpt = str_replace('<hr>', '<hr />', $post_excerpt); 460 461 $post_content = $this->get_tag( $post, 'content:encoded' ); 462 $post_content = preg_replace_callback('|<(/?[A-Z]+)|', array( &$this, '_normalize_tag' ), $post_content); 463 $post_content = str_replace('<br>', '<br />', $post_content); 464 $post_content = str_replace('<hr>', '<hr />', $post_content); 465 466 preg_match_all('|<category domain="tag">(.*?)</category>|is', $post, $tags); 467 $tags = $tags[1]; 468 469 $tag_index = 0; 470 foreach ($tags as $tag) { 471 $tags[$tag_index] = $wpdb->escape( html_entity_decode( str_replace(array( '<![CDATA[', ']]>' ), '', $tag ) ) ); 472 $tag_index++; 473 } 474 475 preg_match_all('|<category>(.*?)</category>|is', $post, $categories); 476 $categories = $categories[1]; 477 478 $cat_index = 0; 479 foreach ($categories as $category) { 480 $categories[$cat_index] = $wpdb->escape( html_entity_decode( str_replace( array( '<![CDATA[', ']]>' ), '', $category ) ) ); 481 $cat_index++; 482 } 483 484 $post_exists = post_exists($post_title, '', $post_date); 485 486 if ( $post_exists ) { 487 echo '<li>'; 488 printf(__('Post <em>%s</em> already exists.', 'wordpress-importer'), stripslashes($post_title)); 489 $comment_post_ID = $post_id = $post_exists; 490 } else { 491 492 // If it has parent, process parent first. 
493 $post_parent = (int) $post_parent; 494 if ($post_parent) { 495 // if we already know the parent, map it to the local ID 496 if ( isset( $this->post_ids_processed[$post_parent] ) ) { 497 $post_parent = $this->post_ids_processed[$post_parent]; // new ID of the parent 412 if ( is_wp_error( $post_id ) ) { 413 echo __( 'Error importing post object:', 'wordpress-importer' ) . ' ' . esc_html( $post_id->get_error_message() ) . '<br />'; 414 continue; 498 415 } 499 else {500 // record the parent for later501 $this->orphans[intval($post_ID)] = $post_parent;502 }503 }504 416 505 echo '<li>'; 506 507 $post_author = $this->checkauthor($post_author); //just so that if a post already exists, new users are not created by checkauthor 508 509 $postdata = compact('post_author', 'post_date', 'post_date_gmt', 'post_content', 'post_excerpt', 'post_title', 'post_status', 'post_name', 'comment_status', 'ping_status', 'guid', 'post_parent', 'menu_order', 'post_type', 'post_password'); 510 $postdata['import_id'] = $post_ID; 511 if ($post_type == 'attachment') { 512 $remote_url = $this->get_tag( $post, 'wp:attachment_url' ); 513 if ( !$remote_url ) 514 $remote_url = $guid; 515 516 $comment_post_ID = $post_id = $this->process_attachment($postdata, $remote_url); 517 if ( !$post_id or is_wp_error($post_id) ) 518 return $post_id; 519 } 520 else { 521 printf(__('Importing post <em>%s</em>...', 'wordpress-importer') . "\n", stripslashes($post_title)); 522 $comment_post_ID = $post_id = wp_insert_post($postdata); 523 if ( $post_id && $is_sticky == 1 ) 417 if ( $post['is_sticky'] == 1 ) 524 418 stick_post( $post_id ); 525 526 419 } 527 420 528 if ( is_wp_error( $post_id ) )529 return$post_id;421 // map pre-import ID to local ID 422 $this->processed_posts[intval($post['post_id'])] = (int) $post_id; 530 423 531 // Memorize old and new ID. 532 if ( $post_id && $post_ID ) { 533 $this->post_ids_processed[intval($post_ID)] = intval($post_id); 424 // add categories, tags and other terms 425 if ( ! 
empty( $post['terms'] ) ) { 426 foreach ( $post['terms'] as $term ) { 427 // back compat with WXR 1.0 map 'tag' to 'post_tag' 428 $taxonomy = ( 'tag' == $term['domain'] ) ? 'post_tag' : $term['domain']; 429 $term_exists = term_exists( $term['slug'], $taxonomy ); 430 $term_id = is_array( $term_exists ) ? $term_exists['term_id'] : $term_exists; 431 if ( ! $term_id ) { 432 $t = wp_insert_term( $term['name'], $taxonomy, array( 'slug' => $term['slug'] ) ); 433 if ( ! is_wp_error( $t ) ) { 434 $term_id = $t['term_id']; 435 } else { 436 echo __( 'Error importing term:', 'wordpress-importer' ) . ' ' . esc_html( $id->get_error_message() ) . '<br />'; 437 continue; 438 } 439 } 440 $terms_to_set[$taxonomy][] = intval( $term_id ); 441 } 442 443 foreach ( $terms_to_set as $tax => $ids ) { 444 $tt_ids = wp_set_post_terms( $post_id, $ids, $tax ); 445 } 446 unset( $post['terms'], $terms_to_set ); 534 447 } 535 448 536 // Add categories. 537 if (count($categories) > 0) { 538 $post_cats = array(); 539 foreach ($categories as $category) { 540 if ( '' == $category ) 541 continue; 542 $slug = sanitize_term_field('slug', $category, 0, 'category', 'db'); 543 $cat = get_term_by('slug', $slug, 'category'); 544 $cat_ID = 0; 545 if ( ! empty($cat) ) 546 $cat_ID = $cat->term_id; 547 if ($cat_ID == 0) { 548 $category = $wpdb->escape($category); 549 $cat_ID = wp_insert_category(array('cat_name' => $category)); 550 if ( is_wp_error($cat_ID) ) 551 continue; 449 // add/update comments 450 if ( ! 
empty( $post['comments'] ) ) { 451 $num_comments = 0; 452 $inserted_comments = array(); 453 foreach ( $post['comments'] as $comment ) { 454 $comment_id = $comment['comment_id']; 455 $newcomments[$comment_id]['comment_post_ID'] = $comment_post_ID; 456 $newcomments[$comment_id]['comment_author'] = $comment['comment_author']; 457 $newcomments[$comment_id]['comment_author_email'] = $comment['comment_author_email']; 458 $newcomments[$comment_id]['comment_author_IP'] = $comment['comment_author_IP']; 459 $newcomments[$comment_id]['comment_author_url'] = $comment['comment_author_url']; 460 $newcomments[$comment_id]['comment_date'] = $comment['comment_date']; 461 $newcomments[$comment_id]['comment_date_gmt'] = $comment['comment_date_gmt']; 462 $newcomments[$comment_id]['comment_content'] = $comment['comment_content']; 463 $newcomments[$comment_id]['comment_approved'] = $comment['comment_approved']; 464 $newcomments[$comment_id]['comment_type'] = ! empty( $comment['comment_type'] ) ? $comment['comment_type'] : 'comment'; 465 $newcomments[$comment_id]['comment_parent'] = $comment['comment_parent']; 466 } 467 ksort( $newcomments ); 468 469 foreach ( $newcomments as $key => $comment ) { 470 // if this is a new post we can skip the comment_exists() check 471 if ( ! $post_exists || ! comment_exists( $comment['comment_author'], $comment['comment_date'] ) ) { 472 if ( isset( $inserted_comments[$comment['comment_parent']] ) ) 473 $comment['comment_parent'] = $inserted_comments[$comment['comment_parent']]; 474 $comment = wp_filter_comment( $comment ); 475 $inserted_comments[$key] = wp_insert_comment( $comment ); 476 $num_comments++; 552 477 } 553 $post_cats[] = $cat_ID;554 478 } 555 wp_set_post_categories($post_id, $post_cats);479 unset( $newcomments, $inserted_comments, $post['comments'] ); 556 480 } 557 481 558 // Add tags. 
559 if (count($tags) > 0) { 560 $post_tags = array(); 561 foreach ($tags as $tag) { 562 if ( '' == $tag ) 563 continue; 564 $slug = sanitize_term_field('slug', $tag, 0, 'post_tag', 'db'); 565 $tag_obj = get_term_by('slug', $slug, 'post_tag'); 566 $tag_id = 0; 567 if ( ! empty($tag_obj) ) 568 $tag_id = $tag_obj->term_id; 569 if ( $tag_id == 0 ) { 570 $tag = $wpdb->escape($tag); 571 $tag_id = wp_insert_term($tag, 'post_tag'); 572 if ( is_wp_error($tag_id) ) 573 continue; 574 $tag_id = $tag_id['term_id']; 482 // add/update post meta 483 if ( isset( $post['postmeta'] ) ) { 484 foreach ( $post['postmeta'] as $meta ) { 485 $key = apply_filters( 'import_post_meta_key', $meta['key'] ); 486 if ( $key ) { 487 update_post_meta( $post_id, $key, $meta['value'] ); 488 do_action( 'import_post_meta', $post_id, $key, $meta['value'] ); 575 489 } 576 $post_tags[] = intval($tag_id);577 490 } 578 wp_set_post_tags($post_id, $post_tags);579 491 } 580 492 } 493 } 581 494 582 // Now for comments 583 preg_match_all('|<wp:comment>(.*?)</wp:comment>|is', $post, $comments); 584 $comments = $comments[1]; 585 $num_comments = 0; 586 $inserted_comments = array(); 587 if ( $comments) { 588 foreach ($comments as $comment) { 589 $comment_id = $this->get_tag( $comment, 'wp:comment_id'); 590 $newcomments[$comment_id]['comment_post_ID'] = $comment_post_ID; 591 $newcomments[$comment_id]['comment_author'] = $this->get_tag( $comment, 'wp:comment_author'); 592 $newcomments[$comment_id]['comment_author_email'] = $this->get_tag( $comment, 'wp:comment_author_email'); 593 $newcomments[$comment_id]['comment_author_IP'] = $this->get_tag( $comment, 'wp:comment_author_IP'); 594 $newcomments[$comment_id]['comment_author_url'] = $this->get_tag( $comment, 'wp:comment_author_url'); 595 $newcomments[$comment_id]['comment_date'] = $this->get_tag( $comment, 'wp:comment_date'); 596 $newcomments[$comment_id]['comment_date_gmt'] = $this->get_tag( $comment, 'wp:comment_date_gmt'); 597 
$newcomments[$comment_id]['comment_content'] = $this->get_tag( $comment, 'wp:comment_content'); 598 $newcomments[$comment_id]['comment_approved'] = $this->get_tag( $comment, 'wp:comment_approved'); 599 $newcomments[$comment_id]['comment_type'] = $this->get_tag( $comment, 'wp:comment_type'); 600 $newcomments[$comment_id]['comment_parent'] = $this->get_tag( $comment, 'wp:comment_parent'); 495 function process_menu_item( $item ) { 496 $menu_slug = false; 497 // loop through terms, assume first nav_menu term is correct menu 498 foreach ( $item['terms'] as $term ) { 499 if ( 'nav_menu' == $term['domain'] ) { 500 $menu_slug = $term['slug']; 501 break; 601 502 } 602 // Sort by comment ID, to make sure comment parents exist (if there at all)603 ksort($newcomments);604 foreach ($newcomments as $key => $comment) {605 // if this is a new post we can skip the comment_exists() check606 if ( !$post_exists || !comment_exists($comment['comment_author'], $comment['comment_date']) ) {607 if (isset($inserted_comments[$comment['comment_parent']]))608 $comment['comment_parent'] = $inserted_comments[$comment['comment_parent']];609 $comment = wp_filter_comment($comment);610 $inserted_comments[$key] = wp_insert_comment($comment);611 $num_comments++;612 }613 }614 503 } 615 504 616 if ( $num_comments ) 617 printf(' '._n('(%s comment)', '(%s comments)', $num_comments, 'wordpress-importer'), $num_comments); 505 // no nav_menu term associated with this menu item 506 if ( ! $menu_slug ) { 507 _e( 'Menu item skipped due to missing menu slug', 'wordpress-importer' ); 508 echo '<br />'; 509 return; 510 } 618 511 619 // Now for post meta 620 preg_match_all('|<wp:postmeta>(.*?)</wp:postmeta>|is', $post, $postmeta); 621 $postmeta = $postmeta[1]; 622 if ( $postmeta) { foreach ($postmeta as $p) { 623 $key = $this->get_tag( $p, 'wp:meta_key' ); 624 $value = $this->get_tag( $p, 'wp:meta_value' ); 512 $menu_id = term_exists( $menu_slug, 'nav_menu' ); 513 if ( ! 
$menu_id ) { 514 _e( sprintf( 'Menu item skipped due to invalid menu slug: %s', esc_html( $menu_slug ) ), 'wordpress-importer' ); 515 echo '<br />'; 516 return; 517 } else { 518 $menu_id = is_array( $menu_id ) ? $menu_id['term_id'] : $menu_id; 519 } 625 520 626 $this->process_post_meta($post_id, $key, $value); 521 foreach ( $item['postmeta'] as $meta ) 522 $$meta['key'] = $meta['value']; 627 523 628 } } 524 if ( 'taxonomy' == $_menu_item_type && isset( $this->processed_terms[intval($_menu_item_object_id)] ) ) { 525 $_menu_item_object_id = $this->processed_terms[intval($_menu_item_object_id)]; 526 } else if ( 'post_type' == $_menu_item_type && isset( $this->processed_posts[intval($_menu_item_object_id)] ) ) { 527 $_menu_item_object_id = $this->processed_posts[intval($_menu_item_object_id)]; 528 } else if ( 'custom' != $_menu_item_type ) { 529 // associated object is missing or not imported yet, we'll retry later 530 $this->missing_menu_items[] = $item; 531 return; 532 } 629 533 630 do_action('import_post_added', $post_id); 631 print "</li>\n"; 632 } 633 634 function process_post_meta($post_id, $key, $value) { 635 // the filter can return false to skip a particular metadata key 636 $_key = apply_filters('import_post_meta_key', $key); 637 if ( $_key ) { 638 add_post_meta( $post_id, $_key, $value ); 639 do_action('import_post_meta', $post_id, $_key, $value); 534 if ( isset( $this->processed_menu_items[intval($_menu_item_menu_item_parent)] ) ) { 535 $_menu_item_menu_item_parent = $this->processed_menu_items[intval($_menu_item_menu_item_parent)]; 536 } else if ( $_menu_item_menu_item_parent ) { 537 $this->menu_item_orphans[intval($item['post_id'])] = (int) $_menu_item_menu_item_parent; 538 $_menu_item_menu_item_parent = 0; 640 539 } 641 }642 540 643 function process_attachment($postdata, $remote_url) { 644 if ($this->fetch_attachments and $remote_url) { 645 printf( __('Importing attachment <em>%s</em>... 
', 'wordpress-importer'), htmlspecialchars($remote_url) ); 541 $args = array( 542 'menu-item-object-id' => $_menu_item_object_id, 543 'menu-item-object' => $_menu_item_object, 544 'menu-item-parent-id' => $_menu_item_menu_item_parent, 545 'menu-item-position' => intval( $item['menu_order'] ), 546 'menu-item-type' => $_menu_item_type, 547 'menu-item-title' => $item['post_title'], 548 'menu-item-url' => $_menu_item_url, 549 'menu-item-description' => $item['post_content'], 550 'menu-item-attr-title' => $item['post_excerpt'], 551 'menu-item-target' => $_menu_item_target, 552 'menu-item-classes' => $_menu_item_classes, 553 'menu-item-xfn' => $_menu_item_xfn, 554 'menu-item-status' => $item['status'] 555 ); 646 556 647 // If the URL is absolute, but does not contain http, upload it assuming the base_site_url variable 648 if ( preg_match('/^\/[\w\W]+$/', $remote_url) ) 649 $remote_url = rtrim($this->base_url,'/').$remote_url; 557 $id = wp_update_nav_menu_item( $menu_id, 0, $args ); 558 if ( $id && ! is_wp_error( $id ) ) 559 $this->processed_menu_items[intval($item['post_id'])] = (int) $id; 560 } 650 561 651 $upload = $this->fetch_remote_file($postdata, $remote_url); 652 if ( is_wp_error($upload) ) { 653 printf( __('Remote file error: %s', 'wordpress-importer'), htmlspecialchars($upload->get_error_message()) ); 654 return $upload; 655 } 656 else { 657 print '('.size_format(filesize($upload['file'])).')'; 658 } 562 function process_attachment( $post, $url ) { 563 if ( ! ( $this->fetch_attachments && $url ) ) 564 return new WP_Error( 'attachment_processing_error', 565 __( 'Fetching attachments is not allowed or an empty URL was provided', 'wordpress-importer' ) ); 659 566 660 if ( 0 == filesize( $upload['file'] ) ) { 661 print __( "Zero length file, deleting" , 'wordpress-importer') . 
"\n"; 662 unlink( $upload['file'] ); 663 return; 664 } 567 // if the URL is absolute, but does not contain address, then upload it assuming base_site_url 568 if ( preg_match( '|^/[\w\W]+$|', $url ) ) 569 $url = rtrim( $this->base_url, '/' ) . $url; 665 570 666 if ( $info = wp_check_filetype($upload['file']) ) { 667 $postdata['post_mime_type'] = $info['type']; 668 } 669 else { 670 print __('Invalid file type', 'wordpress-importer'); 671 return; 672 } 571 $upload = $this->fetch_remote_file( $url, $post ); 572 if ( is_wp_error( $upload ) ) 573 return $upload; 673 574 674 $postdata['guid'] = $upload['url']; 575 if ( $info = wp_check_filetype( $upload['file'] ) ) 576 $post['post_mime_type'] = $info['type']; 577 else 578 return new WP_Error( 'attachment_processing_error', __('Invalid file type', 'wordpress-importer') ); 675 579 676 // as per wp-admin/includes/upload.php 677 $post_id = wp_insert_attachment($postdata, $upload['file']); 678 wp_update_attachment_metadata( $post_id, wp_generate_attachment_metadata( $post_id, $upload['file'] ) ); 580 $post['guid'] = $upload['url']; 679 581 680 // remap the thumbnail url. this isn't perfect because we're just guessing the original url. 681 if ( preg_match('@^image/@', $info['type']) && $thumb_url = wp_get_attachment_thumb_url($post_id) ) { 682 $parts = pathinfo($remote_url); 683 $ext = $parts['extension']; 684 $name = basename($parts['basename'], ".{$ext}"); 685 $this->url_remap[$parts['dirname'] . '/' . $name . '.thumbnail.' . $ext] = $thumb_url; 686 } 582 // as per wp-admin/includes/upload.php 583 $post_id = wp_insert_attachment( $post, $upload['file'] ); 584 wp_update_attachment_metadata( $post_id, wp_generate_attachment_metadata( $post_id, $upload['file'] ) ); 687 585 688 return $post_id; 586 // remap the thumbnail url. this isn't perfect because we're just guessing the original url. 
587 if ( preg_match( '@^image/@', $info['type'] ) && $thumb_url = wp_get_attachment_thumb_url( $post_id ) ) { 588 $parts = pathinfo( $url ); 589 $ext = $parts['extension']; 590 $name = basename($parts['basename'], ".{$ext}"); 591 $this->url_remap[$parts['dirname'] . '/' . $name . '.thumbnail.' . $ext] = $thumb_url; 689 592 } 690 else { 691 printf( __('Skipping attachment <em>%s</em>', 'wordpress-importer'), htmlspecialchars($remote_url) ); 692 } 593 594 return $post_id; 693 595 } 694 596 695 function fetch_remote_file( $ post, $url) {597 function fetch_remote_file( $url, $post ) { 696 598 add_filter( 'http_request_timeout', array( &$this, 'bump_request_timeout' ) ); 697 599 698 $upload = wp_upload_dir($post['post_date']);699 700 600 // extract the file name and extension from the url 701 $file_name = basename( $url);601 $file_name = basename( $url ); 702 602 703 // get placeholder file in the upload dir with a unique sanitized filename 704 $upload = wp_upload_bits( $file_name, 0, '', $post['post_date']); 705 if ( $upload['error'] ) { 706 echo $upload['error']; 603 // get placeholder file in the upload dir with a unique, sanitized filename 604 $upload = wp_upload_bits( $file_name, 0, '', $post['post_date'] ); 605 if ( $upload['error'] ) 707 606 return new WP_Error( 'upload_dir_error', $upload['error'] ); 708 }709 607 710 608 // fetch the remote url and write it to the placeholder file 711 $headers = wp_get_http( $url, $upload['file']);609 $headers = wp_get_http( $url, $upload['file'] ); 712 610 713 // Request failed611 // request failed 714 612 if ( ! 
$headers ) { 715 @unlink( $upload['file']);613 @unlink( $upload['file'] ); 716 614 return new WP_Error( 'import_file_error', __('Remote server did not respond', 'wordpress-importer') ); 717 615 } 718 616 719 617 // make sure the fetch was successful 720 618 if ( $headers['response'] != '200' ) { 721 @unlink( $upload['file']);722 return new WP_Error( 'import_file_error', sprintf( __('Remote filereturned error response %1$d %2$s', 'wordpress-importer'), $headers['response'], get_status_header_desc($headers['response']) ) );619 @unlink( $upload['file'] ); 620 return new WP_Error( 'import_file_error', sprintf( __('Remote server returned error response %1$d %2$s', 'wordpress-importer'), $headers['response'], get_status_header_desc($headers['response']) ) ); 723 621 } 724 elseif ( isset($headers['content-length']) && filesize($upload['file']) != $headers['content-length'] ) { 725 @unlink($upload['file']); 622 623 $filesize = filesize( $upload['file'] ); 624 625 if ( isset( $headers['content-length'] ) && $filesize != $headers['content-length'] ) { 626 @unlink( $upload['file'] ); 726 627 return new WP_Error( 'import_file_error', __('Remote file is incorrect size', 'wordpress-importer') ); 727 628 } 728 629 729 $max_size = $this->max_attachment_size(); 730 if ( !empty($max_size) and filesize($upload['file']) > $max_size ) { 731 @unlink($upload['file']); 630 if ( 0 == $filesize ) { 631 @unlink( $upload['file'] ); 632 return new WP_Error( 'import_file_error', __('Zero size file downloaded', 'wordpress-importer') ); 633 } 634 635 $max_size = (int) $this->max_attachment_size(); 636 if ( ! 
empty( $max_size ) && $filesize > $max_size ) { 637 @unlink( $upload['file'] ); 732 638 return new WP_Error( 'import_file_error', sprintf(__('Remote file is too large, limit is %s', size_format($max_size), 'wordpress-importer')) ); 733 639 } 734 640 … … 736 642 $this->url_remap[$url] = $upload['url']; 737 643 $this->url_remap[$post['guid']] = $upload['url']; 738 644 // if the remote url is redirected somewhere else, keep track of the destination too 739 if ( $headers['x-final-location'] != $url )645 if ( isset($headers['x-final-location']) && $headers['x-final-location'] != $url ) 740 646 $this->url_remap[$headers['x-final-location']] = $upload['url']; 741 647 742 648 return $upload; 743 744 649 } 745 650 746 /** 747 * Bump up the request timeout for http requests 748 * 749 * @param int $val 750 * @return int 751 */ 752 function bump_request_timeout( $val ) { 753 return 60; 754 } 651 function backfill_parents() { 652 global $wpdb; 755 653 756 // sort by strlen, longest string first 757 function cmpr_strlen($a, $b) { 758 return strlen($b) - strlen($a); 654 // find parents for post orphans 655 foreach ( $this->post_orphans as $child_id => $parent_id ) { 656 $local_child_id = $local_parent_id = false; 657 if ( isset( $this->processed_posts[$child_id] ) ) 658 $local_child_id = $this->processed_posts[$child_id]; 659 if ( isset( $this->processed_posts[$parent_id] ) ) 660 $local_parent_id = $this->processed_posts[$parent_id]; 661 662 if ( $local_child_id && $local_parent_id ) 663 $wpdb->update( $wpdb->posts, array( 'post_parent' => $local_parent_id ), array( 'ID' => $local_child_id ), '%d', '%d' ); 664 } 665 666 // all other posts/terms are imported, retry menu items with missing associated object 667 $missing_menu_items = $this->missing_menu_items; 668 foreach ( $missing_menu_items as $item ) 669 $this->process_menu_item( $item ); 670 671 // find parents for menu item orphans 672 foreach ( $this->menu_item_orphans as $child_id => $parent_id ) { 673 $local_child_id = 
$local_parent_id = 0; 674 if ( isset( $this->processed_menu_items[$child_id] ) ) 675 $local_child_id = $this->processed_menu_items[$child_id]; 676 if ( isset( $this->processed_menu_items[$parent_id] ) ) 677 $local_parent_id = $this->processed_menu_items[$parent_id]; 678 679 if ( $local_child_id && $local_parent_id ) 680 update_post_meta( $local_child_id, '_menu_item_menu_item_parent', (int) $local_parent_id ); 681 } 759 682 } 760 683 761 // update url references in post bodies to point to the new local files762 684 function backfill_attachment_urls() { 685 global $wpdb; 763 686 764 687 // make sure we do the longest urls first, in case one is a substring of another 765 uksort( $this->url_remap, array(&$this, 'cmpr_strlen'));688 uksort( $this->url_remap, array(&$this, 'cmpr_strlen') ); 766 689 767 global $wpdb; 768 foreach ($this->url_remap as $from_url => $to_url) { 690 foreach ( $this->url_remap as $from_url => $to_url ) { 769 691 // remap urls in post_content 770 $wpdb->query( $wpdb->prepare("UPDATE {$wpdb->posts} SET post_content = REPLACE(post_content, '%s', '%s')", $from_url, $to_url) );692 $wpdb->query( $wpdb->prepare("UPDATE {$wpdb->posts} SET post_content = REPLACE(post_content, %s, %s)", $from_url, $to_url) ); 771 693 // remap enclosure urls 772 $result = $wpdb->query( $wpdb->prepare("UPDATE {$wpdb->postmeta} SET meta_value = REPLACE(meta_value, '%s', '%s') WHERE meta_key='enclosure'", $from_url, $to_url) );694 $result = $wpdb->query( $wpdb->prepare("UPDATE {$wpdb->postmeta} SET meta_value = REPLACE(meta_value, %s, %s) WHERE meta_key='enclosure'", $from_url, $to_url) ); 773 695 } 774 696 } 775 697 776 // update the post_parent of orphans now that we know the local id's of all parents 777 function backfill_parents() { 778 global $wpdb; 698 function parse( $file ) { 699 $parser = new WXR_Parser(); 700 return $parser->parse( $file ); 701 } 779 702 780 foreach ($this->orphans as $child_id => $parent_id) { 781 $local_child_id = $local_parent_id = false; 782 if 
( isset( $this->post_ids_processed[$child_id] ) ) 783 $local_child_id = $this->post_ids_processed[$child_id]; 784 if ( isset( $this->post_ids_processed[$parent_id] ) ) 785 $local_parent_id = $this->post_ids_processed[$parent_id]; 703 function header() { 704 echo '<div class="wrap">'; 705 screen_icon(); 706 echo '<h2>' . __( 'Import WordPress', 'wordpress-importer' ) . '</h2>'; 707 } 786 708 787 if ($local_child_id and $local_parent_id) { 788 $wpdb->update($wpdb->posts, array('post_parent' => $local_parent_id), array('ID' => $local_child_id) ); 789 } 790 } 709 function footer() { 710 echo '</div>'; 791 711 } 792 712 793 function is_valid_meta_key($key) { 713 function greet() { 714 echo '<div class="narrow">'; 715 echo '<p>'.__( 'Howdy! Upload your WordPress eXtended RSS (WXR) file and we’ll import the posts, pages, comments, custom fields, categories, and tags into this site.', 'wordpress-importer' ).'</p>'; 716 echo '<p>'.__( 'Choose a WXR file to upload, then click Upload file and import.', 'wordpress-importer' ).'</p>'; 717 wp_import_upload_form( 'admin.php?import=wordpress&step=1' ); 718 echo '</div>'; 719 } 720 721 function is_valid_meta_key( $key ) { 794 722 // skip attachment metadata since we'll regenerate it from scratch 795 if ( $key == '_wp_attached_file' || $key == '_wp_attachment_metadata' ) 723 // skip _edit_lock and _edit_last as not useful 724 if ( in_array( $key, array( '_wp_attached_file', '_wp_attachment_metadata', '_edit_lock', '_edit_last' ) ) ) 796 725 return false; 797 726 return $key; 798 727 } 799 728 800 729 // give the user the option of creating new users to represent authors in the import file? 
801 730 function allow_create_users() { 802 return apply_filters( 'import_allow_create_users', true);731 return apply_filters( 'import_allow_create_users', true ); 803 732 } 804 733 805 734 // give the user the option of downloading and importing attached files 806 735 function allow_fetch_attachments() { 807 return apply_filters( 'import_allow_fetch_attachments', true);736 return apply_filters( 'import_allow_fetch_attachments', true ); 808 737 } 809 738 810 function max_attachment_size() { 811 // can be overridden with a filter - 0 means no limit 812 return apply_filters('import_attachment_size_limit', 0); 739 function bump_request_timeout() { 740 return 60; 813 741 } 814 742 815 function import_start() { 816 wp_defer_term_counting(true); 817 wp_defer_comment_counting(true); 818 do_action('import_start'); 743 function max_attachment_size() { 744 return apply_filters( 'import_attachment_size_limit', 0 ); 819 745 } 820 746 821 function import_end() { 822 do_action('import_end'); 823 824 // clear the caches after backfilling 825 foreach ($this->post_ids_processed as $post_id) 826 clean_post_cache($post_id); 827 828 wp_defer_term_counting(false); 829 wp_defer_comment_counting(false); 747 function cmpr_strlen( $a, $b ) { 748 return strlen($b) - strlen($a); 830 749 } 831 832 function import($id, $fetch_attachments = false) {833 $this->id = (int) $id;834 $this->fetch_attachments = ($this->allow_fetch_attachments() && (bool) $fetch_attachments);835 836 add_filter('import_post_meta_key', array($this, 'is_valid_meta_key'));837 $file = get_attached_file($this->id);838 $this->import_file($file);839 }840 841 function import_file($file) {842 $this->file = $file;843 844 $this->import_start();845 $this->get_authors_from_post();846 wp_suspend_cache_invalidation(true);847 $this->get_entries();848 $this->process_categories();849 $this->process_tags();850 $this->process_terms();851 $result = $this->process_posts();852 wp_suspend_cache_invalidation(false);853 
$this->backfill_parents();854 $this->backfill_attachment_urls();855 $this->import_end();856 857 if ( is_wp_error( $result ) )858 return $result;859 }860 861 function handle_upload() {862 $file = wp_import_handle_upload();863 if ( isset($file['error']) ) {864 echo '<p>'.__('Sorry, there has been an error.', 'wordpress-importer').'</p>';865 echo '<p><strong>' . $file['error'] . '</strong></p>';866 return false;867 }868 $this->file = $file['file'];869 $this->id = (int) $file['id'];870 return true;871 }872 873 function dispatch() {874 if (empty ($_GET['step']))875 $step = 0;876 else877 $step = (int) $_GET['step'];878 879 $this->header();880 switch ($step) {881 case 0 :882 $this->greet();883 break;884 case 1 :885 check_admin_referer('import-upload');886 if ( $this->handle_upload() )887 $this->select_authors();888 break;889 case 2:890 check_admin_referer('import-wordpress');891 $fetch_attachments = ! empty( $_POST['attachments'] );892 $result = $this->import( $_GET['id'], $fetch_attachments);893 if ( is_wp_error( $result ) )894 echo $result->get_error_message();895 break;896 }897 $this->footer();898 }899 900 function WP_Import() {901 // Nothing.902 }903 750 } 904 751 905 752 /**