Index: trunk/parsers.php
===================================================================
--- trunk/parsers.php	(revision 0)
+++ trunk/parsers.php	(revision 0)
@@ -0,0 +1,560 @@
+<?php
+/**
+ * WordPress eXtended RSS file parser implementations
+ *
+ * @package WordPress
+ * @subpackage Importer
+ */
+
+/**
+ * WordPress Importer class for managing parsing of WXR files.
+ */
+class WXR_Parser {
+	/**
+	 * Parse a WXR file with the first available backend (SimpleXML, then the XML
+	 * Parser extension, then the regex fallback) and return that backend's result.
+	 */
+	function parse( $file ) {
+		$backend = extension_loaded( 'simplexml' ) ? 'WXR_Parser_SimpleXML'
+			: ( extension_loaded( 'xml' ) ? 'WXR_Parser_XML' : 'WXR_Parser_Regex' );
+		$parser = new $backend;
+		return $parser->parse( $file );
+	}
+}
+
+/**
+ * WXR Parser that makes use of the SimpleXML PHP extension.
+ */
+class WXR_Parser_SimpleXML {
+	/**
+	 * Parse a WXR file using SimpleXML; libxml's previous error setting is restored before returning.
+	 * @param string $file Path to the WXR file
+	 * @return array|WP_Error Parsed authors, posts, categories, tags, terms and base_url, or an error
+	 */
+	function parse( $file ) {
+		$authors = $posts = $categories = $tags = $terms = array();
+		$internal_errors = libxml_use_internal_errors(true);
+		$xml = simplexml_load_file( $file );
+		// halt if loading produces an error
+		if ( ! $xml ) {
+			libxml_use_internal_errors( $internal_errors );
+			return new WP_Error( 'WXR_parse_error', __( 'There was an error when reading this WXR file', 'wordpress-importer' ) );
+		}
+
+		// confirm that we are dealing with the correct file format
+		$wxr_version = $xml->xpath('/rss/channel/wp:wxr_version');
+		if ( ! $wxr_version || ! preg_match( '/^\d\.\d$/', (string) trim( $wxr_version[0] ) ) ) {
+			libxml_use_internal_errors( $internal_errors );
+			return new WP_Error( 'WXR_parse_error', __( 'This does not appear to be a WXR file, missing/invalid WXR version number', 'wordpress-importer' ) );
+		}
+
+		$base_url = $xml->xpath('/rss/channel/wp:base_site_url');
+		$base_url = ! empty( $base_url ) ? (string) trim( $base_url[0] ) : ''; // the element may be missing
+
+		$namespaces = $xml->getDocNamespaces();
+		if ( ! isset( $namespaces['wp'] ) ) $namespaces['wp'] = 'http://wordpress.org/export/1.1/';
+		if ( ! isset( $namespaces['excerpt'] ) ) $namespaces['excerpt'] = 'http://wordpress.org/export/1.1/excerpt/';
+
+		// grab authors
+		foreach ( $xml->xpath('/rss/channel/wp:author') as $author_arr ) {
+			$a = $author_arr->children( $namespaces['wp'] );
+			$login = (string) $a->author_login;
+			$authors[$login] = array(
+				'author_login' => $login,
+				'author_email' => (string) $a->author_email,
+				'author_display_name' => (string) $a->author_display_name,
+				'author_first_name' => (string) $a->author_first_name,
+				'author_last_name' => (string) $a->author_last_name
+			);
+		}
+
+		// grab cats, tags and terms
+		foreach ( $xml->xpath('/rss/channel/wp:category') as $term_arr ) {
+			$t = $term_arr->children( $namespaces['wp'] );
+			$categories[] = array(
+				'term_id' => (int) $t->term_id,
+				'category_nicename' => (string) $t->category_nicename,
+				'category_parent' => (string) $t->category_parent,
+				'cat_name' => (string) $t->cat_name,
+				'category_description' => (string) $t->category_description
+			);
+		}
+
+		foreach ( $xml->xpath('/rss/channel/wp:tag') as $term_arr ) {
+			$t = $term_arr->children( $namespaces['wp'] );
+			$tags[] = array(
+				'term_id' => (int) $t->term_id,
+				'tag_slug' => (string) $t->tag_slug,
+				'tag_name' => (string) $t->tag_name,
+				'tag_description' => (string) $t->tag_description
+			);
+		}
+
+		foreach ( $xml->xpath('/rss/channel/wp:term') as $term_arr ) {
+			$t = $term_arr->children( $namespaces['wp'] );
+			$terms[] = array(
+				'term_id' => (int) $t->term_id,
+				'term_taxonomy' => (string) $t->term_taxonomy,
+				'slug' => (string) $t->term_slug,
+				'term_parent' => (string) $t->term_parent,
+				'term_name' => (string) $t->term_name,
+				'term_description' => (string) $t->term_description
+			);
+		}
+
+		// grab posts
+		foreach ( $xml->channel->item as $item ) {
+			$post = array(
+				'post_title' => (string) $item->title,
+				'guid' => (string) $item->guid,
+			);
+			$dc = $item->children( 'http://purl.org/dc/elements/1.1/' );
+			$post['post_author'] = (string) $dc->creator;
+			$content = $item->children( 'http://purl.org/rss/1.0/modules/content/' );
+			$excerpt = $item->children( $namespaces['excerpt'] );
+			$post['post_content'] = (string) $content->encoded;
+			$post['post_excerpt'] = (string) $excerpt->encoded;
+			$wp = $item->children( $namespaces['wp'] );
+			$post['post_id'] = (int) $wp->post_id;
+			$post['post_date'] = (string) $wp->post_date;
+			$post['post_date_gmt'] = (string) $wp->post_date_gmt;
+			$post['comment_status'] = (string) $wp->comment_status;
+			$post['ping_status'] = (string) $wp->ping_status;
+			$post['post_name'] = (string) $wp->post_name;
+			$post['status'] = (string) $wp->status;
+			$post['post_parent'] = (int) $wp->post_parent;
+			$post['menu_order'] = (int) $wp->menu_order;
+			$post['post_type'] = (string) $wp->post_type;
+			$post['post_password'] = (string) $wp->post_password;
+			$post['is_sticky'] = (int) $wp->is_sticky;
+
+			foreach ( $item->category as $c ) {
+				$att = $c->attributes();
+				if ( isset( $att['nicename'] ) )
+					$post['terms'][] = array(
+						'name' => (string) $c,
+						'slug' => (string) $att['nicename'],
+						'domain' => (string) $att['domain']
+					);
+			}
+
+			foreach ( $wp->postmeta as $meta ) {
+				$post['postmeta'][] = array(
+					'key' => (string) $meta->meta_key,
+					'value' => (string) $meta->meta_value,
+				);
+			}
+
+			foreach ( $wp->comment as $comment ) {
+				$post['comments'][] = array(
+					'comment_id' => (int) $comment->comment_id,
+					'comment_author' => (string) $comment->comment_author,
+					'comment_author_email' => (string) $comment->comment_author_email,
+					'comment_author_IP' => (string) $comment->comment_author_IP,
+					'comment_author_url' => (string) $comment->comment_author_url,
+					'comment_date' => (string) $comment->comment_date,
+					'comment_date_gmt' => (string) $comment->comment_date_gmt,
+					'comment_content' => (string) $comment->comment_content,
+					'comment_approved' => (string) $comment->comment_approved,
+					'comment_type' => (string) $comment->comment_type,
+					'comment_parent' => (string) $comment->comment_parent,
+					'comment_user_id' => (int) $comment->comment_user_id,
+				);
+			}
+
+			$posts[] = $post;
+		}
+
+		libxml_use_internal_errors( $internal_errors );
+		return array(
+			'authors' => $authors,
+			'posts' => $posts,
+			'categories' => $categories,
+			'tags' => $tags,
+			'terms' => $terms,
+			'base_url' => $base_url
+		);
+	}
+}
+
+/**
+ * WXR Parser that makes use of the XML Parser PHP extension.
+ */
+class WXR_Parser_XML {
+	var $wp_tags = array( // wp: elements whose text tag_open()/tag_close() store in $this->data, keyed without the "wp:" prefix
+		'wp:post_id', 'wp:post_date', 'wp:post_date_gmt', 'wp:comment_status', 'wp:ping_status',
+		'wp:status', 'wp:post_name', 'wp:post_parent', 'wp:menu_order', 'wp:post_type', 'wp:post_password',
+		'wp:is_sticky', 'wp:term_id', 'wp:category_nicename', 'wp:category_parent', 'wp:cat_name', 'wp:category_description',
+		'wp:tag_slug', 'wp:tag_name', 'wp:tag_description', 'wp:term_taxonomy', 'wp:term_parent',
+		'wp:term_name', 'wp:term_description', 'wp:author_login', 'wp:author_email', 'wp:author_display_name',
+		'wp:author_first_name', 'wp:author_last_name',
+	);
+	var $wp_sub_tags = array( // wp: elements whose text is stored in $this->sub_data instead, keyed the same way
+		'wp:comment_id', 'wp:comment_author', 'wp:comment_author_email', 'wp:comment_author_url',
+		'wp:comment_author_IP',	'wp:comment_date', 'wp:comment_date_gmt', 'wp:comment_content',
+		'wp:comment_approved', 'wp:comment_type', 'wp:comment_parent', 'wp:comment_user_id',
+	);
+
+	// Parse $file with the XML Parser extension; returns the import data array or a WP_Error.
+	function parse( $file ) {
+		$this->is_wxr_file = $this->in_post = $this->cdata = $this->data = $this->sub_data = $this->in_tag = $this->in_sub_tag = false;
+		$this->authors = $this->posts = $this->term = $this->category = $this->tag = array();
+		$this->base_url = ''; // stays empty when the file has no wp:base_site_url element
+
+		$xml = xml_parser_create( 'UTF-8' );
+		xml_parser_set_option( $xml, XML_OPTION_SKIP_WHITE, 1 );
+		xml_parser_set_option( $xml, XML_OPTION_CASE_FOLDING, 0 );
+		xml_set_object( $xml, $this );
+		xml_set_character_data_handler( $xml, 'cdata' );
+		xml_set_element_handler( $xml, 'tag_open', 'tag_close' );
+
+		$parsed = xml_parse( $xml, file_get_contents( $file ), true );
+		$error_code = xml_get_error_code( $xml );
+		xml_parser_free( $xml ); // release the parser whether or not parsing succeeded
+		if ( ! $parsed )
+			return new WP_Error( 'WXR_parse_error', __( 'There was an error when reading this WXR file', 'wordpress-importer' ), array( $error_code, xml_error_string( $error_code ) ) );
+		if ( ! $this->is_wxr_file )
+			return new WP_Error( 'WXR_parse_error', __( 'This does not appear to be a WXR file, missing/invalid WXR version number', 'wordpress-importer' ) );
+
+		return array(
+			'authors' => $this->authors,
+			'posts' => $this->posts,
+			'categories' => $this->category,
+			'tags' => $this->tag,
+			'terms' => $this->term,
+			'base_url' => $this->base_url
+		);
+	}
+
+	// Element-start handler: notes which field the character data that follows belongs to.
+	function tag_open( $parse, $tag, $attr ) {
+		$data_fields = array(
+			'guid' => 'guid', 'dc:creator' => 'post_author', 'content:encoded' => 'post_content',
+			'excerpt:encoded' => 'post_excerpt', 'wp:term_slug' => 'slug',
+		);
+		$sub_data_fields = array( 'wp:meta_key' => 'key', 'wp:meta_value' => 'value' );
+
+		if ( in_array( $tag, $this->wp_tags ) ) {
+			$this->in_tag = substr( $tag, 3 ); // field name is the tag without its "wp:" prefix
+		} elseif ( in_array( $tag, $this->wp_sub_tags ) ) {
+			$this->in_sub_tag = substr( $tag, 3 );
+		} elseif ( isset( $data_fields[$tag] ) ) {
+			$this->in_tag = $data_fields[$tag];
+		} elseif ( isset( $sub_data_fields[$tag] ) ) {
+			$this->in_sub_tag = $sub_data_fields[$tag];
+		} elseif ( 'category' == $tag ) {
+			// only categories carrying both attributes are tracked
+			if ( isset( $attr['domain'], $attr['nicename'] ) ) {
+				$this->sub_data['domain'] = $attr['domain'];
+				$this->sub_data['slug'] = $attr['nicename'];
+			}
+		} elseif ( 'item' == $tag || 'title' == $tag ) {
+			// an opening <item> also flags that a post has started; titles only count from then on
+			if ( 'item' == $tag )
+				$this->in_post = true;
+			if ( $this->in_post )
+				$this->in_tag = 'post_title';
+		}
+	}
+
+	// Character data handler. The parser may hand over a single text node in several
+	// pieces, so the raw text is only accumulated here and trimmed once in tag_close().
+	function cdata( $parser, $cdata ) {
+		$this->cdata .= $cdata;
+	}
+
+	// Element-end handler: files the text gathered for the element that just closed.
+	function tag_close( $parser, $tag ) {
+		$text = trim( (string) $this->cdata ); // used as is below, so a literal "0" is preserved
+		switch ( $tag ) {
+			case 'wp:comment':
+				if ( ! empty( $this->sub_data ) )
+					$this->data['comments'][] = $this->sub_data;
+				$this->sub_data = false;
+				break;
+			case 'category':
+				if ( ! empty( $this->sub_data ) ) {
+					$this->sub_data['name'] = $text;
+					$this->data['terms'][] = $this->sub_data;
+				}
+				$this->sub_data = false;
+				break;
+			case 'wp:postmeta':
+				if ( ! empty( $this->sub_data ) )
+					$this->data['postmeta'][] = $this->sub_data;
+				$this->sub_data = false;
+				break;
+			case 'item':
+				$this->posts[] = $this->data;
+				$this->data = false;
+				break;
+			case 'wp:category':
+			case 'wp:tag':
+			case 'wp:term':
+				$n = substr( $tag, 3 );
+				array_push( $this->$n, $this->data );
+				$this->data = false;
+				break;
+			case 'wp:author':
+				if ( ! empty($this->data['author_login']) )
+					$this->authors[$this->data['author_login']] = $this->data;
+				$this->data = false;
+				break;
+			case 'wp:base_site_url':
+				$this->base_url = $text;
+				break;
+			case 'wp:wxr_version':
+				$this->is_wxr_file = preg_match( '/\d+\.\d+/', $text );
+				break;
+			default:
+				if ( $this->in_sub_tag ) {
+					$this->sub_data[$this->in_sub_tag] = $text;
+					$this->in_sub_tag = false;
+				} else if ( $this->in_tag ) {
+					$this->data[$this->in_tag] = $text;
+					$this->in_tag = false;
+				}
+		}
+
+		$this->cdata = false;
+	}
+}
+
+/**
+ * WXR Parser that uses regular expressions. Fallback for installs without an XML parser.
+ */
+class WXR_Parser_Regex {
+	function WXR_Parser_Regex() { // PHP 4 style constructor, forwards to __construct()
+		$this->__construct();
+	}
+
+	function __construct() {
+		$this->has_gzip = is_callable( 'gzopen' ); // read by the fopen()/feof()/fgets()/fclose() wrappers
+	}
+
+	// Parse $file line by line with regular expressions; returns the import data
+	// array, or a WP_Error when no WXR version number is found in the file.
+	function parse( $file ) {
+		$is_wxr = $in_post = false;
+		$post = '';
+		// reset state so that elements missing from the file yield empty values
+		$this->authors = $this->posts = $this->categories = $this->tags = $this->terms = array();
+		$this->base_url = '';
+
+		$fp = $this->fopen( $file, 'r' );
+		if ( $fp ) {
+			while ( ! $this->feof( $fp ) ) {
+				$importline = rtrim( $this->fgets( $fp ) );
+				if ( ! $is_wxr && preg_match( '|<wp:wxr_version>\d+\.\d+</wp:wxr_version>|', $importline ) )
+					$is_wxr = true;
+
+				// wp: elements are only picked up when they open and close on this line
+				if ( false !== strpos( $importline, '<wp:base_site_url>' ) && preg_match( '|<wp:base_site_url>(.*?)</wp:base_site_url>|is', $importline, $url ) ) {
+					$this->base_url = $url[1];
+					continue;
+				}
+				if ( false !== strpos( $importline, '<wp:category>' ) && preg_match( '|<wp:category>(.*?)</wp:category>|is', $importline, $category ) ) {
+					$this->categories[] = $this->process_category( $category[1] );
+					continue;
+				}
+				if ( false !== strpos( $importline, '<wp:tag>' ) && preg_match( '|<wp:tag>(.*?)</wp:tag>|is', $importline, $tag ) ) {
+					$this->tags[] = $this->process_tag( $tag[1] );
+					continue;
+				}
+				if ( false !== strpos( $importline, '<wp:term>' ) && preg_match( '|<wp:term>(.*?)</wp:term>|is', $importline, $term ) ) {
+					$this->terms[] = $this->process_term( $term[1] );
+					continue;
+				}
+				if ( false !== strpos( $importline, '<wp:author>' ) && preg_match( '|<wp:author>(.*?)</wp:author>|is', $importline, $author ) ) {
+					$a = $this->process_author( $author[1] );
+					$this->authors[$a['author_login']] = $a;
+					continue;
+				}
+				if ( false !== strpos( $importline, '<item>' ) ) {
+					$post = '';
+					$in_post = true;
+					continue;
+				}
+				if ( false !== strpos( $importline, '</item>' ) ) {
+					$in_post = false;
+					$this->posts[] = $this->process_post( $post );
+					continue;
+				}
+				if ( $in_post ) {
+					$post .= $importline . "\n";
+				}
+			}
+			$this->fclose($fp);
+		}
+
+		if ( ! $is_wxr )
+			return new WP_Error( 'WXR_parse_error', __( 'This does not appear to be a WXR file, missing/invalid WXR version number', 'wordpress-importer' ) );
+
+		return array(
+			'authors' => $this->authors,
+			'posts' => $this->posts,
+			'categories' => $this->categories,
+			'tags' => $this->tags,
+			'terms' => $this->terms,
+			'base_url' => $this->base_url
+		);
+	}
+
+	// Return the trimmed, DB-escaped text of the first <$tag> element in $string
+	// (with a wrapping CDATA section removed), or '' when the element is not found.
+	function get_tag( $string, $tag ) {
+		global $wpdb;
+		preg_match( "|<$tag.*?>(.*?)</$tag>|is", $string, $found );
+		if ( ! isset( $found[1] ) )
+			return '';
+
+		$inner = preg_replace( '|^<!\[CDATA\[(.*)\]\]>$|s', '$1', $found[1] );
+		return $wpdb->escape( trim( $inner ) );
+	}
+
+	// Extract the fields of one <wp:category> element; $c is the markup between
+	// its opening and closing tag.
+	function process_category( $c ) {
+		$category = array();
+		foreach ( array( 'term_id', 'cat_name', 'category_nicename', 'category_parent', 'category_description' ) as $field )
+			$category[$field] = $this->get_tag( $c, "wp:$field" );
+
+		return $category;
+	}
+
+	// Extract the fields of one <wp:tag> element from the markup inside it.
+	function process_tag( $t ) {
+		$tag = array();
+		foreach ( array( 'term_id', 'tag_name', 'tag_slug', 'tag_description' ) as $field )
+			$tag[$field] = $this->get_tag( $t, "wp:$field" );
+
+		return $tag;
+	}
+
+	// Extract the fields of one <wp:term> element; $t is the markup between its
+	// opening and closing tag.
+	function process_term( $t ) {
+		// every field is read from the wp: element of the same name, except for the slug
+		$term = array();
+		foreach ( array( 'term_id', 'term_taxonomy', 'slug', 'term_parent', 'term_name', 'term_description' ) as $field )
+			$term[$field] = $this->get_tag( $t, 'slug' == $field ? 'wp:term_slug' : "wp:$field" );
+
+		return $term;
+	}
+
+	// Extract the fields of one <wp:author> element from the markup inside it.
+	function process_author( $a ) {
+		$author = array();
+		foreach ( array( 'login', 'email', 'display_name', 'first_name', 'last_name' ) as $suffix ) {
+			$field = "author_$suffix";
+			$author[$field] = $this->get_tag( $a, "wp:$field" );
+		}
+		return $author;
+	}
+
+	/**
+	 * Turn the markup of one item into a post array.
+	 *
+	 * @param string $post Markup found between <item> and </item>
+	 * @return array Post fields, plus 'terms', 'comments' and 'postmeta' when the item has any
+	 */
+	function process_post( $post ) {
+		$post_terms = $post_comments = $post_postmeta = array();
+		// post fields, in the order they appear in the result => element they are read from
+		$fields = array(
+			'post_id' => 'wp:post_id', 'post_author' => 'dc:creator', 'post_date' => 'wp:post_date',
+			'post_date_gmt' => 'wp:post_date_gmt', 'post_content' => 'content:encoded',
+			'post_excerpt' => 'excerpt:encoded', 'post_title' => 'title', 'status' => 'wp:status',
+			'post_name' => 'wp:post_name', 'comment_status' => 'wp:comment_status',
+			'ping_status' => 'wp:ping_status', 'guid' => 'guid', 'post_parent' => 'wp:post_parent',
+			'menu_order' => 'wp:menu_order', 'post_type' => 'wp:post_type',
+			'post_password' => 'wp:post_password', 'is_sticky' => 'wp:is_sticky',
+		);
+		$postdata = array();
+		foreach ( $fields as $field => $tag )
+			$postdata[$field] = $this->get_tag( $post, $tag );
+
+		// lowercase upper-case HTML tag names and self-close <br>/<hr> in the content and excerpt
+		foreach ( array( 'post_content', 'post_excerpt' ) as $field ) {
+			$html = preg_replace_callback( '|<(/?[A-Z]+)|', array( &$this, '_normalize_tag' ), $postdata[$field] );
+			$postdata[$field] = str_replace( array( '<br>', '<hr>' ), array( '<br />', '<hr />' ), $html );
+		}
+
+		// terms come from <category> elements that carry both a domain and a nicename
+		preg_match_all( '|<category domain="([^"]+?)" nicename="([^"]+?)">(.+?)</category>|is', $post, $terms, PREG_SET_ORDER );
+		foreach ( $terms as $t ) {
+			$post_terms[] = array(
+				'slug' => $t[2],
+				'domain' => $t[1],
+				'name' => str_replace( array( '<![CDATA[', ']]>' ), '', $t[3] ),
+			);
+		}
+		if ( ! empty( $post_terms ) ) $postdata['terms'] = $post_terms;
+
+		// comments attached to the post, with the same keys the other parsers produce
+		preg_match_all( '|<wp:comment>(.+?)</wp:comment>|is', $post, $comments );
+		$comments = $comments[1];
+		if ( $comments ) {
+			foreach ( $comments as $comment ) {
+				$post_comments[] = array(
+					'comment_id' => $this->get_tag( $comment, 'wp:comment_id' ),
+					'comment_author' => $this->get_tag( $comment, 'wp:comment_author' ),
+					'comment_author_email' => $this->get_tag( $comment, 'wp:comment_author_email' ),
+					'comment_author_IP' => $this->get_tag( $comment, 'wp:comment_author_IP' ),
+					'comment_author_url' => $this->get_tag( $comment, 'wp:comment_author_url' ),
+					'comment_date' => $this->get_tag( $comment, 'wp:comment_date' ),
+					'comment_date_gmt' => $this->get_tag( $comment, 'wp:comment_date_gmt' ),
+					'comment_content' => $this->get_tag( $comment, 'wp:comment_content' ),
+					'comment_approved' => $this->get_tag( $comment, 'wp:comment_approved' ),
+					'comment_type' => $this->get_tag( $comment, 'wp:comment_type' ),
+					'comment_parent' => $this->get_tag( $comment, 'wp:comment_parent' ),
+					'comment_user_id' => $this->get_tag( $comment, 'wp:comment_user_id' ),
+				);
+			}
+		}
+		if ( ! empty( $post_comments ) ) $postdata['comments'] = $post_comments;
+
+		// custom fields stored with the post
+		preg_match_all( '|<wp:postmeta>(.+?)</wp:postmeta>|is', $post, $postmeta );
+		$postmeta = $postmeta[1];
+		if ( $postmeta ) {
+			foreach ( $postmeta as $p ) {
+				$post_postmeta[] = array(
+					'key' => $this->get_tag( $p, 'wp:meta_key' ),
+					'value' => $this->get_tag( $p, 'wp:meta_value' ),
+				);
+			}
+		}
+		if ( ! empty( $post_postmeta ) ) $postdata['postmeta'] = $post_postmeta;
+
+		return $postdata;
+	}
+
+	function _normalize_tag( $matches ) { // preg_replace_callback() callback used by process_post()
+		return '<' . strtolower( $matches[1] ); // $matches[1] is the tag name, with its leading slash if any
+	}
+
+	// Open $filename for the line-based parser, using gzopen() when has_gzip is set
+	// and plain fopen() otherwise.
+	function fopen( $filename, $mode = 'r' ) {
+		return $this->has_gzip ? gzopen( $filename, $mode ) : fopen( $filename, $mode );
+	}
+
+	// Report whether the end of the file behind $fp has been reached, using gzeof()
+	// when has_gzip is set and plain feof() otherwise.
+	function feof( $fp ) {
+		return $this->has_gzip ? gzeof( $fp ) : feof( $fp );
+	}
+
+	// Read the next line from $fp, limited by $len, using gzgets() when has_gzip is
+	// set and plain fgets() otherwise.
+	function fgets( $fp, $len = 8192 ) {
+		return $this->has_gzip ? gzgets( $fp, $len ) : fgets( $fp, $len );
+	}
+
+	// Close the handle returned by fopen() above, using gzclose() when has_gzip is
+	// set and plain fclose() otherwise.
+	function fclose( $fp ) {
+		return $this->has_gzip ? gzclose( $fp ) : fclose( $fp );
+	}
+}
Index: trunk/readme.txt
===================================================================
--- trunk/readme.txt	(revision 303281)
+++ trunk/readme.txt	(working copy)
@@ -3,26 +3,39 @@
 Donate link: 
 Tags: importer, wordpress
 Requires at least: 3.0
-Tested up to: 3.0
+Tested up to: 3.0.1
 Stable tag: 0.2
 
-Import posts, pages, comments, custom fields, categories, and tags from a WordPress export file.
+Import posts, pages, comments, custom fields, categories, tags and more from a WordPress export file.
 
 == Description ==
 
-Import posts, pages, comments, custom fields, categories, and tags from a WordPress export file.
+Import posts, pages, comments, custom fields, categories, tags and more from a WordPress export file.
 
 == Installation ==
 
 1. Upload the `wordpress-importer` folder to the `/wp-content/plugins/` directory
 1. Activate the plugin through the 'Plugins' menu in WordPress
-1. Go to the Tools -> Import screen, Click on WordPress
+1. Go to the Tools -> Import screen, click on WordPress
 
-== Frequently Asked Questions ==
+== Changelog ==
 
-== Screenshots ==
+= 0.3 =
+* Use an XML Parser if possible
+* Proper import support for nav menus
+* ... and more
 
-== Changelog ==
-
 = 0.1 =
 * Initial release
+
+== Upgrade Notice ==
+
+= 0.3 =
+Upgrade for a more robust and reliable experience when importing a WordPress export file.
+
+== Filters ==
+
+The importer has a few filters that let you completely enable or block certain features:
+* `import_allow_create_users`: return false if you only want to allow mapping to existing users
+* `import_allow_fetch_attachments`: return false if you do not wish to allow importing and downloading of attachments
+* `import_attachment_size_limit`: return an integer value for the maximum file size in bytes to save (default is 0, which is unlimited)
Index: trunk/wordpress-importer.php
===================================================================
--- trunk/wordpress-importer.php	(revision 303281)
+++ trunk/wordpress-importer.php	(working copy)
@@ -2,733 +2,639 @@
 /*
 Plugin Name: WordPress Importer
 Plugin URI: http://wordpress.org/extend/plugins/wordpress-importer/
-Description: Import posts, pages, comments, custom fields, categories, and tags from a WordPress export file.
+Description: Import posts, pages, comments, custom fields, categories, tags and more from a WordPress export file.
 Author: wordpressdotorg
 Author URI: http://wordpress.org/
-Version: 0.2
-Stable tag: 0.2
+Version: 0.3
 License: GPL v2 - http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
 */
 
-if ( !defined('WP_LOAD_IMPORTERS') )
+if ( ! defined( 'WP_LOAD_IMPORTERS' ) )
 	return;
 
 // Load Importer API
 require_once ABSPATH . 'wp-admin/includes/import.php';
 
-if ( !class_exists( 'WP_Importer' ) ) {
+if ( ! class_exists( 'WP_Importer' ) ) {
 	$class_wp_importer = ABSPATH . 'wp-admin/includes/class-wp-importer.php';
 	if ( file_exists( $class_wp_importer ) )
-		require_once $class_wp_importer;
+		require $class_wp_importer;
 }
 
+// include WXR file parsers
+require dirname( __FILE__ ) . '/parsers.php';
+
 /**
- * WordPress Importer
+ * WordPress Importer class for managing the import process of a WXR file
  *
  * @package WordPress
  * @subpackage Importer
  */
 if ( class_exists( 'WP_Importer' ) ) {
 class WP_Import extends WP_Importer {
-
-	var $post_ids_processed = array ();
-	var $orphans = array ();
-	var $file;
 	var $id;
-	var $mtnames = array ();
-	var $newauthornames = array ();
-	var $allauthornames = array ();
 
-	var $author_ids = array ();
-	var $tags = array ();
-	var $categories = array ();
-	var $terms = array ();
-	var $authors = array ();
+	var $authors = array();
+	var $posts = array();
+	var $terms = array();
+	var $categories = array();
+	var $tags = array();
+	var $base_url = '';
 
-	var $j = -1;
+	var $processed_authors = array();
+	var $processed_terms = array();
+	var $processed_posts = array();
+	var $post_orphans = array();
+	var $processed_menu_items = array();
+	var $menu_item_orphans = array();
+	var $missing_menu_items = array();
+
+	var $authors_from_posts = false;
 	var $fetch_attachments = false;
-	var $url_remap = array ();
+	var $url_remap = array();
 
-	function header() {
-		echo '<div class="wrap">';
-		screen_icon();
-		echo '<h2>'.__('Import WordPress', 'wordpress-importer').'</h2>';
-	}
+	function WP_Import() { /* nothing */ }
 
-	function footer() {
-		echo '</div>';
-	}
+	function dispatch() {
+		$this->header();
 
-	function greet() {
-		echo '<div class="narrow">';
-		echo '<p>'.__('Howdy! Upload your WordPress eXtended RSS (WXR) file and we&#8217;ll import the posts, pages, comments, custom fields, categories, and tags into this site.', 'wordpress-importer').'</p>';
-		echo '<p>'.__('Choose a WordPress WXR file to upload, then click Upload file and import.', 'wordpress-importer').'</p>';
-		wp_import_upload_form("admin.php?import=wordpress&amp;step=1");
-		echo '</div>';
-	}
-
-	function get_tag( $string, $tag ) {
-		global $wpdb;
-		preg_match("|<$tag.*?>(.*?)</$tag>|is", $string, $return);
-		if ( isset($return[1]) ) {
-			$return = preg_replace('|^<!\[CDATA\[(.*)\]\]>$|s', '$1', $return[1]);
-			$return = $wpdb->escape( trim( $return ) );
-		} else {
-			$return = '';
+		$step = empty( $_GET['step'] ) ? 0 : (int) $_GET['step'];
+		switch ( $step ) {
+			case 0:
+				$this->greet();
+				break;
+			case 1:
+				check_admin_referer( 'import-upload' );
+				if ( $this->handle_upload() )
+					$this->import_options();
+				break;
+			case 2:
+				check_admin_referer( 'import-wordpress' );
+				$this->fetch_attachments = ( ! empty( $_POST['fetch_attachments'] ) && $this->allow_fetch_attachments() );
+				$this->id = (int) $_POST['import_id'];
+				$file = get_attached_file( $this->id );
+				$this->import( $file );
+				break;
 		}
-		return $return;
-	}
 
-	function has_gzip() {
-		return is_callable('gzopen');
+		$this->footer();
 	}
 
-	function fopen($filename, $mode='r') {
-		if ( $this->has_gzip() )
-			return gzopen($filename, $mode);
-		return fopen($filename, $mode);
-	}
+	function import( $file ) {
+		add_filter( 'import_post_meta_key', array( $this, 'is_valid_meta_key' ) );
 
-	function feof($fp) {
-		if ( $this->has_gzip() )
-			return gzeof($fp);
-		return feof($fp);
-	}
+		$this->import_start( $file );
 
-	function fgets($fp, $len=8192) {
-		if ( $this->has_gzip() )
-			return gzgets($fp, $len);
-		return fgets($fp, $len);
-	}
+		$this->get_author_mapping();
 
-	function fclose($fp) {
-		if ( $this->has_gzip() )
-			return gzclose($fp);
-		return fclose($fp);
-	}
+		wp_suspend_cache_invalidation( true );
+		$this->process_categories();
+		$this->process_tags();
+		$this->process_terms();
+		$this->process_posts();
+		wp_suspend_cache_invalidation( false );
 
-	function get_entries($process_post_func=NULL) {
-		set_magic_quotes_runtime(0);
+		// update items with missing/incorrect parent IDs
+		$this->backfill_parents();
+		// update attachment references within posts and postmeta
+		$this->backfill_attachment_urls();
 
-		$doing_entry = false;
-		$is_wxr_file = false;
+		$this->import_end();
+	}
 
-		$fp = $this->fopen($this->file, 'r');
-		if ($fp) {
-			while ( !$this->feof($fp) ) {
-				$importline = rtrim($this->fgets($fp));
+	function import_start( $file ) {
+		$import_arr = $this->parse( $file );
 
-				// this doesn't check that the file is perfectly valid but will at least confirm that it's not the wrong format altogether
-				if ( !$is_wxr_file && preg_match('|xmlns:wp="http://wordpress[.]org/export/\d+[.]\d+/"|', $importline) )
-					$is_wxr_file = true;
-
-				if ( false !== strpos($importline, '<wp:base_site_url>') ) {
-					preg_match('|<wp:base_site_url>(.*?)</wp:base_site_url>|is', $importline, $url);
-					$this->base_url = $url[1];
-					continue;
-				}
-				if ( false !== strpos($importline, '<wp:category>') ) {
-					preg_match('|<wp:category>(.*?)</wp:category>|is', $importline, $category);
-					$this->categories[] = $category[1];
-					continue;
-				}
-				if ( false !== strpos($importline, '<wp:tag>') ) {
-					preg_match('|<wp:tag>(.*?)</wp:tag>|is', $importline, $tag);
-					$this->tags[] = $tag[1];
-					continue;
-				}
-				if ( false !== strpos($importline, '<wp:term>') ) {
-					preg_match('|<wp:term>(.*?)</wp:term>|is', $importline, $term);
-					$this->terms[] = $term[1];
-					continue;
-				}
-				if ( false !== strpos($importline, '<wp:author>') ) {
-					preg_match('|<wp:author>(.*?)</wp:author>|is', $importline, $author);
-					$this->authors[] = $author[1];
-					continue;
-				}
-				if ( false !== strpos($importline, '<item>') ) {
-					$this->post = '';
-					$doing_entry = true;
-					continue;
-				}
-				if ( false !== strpos($importline, '</item>') ) {
-					$doing_entry = false;
-					if ($process_post_func)
-						call_user_func($process_post_func, $this->post);
-					continue;
-				}
-				if ( $doing_entry ) {
-					$this->post .= $importline . "\n";
-				}
-			}
-
-			$this->fclose($fp);
+		if ( is_wp_error( $import_arr ) ) {
+			echo '<p><strong>' . __( 'Sorry, there has been an error.', 'wordpress-importer' ) . '</strong></p>';
+			echo '<p>' . esc_html( $import_arr->get_error_message() ) . '</p>';
+			$this->footer();
+			die();
 		}
 
-		return $is_wxr_file;
+		$this->get_authors_from_import( $import_arr );
+		$this->posts = $import_arr['posts'];
+		$this->terms = $import_arr['terms'];
+		$this->categories = $import_arr['categories'];
+		$this->tags = $import_arr['tags'];
+		$this->base_url = esc_url( $import_arr['base_url'] );
 
+		wp_defer_term_counting( true );
+		wp_defer_comment_counting( true );
+
+		do_action( 'import_start' );
 	}
 
-	function get_wp_authors() {
-		// We need to find unique values of author names, while preserving the order, so this function emulates the unique_value(); php function, without the sorting.
-		$temp = $this->allauthornames;
-		$authors[0] = array_shift($temp);
-		$y = count($temp) + 1;
-		for ($x = 1; $x < $y; $x ++) {
-			$next = array_shift($temp);
-			if (!(in_array($next, $authors)))
-				array_push($authors, $next);
+	function import_end() {
+		wp_import_cleanup( $this->id );
+
+		wp_cache_flush();
+		foreach ( get_taxonomies() as $tax ) {
+			delete_option( "{$tax}_children" );
+			_get_term_hierarchy( $tax );
 		}
 
-		return $authors;
+		wp_defer_term_counting( false );
+		wp_defer_comment_counting( false );
+
+		echo '<p>' . __( 'All done.' ) . ' <a href="' . admin_url() . '">' . __( 'Have fun!' ) . '</a>' . '</p>';
+
+		do_action( 'import_end' );
 	}
 
-	function get_authors_from_post() {
-		global $current_user;
+	function handle_upload() { // Take the uploaded file, parse it and collect its authors; prints the error and returns false on failure, true on success.
+		$file = wp_import_handle_upload();
 
-		// this will populate $this->author_ids with a list of author_names => user_ids
+		if ( isset( $file['error'] ) ) { // the upload itself failed: show the reported error and stop
+			echo '<p><strong>' . __( 'Sorry, there has been an error.', 'wordpress-importer' ) . '</strong></p>';
+			echo '<p>' . esc_html( $file['error'] ) . '</p>';
+			return false;
+		}
 
-		foreach ( (array) $_POST['author_in'] as $i => $in_author_name ) {
+		$this->id = (int) $file['id']; // kept for the hidden import_id form field and the final cleanup
+		$import_data = $this->parse( $file['file'] );
+		if ( is_wp_error( $import_data ) ) { // parsing failed: show the parser's message and stop
+			echo '<p><strong>' . __( 'Sorry, there has been an error.', 'wordpress-importer' ) . '</strong></p>';
+			echo '<p>' . esc_html( $import_data->get_error_message() ) . '</p>';
+			return false;
+		}
 
-			if ( !empty($_POST['user_select'][$i]) ) {
-				// an existing user was selected in the dropdown list
-				$user = get_userdata( intval($_POST['user_select'][$i]) );
-				if ( isset($user->ID) )
-					$this->author_ids[$in_author_name] = $user->ID;
-			}
-			elseif ( $this->allow_create_users() ) {
-				// nothing was selected in the dropdown list, so we'll use the name in the text field
+		$this->get_authors_from_import( $import_data ); // fills $this->authors for the author-mapping form
 
-				$new_author_name = trim($_POST['user_create'][$i]);
-				// if the user didn't enter a name, assume they want to use the same name as in the import file
-				if ( empty($new_author_name) )
-					$new_author_name = $in_author_name;
+		return true;
+	}
 
-				$user_id = username_exists($new_author_name);
-				if ( !$user_id ) {
-					$user_id = wp_create_user($new_author_name, wp_generate_password());
+	function get_authors_from_import( $import_data ) { // Fill $this->authors from the parsed author list, or derive it from post authors when the file lists none.
+		if ( ! empty( $import_data['authors'] ) ) {
+			$this->authors = $import_data['authors'];
+		// no author information, grab it from the posts
+		} else {
+			foreach ( $import_data['posts'] as $post ) {
+				$login = sanitize_user( $post['post_author'], true );
+				if ( empty( $login ) ) { // nothing usable is left after sanitizing: warn and skip this author
+					printf( __( 'Error importing author %s their posts will be attributed to the current user', 'wordpress-importer' ), esc_html( $post['post_author'] ) );
+					echo '<br />';
+					continue;
 				}
 
-				if ( !is_wp_error( $user_id ) ) {
-					$this->author_ids[$in_author_name] = $user_id;
-				}
+				if ( ! isset($this->authors[$login]) )
+					$this->authors[$login] = array(
+						'author_login' => $login,
+						'author_display_name' => $post['post_author']
+					);
 			}
-
-			// failsafe: if the user_id was invalid, default to the current user
-			if ( empty($this->author_ids[$in_author_name]) ) {
-				$this->author_ids[$in_author_name] = intval($current_user->ID);
-			}
+			$this->authors_from_posts = true; // flag checked elsewhere before offering to create new users
 		}
-
 	}
 
-	function wp_authors_form() {
+	function import_options() { // Print the step-2 form: optional author mapping list, optional attachment checkbox, submit button.
+		$j = 0; // running index passed to author_select() for each listed author
 ?>
-<h2><?php _e('Assign Authors', 'wordpress-importer'); ?></h2>
-<p><?php _e('To make it easier for you to edit and save the imported posts and drafts, you may want to change the name of the author of the posts. For example, you may want to import all the entries as <code>admin</code>s entries.', 'wordpress-importer'); ?></p>
-<?php
-	if ( $this->allow_create_users() ) {
-		echo '<p>'.__('If a new user is created by WordPress, a password will be randomly generated. Manually change the user&#8217;s details if necessary.', 'wordpress-importer')."</p>\n";
-	}
+<form action="<?php echo admin_url( 'admin.php?import=wordpress&amp;step=2' ); ?>" method="post">
+	<?php wp_nonce_field( 'import-wordpress' ); ?>
+	<input type="hidden" name="import_id" value="<?php echo $this->id; ?>" />
 
+<?php if ( ! empty( $this->authors ) ) : /* author mapping section, only when authors were collected */ ?>
+	<h3><?php _e('Assign Authors', 'wordpress-importer'); ?></h3>
+	<p><?php _e( 'To make it easier for you to edit and save the imported content, you may want to reassign the author of the imported item to an existing user of this site. For example, you may want to import all the entries as <code>admin</code>s entries.', 'wordpress-importer' ); ?></p>
+<?php if ( ! $this->authors_from_posts && $this->allow_create_users() ) : /* same condition author_select() uses for its "import author" wording */ ?>
+	<p><?php printf( __( 'If a new user is created by WordPress, a new password will be randomly generated and the new user&#8217;s role will be set as %s. Manually changing the new user&#8217;s details will be necessary.', 'wordpress-importer' ), esc_html( get_option('default_role') ) ); ?></p>
+<?php endif; ?>
+	<ol id="authors">
+<?php foreach ( $this->authors as $author ) : ?>
+		<li><?php $this->author_select( $j++, $author ); ?></li>
+<?php endforeach; ?>
+	</ol>
+<?php endif; ?>
 
-		$authors = $this->get_wp_authors();
-		echo '<form action="?import=wordpress&amp;step=2&amp;id=' . $this->id . '" method="post">';
-		wp_nonce_field('import-wordpress');
-?>
-<ol id="authors">
+<?php if ( $this->allow_fetch_attachments() ) : /* attachment download opt-in */ ?>
+	<h3><?php _e('Import Attachments', 'wordpress-importer'); ?></h3>
+	<p>
+		<input type="checkbox" value="1" name="fetch_attachments" id="import-attachments" />
+		<label for="import-attachments"><?php _e( 'Download and import file attachments', 'wordpress-importer' ); ?></label>
+	</p>
+<?php endif; ?>
+
+	<p class="submit"><input type="submit" class="button" value="<?php esc_attr_e( 'Submit', 'wordpress-importer' ); ?>" /></p>
+</form>
 <?php
-		$j = -1;
-		foreach ($authors as $author) {
-			++ $j;
-			echo '<li>'.__('Import author:', 'wordpress-importer').' <strong>'.$author.'</strong><br />';
-			$this->users_form($j, $author);
-			echo '</li>';
-		}
+	}
 
-		if ( $this->allow_fetch_attachments() ) {
+	function author_select( $n, $author ) { // Print the mapping controls for one imported author; $n indexes the imported_authors[] / user_map[] fields.
+		if ( ! $this->authors_from_posts && $this->allow_create_users() )
+			printf( __( 'Import author %1$s or map to existing user', 'wordpress-importer' ), '<strong>' . esc_html( $author['author_display_name'] ) . '</strong>' );
+		else
+			printf( __( 'Map author %1$s to existing user', 'wordpress-importer' ), '<strong>' . esc_html( $author['author_display_name'] ) . '</strong>' );
 ?>
-</ol>
-<h2><?php _e('Import Attachments', 'wordpress-importer'); ?></h2>
-<p>
-	<input type="checkbox" value="1" name="attachments" id="import-attachments" />
-	<label for="import-attachments"><?php _e('Download and import file attachments', 'wordpress-importer') ?></label>
-</p>
-
+		<input type="hidden" name="imported_authors[<?php echo $n; ?>]" value="<?php echo esc_attr( $author['author_login'] ); /* escape only: a login must not be run through translation */ ?>" />
+		<?php wp_dropdown_users( array( 'name' => "user_map[$n]", 'multi' => true, 'show_option_all' => __( '- Select -', 'wordpress-importer' ) ) ); ?>
 <?php
-		}
-
-		echo '<p class="submit">';
-		echo '<input type="submit" class="button" value="'. esc_attr__('Submit', 'wordpress-importer') .'" />'.'<br />';
-		echo '</p>';
-		echo '</form>';
-
 	}
 
-	function users_form($n, $author) {
+	function get_author_mapping() { // Build $this->processed_authors (imported login => local user ID) from the submitted mapping form.
+		if ( ! isset( $_POST['imported_authors'] ) )
+			return;
 
-		if ( $this->allow_create_users() ) {
-			printf('<label>'.__('Create user %1$s or map to existing', 'wordpress-importer'), ' <input type="text" value="'. esc_attr($author) .'" name="'.'user_create['.intval($n).']'.'" maxlength="30" /></label> <br />');
-		}
-		else {
-			echo __('Map to existing', 'wordpress-importer').'<br />';
-		}
+		foreach ( (array) $_POST['imported_authors'] as $i => $login ) {
+			$bad_login = $login; // keep the submitted value for the error message below
+			$login = sanitize_user( $login, true );
 
-		// keep track of $n => $author name
-		echo '<input type="hidden" name="author_in['.intval($n).']" value="' . esc_attr($author).'" />';
+			if ( ! empty( $_POST['user_map'][$i] ) ) { // an existing user was picked in the dropdown
+				$user = get_userdata( intval($_POST['user_map'][$i]) );
+				if ( isset( $user->ID ) )
+					$this->processed_authors[$login] = $user->ID;
+			} else if ( ! $this->authors_from_posts && $this->allow_create_users() ) { // otherwise reuse or create a user with the imported login
+				$user_id = username_exists( $login );
+				if ( ! $user_id ) {
+					$user_data = array(
+						'user_login' => $login,
+						'user_pass' => wp_generate_password(),
+						'user_email' => isset( $this->authors[$login]['author_email'] ) ? $this->authors[$login]['author_email'] : '',
+						'display_name' => isset( $this->authors[$login]['author_display_name'] ) ? $this->authors[$login]['author_display_name'] : $login,
+						'first_name' => isset( $this->authors[$login]['author_first_name'] ) ? $this->authors[$login]['author_first_name'] : '',
+						'last_name' => isset( $this->authors[$login]['author_last_name'] ) ? $this->authors[$login]['author_last_name'] : '',
+					);
+					$user_id = wp_insert_user( $user_data );
+				}
 
-		$users = get_users_of_blog();
-?><select name="user_select[<?php echo $n; ?>]">
-	<option value="0"><?php _e('- Select -', 'wordpress-importer'); ?></option>
-	<?php
-		foreach ($users as $user) {
-			echo '<option value="'.$user->user_id.'">'.$user->user_login.'</option>';
-		}
-?>
-	</select>
-	<?php
-	}
+				if ( ! is_wp_error( $user_id ) )
+					$this->processed_authors[$login] = $user_id;
+				else
+					printf( __( 'Error importing author %s their posts will be attributed to the current user', 'wordpress-importer' ), esc_html( $bad_login ) );
+			}
 
-	function select_authors() {
-		$is_wxr_file = $this->get_entries(array(&$this, 'process_author'));
-		if ( $is_wxr_file ) {
-			$this->wp_authors_form();
+			// failsafe: if the user_id was invalid, default to the current user
+			if ( empty( $this->processed_authors[$login] ) )
+				$this->processed_authors[$login] = (int) get_current_user_id();
 		}
-		else {
-			echo '<h2>'.__('Invalid file', 'wordpress-importer').'</h2>';
-			echo '<p>'.__('Please upload a valid WXR (WordPress eXtended RSS) export file.', 'wordpress-importer').'</p>';
-		}
 	}
 
-	// fetch the user ID for a given author name, respecting the mapping preferences
-	function checkauthor($author) {
-		global $current_user;
-
-		if ( !empty($this->author_ids[$author]) )
-			return $this->author_ids[$author];
-
-		// failsafe: map to the current user
-		return $current_user->ID;
-	}
-
-
-
 	function process_categories() {
-		global $wpdb;
+		if ( empty( $this->categories ) ) // Create categories missing on this site and record file term ID => local term ID in $this->processed_terms.
+			return;
 
-		$cat_names = (array) get_terms('category', array('fields' => 'names'));
-
-		while ( $c = array_shift($this->categories) ) {
-			$cat_name = trim($this->get_tag( $c, 'wp:cat_name' ));
-
-			// If the category exists we leave it alone
-			if ( in_array($cat_name, $cat_names) )
+		foreach ( $this->categories as $cat ) {
+			// if the category already exists leave it alone
+			$term_id = term_exists( $cat['category_nicename'], 'category' );
+			if ( $term_id ) {
+				if ( is_array($term_id) ) $term_id = $term_id['term_id'];
+				$this->processed_terms[intval($cat['term_id'])] = (int) $term_id;
 				continue;
+			}
 
-			$category_nicename	= $this->get_tag( $c, 'wp:category_nicename' );
-			$category_description = $this->get_tag( $c, 'wp:category_description' );
-			$posts_private		= (int) $this->get_tag( $c, 'wp:posts_private' );
-			$links_private		= (int) $this->get_tag( $c, 'wp:links_private' );
+			$category_parent = empty( $cat['category_parent'] ) ? 0 : category_exists( $cat['category_parent'] );
+			$category_description = isset( $cat['category_description'] ) ? $cat['category_description'] : '';
+			$catarr = array(
+				'category_nicename' => $cat['category_nicename'],
+				'category_parent' => $category_parent,
+				'cat_name' => $cat['cat_name'],
+				'category_description' => $category_description
+			);
 
-			$parent = $this->get_tag( $c, 'wp:category_parent' );
-
-			if ( empty($parent) )
-				$category_parent = '0';
-			else
-				$category_parent = category_exists($parent);
-
-			$catarr = compact('category_nicename', 'category_parent', 'posts_private', 'links_private', 'posts_private', 'cat_name', 'category_description');
-
-			print '<em>' . sprintf( __( 'Importing category <em>%s</em>&#8230;' , 'wordpress-importer'), esc_html($cat_name) ) . '</em><br />' . "\n";
-			$cat_ID = wp_insert_category($catarr);
+			$id = wp_insert_category( $catarr, true ); // true: return a WP_Error on failure instead of 0, so the check below can see it
+			if ( ! is_wp_error( $id ) ) {
+				$this->processed_terms[intval($cat['term_id'])] = $id;
+			} else {
+				echo __( 'Error importing category:', 'wordpress-importer' ) . ' ' . esc_html( $id->get_error_message() ) . '<br />';
+				continue;
+			}
 		}
 	}
 
 	function process_tags() {
-		global $wpdb;
+		if ( empty( $this->tags ) ) // Create post tags missing on this site and record file term ID => local term ID in $this->processed_terms.
+			return;
 
-		$tag_names = (array) get_terms('post_tag', array('fields' => 'names'));
-
-		while ( $c = array_shift($this->tags) ) {
-			$tag_name = trim($this->get_tag( $c, 'wp:tag_name' ));
-
-			// If the category exists we leave it alone
-			if ( in_array($tag_name, $tag_names) )
+		foreach ( $this->tags as $tag ) {
+			// if the tag already exists leave it alone
+			$term_id = term_exists( $tag['tag_slug'], 'post_tag' );
+			if ( $term_id ) {
+				if ( is_array($term_id) ) $term_id = $term_id['term_id']; // term_exists() may hand back an array; keep only the ID
+				$this->processed_terms[intval($tag['term_id'])] = (int) $term_id;
 				continue;
+			}
 
-			$slug = $this->get_tag( $c, 'wp:tag_slug' );
-			$description = $this->get_tag( $c, 'wp:tag_description' );
+			$tag_desc = isset( $tag['tag_description'] ) ? $tag['tag_description'] : '';
+			$tagarr = array( 'slug' => $tag['tag_slug'], 'description' => $tag_desc );
 
-			$tagarr = compact('slug', 'description');
-
-			print '<em>' . sprintf( __( 'Importing tag <em>%s</em>&#8230;' , 'wordpress-importer'), esc_html($tag_name) ) . '</em><br />' . "\n";
-			$tag_ID = wp_insert_term($tag_name, 'post_tag', $tagarr);
+			$id = wp_insert_term( $tag['tag_name'], 'post_tag', $tagarr );
+			if ( ! is_wp_error( $id ) ) {
+				$this->processed_terms[intval($tag['term_id'])] = $id['term_id'];
+			} else { // report the failure and move on to the next tag
+				echo __( 'Error importing post tag:', 'wordpress-importer' ) . ' ' . esc_html( $id->get_error_message() ) . '<br />';
+				continue;
+			}
 		}
 	}
 
 	function process_terms() {
-		global $wpdb, $wp_taxonomies;
+		if ( empty( $this->terms ) ) // Create terms missing from their taxonomy and record file term ID => local term ID in $this->processed_terms.
+			return;
 
-		$custom_taxonomies = $wp_taxonomies;
-		// get rid of the standard taxonomies
-		unset( $custom_taxonomies['category'] );
-		unset( $custom_taxonomies['post_tag'] );
-		unset( $custom_taxonomies['link_category'] );
+		foreach ( $this->terms as $term ) {
+			// if the term already exists in the correct taxonomy leave it alone
+			$term_id = term_exists( $term['slug'], $term['term_taxonomy'] );
+			if ( $term_id ) {
+				if ( is_array($term_id) ) $term_id = $term_id['term_id']; // term_exists() may hand back an array; keep only the ID
+				$this->processed_terms[intval($term['term_id'])] = (int) $term_id;
+				continue;
+			}
 
-		$custom_taxonomies = array_keys( $custom_taxonomies );
-		$current_terms = (array) get_terms( $custom_taxonomies, array('get' => 'all') );
-		$taxonomies = array();
-		foreach ( $current_terms as $term ) {
-			if ( isset( $_terms[$term->taxonomy] ) ) {
-				$taxonomies[$term->taxonomy] = array_merge( $taxonomies[$term->taxonomy], array($term->name) );
+			if ( empty( $term['term_parent'] ) ) {
+				$parent = 0;
 			} else {
-				$taxonomies[$term->taxonomy] = array($term->name);
+				$parent = term_exists( $term['term_parent'], $term['term_taxonomy'] ); // look the parent up in the same taxonomy
+				if ( is_array( $parent ) ) $parent = $parent['term_id'];
 			}
-		}
+			$description = isset( $term['term_description'] ) ? $term['term_description'] : '';
+			$termarr = array( 'slug' => $term['slug'], 'description' => $description, 'parent' => intval($parent) );
 
-		while ( $c = array_shift($this->terms) ) {
-			$term_name = trim($this->get_tag( $c, 'wp:term_name' ));
-			$term_taxonomy = trim($this->get_tag( $c, 'wp:term_taxonomy' ));
-
-			// If the term exists in the taxonomy we leave it alone
-			if ( isset($taxonomies[$term_taxonomy] ) && in_array( $term_name, $taxonomies[$term_taxonomy] ) )
+			$id = wp_insert_term( $term['term_name'], $term['term_taxonomy'], $termarr );
+			if ( ! is_wp_error( $id ) ) {
+				$this->processed_terms[intval($term['term_id'])] = $id['term_id'];
+			} else { // report the failure and move on to the next term
+				echo __( 'Error importing term:', 'wordpress-importer' ) . ' ' . esc_html( $id->get_error_message() ) . '<br />';
 				continue;
-
-			$slug = $this->get_tag( $c, 'wp:term_slug' );
-			$description = $this->get_tag( $c, 'wp:term_description' );
-
-			$termarr = compact('slug', 'description');
-
-			print '<em>' . sprintf( __( 'Importing <em>%s</em>&#8230;' , 'wordpress-importer'), esc_html($term_name) ) . '</em><br />' . "\n";
-			$term_ID = wp_insert_term($term_name, $this->get_tag( $c, 'wp:term_taxonomy' ), $termarr);
+			}
 		}
 	}
 
-	function process_author($post) {
-		$author = $this->get_tag( $post, 'dc:creator' );
-		if ($author)
-			$this->allauthornames[] = $author;
-	}
-
 	function process_posts() {
-		echo '<ol>';
+		foreach ( $this->posts as $post ) { // Import each parsed post: create it if it is not already here, then attach terms, comments and meta.
+			if ( isset( $this->processed_posts[$post['post_id']] ) )
+				continue;
 
-		$this->get_entries(array(&$this, 'process_post'));
+			if ( 'nav_menu_item' == $post['post_type'] ) { // menu items take a separate path
+				$this->process_menu_item( $post );
+				continue;
+			}
 
-		echo '</ol>';
+			$post_exists = post_exists( $post['post_title'], '', $post['post_date'] );
+			if ( $post_exists ) {
+				$comment_post_ID = $post_id = $post_exists;
+			} else {
+				$post_parent = (int) $post['post_parent'];
+				if ( $post_parent ) {
+					// if we already know the parent, map it to the new local ID
+					if ( isset( $this->processed_posts[$post_parent] ) ) {
+						$post_parent = $this->processed_posts[$post_parent];
+					// otherwise record the parent for later
+					} else {
+						$this->post_orphans[intval($post['post_id'])] = $post_parent;
+						$post_parent = 0;
+					}
+				}
 
-		wp_import_cleanup($this->id);
-		do_action('import_done', 'wordpress');
+				// map the post author
+				$author = sanitize_user( $post['post_author'], true );
+				if ( isset( $this->processed_authors[$author] ) )
+					$author = $this->processed_authors[$author];
+				else
+					$author = (int) get_current_user_id();
 
-		echo '<h3>'.sprintf(__('All done.', 'wordpress-importer').' <a href="%s">'.__('Have fun!', 'wordpress-importer').'</a>', get_option('home')).'</h3>';
-	}
+				$postdata = array(
+					'import_id' => $post['post_id'], 'post_author' => $author, 'post_date' => $post['post_date'],
+					'post_date_gmt' => $post['post_date_gmt'], 'post_content' => $post['post_content'],
+					'post_excerpt' => $post['post_excerpt'], 'post_title' => $post['post_title'],
+					'post_status' => $post['status'], 'post_name' => $post['post_name'],
+					'comment_status' => $post['comment_status'], 'ping_status' => $post['ping_status'],
+					'guid' => $post['guid'], 'post_parent' => $post_parent, 'menu_order' => $post['menu_order'],
+					'post_type' => $post['post_type'], 'post_password' => $post['post_password']
+				);
 
-	function _normalize_tag( $matches ) {
-		return '<' . strtolower( $matches[1] );
-	}
+				if ( 'attachment' == $postdata['post_type'] ) {
+					$remote_url = ! empty($post['attachment_url']) ? $post['attachment_url'] : $post['guid'];
+					$comment_post_ID = $post_id = $this->process_attachment( $postdata, $remote_url );
+				} else {
+					$comment_post_ID = $post_id = wp_insert_post( $postdata, true );
+				}
 
-	function process_post($post) {
-		global $wpdb;
-
-		$post_ID = (int) $this->get_tag( $post, 'wp:post_id' );
-  		if ( $post_ID && !empty($this->post_ids_processed[$post_ID]) ) // Processed already
-			return 0;
-
-		set_time_limit( 60 );
-
-		// There are only ever one of these
-		$post_title     = $this->get_tag( $post, 'title' );
-		$post_date      = $this->get_tag( $post, 'wp:post_date' );
-		$post_date_gmt  = $this->get_tag( $post, 'wp:post_date_gmt' );
-		$comment_status = $this->get_tag( $post, 'wp:comment_status' );
-		$ping_status    = $this->get_tag( $post, 'wp:ping_status' );
-		$post_status    = $this->get_tag( $post, 'wp:status' );
-		$post_name      = $this->get_tag( $post, 'wp:post_name' );
-		$post_parent    = $this->get_tag( $post, 'wp:post_parent' );
-		$menu_order     = $this->get_tag( $post, 'wp:menu_order' );
-		$post_type      = $this->get_tag( $post, 'wp:post_type' );
-		$post_password  = $this->get_tag( $post, 'wp:post_password' );
-		$is_sticky		= $this->get_tag( $post, 'wp:is_sticky' );
-		$guid           = $this->get_tag( $post, 'guid' );
-		$post_author    = $this->get_tag( $post, 'dc:creator' );
-
-		$post_excerpt = $this->get_tag( $post, 'excerpt:encoded' );
-		$post_excerpt = preg_replace_callback('|<(/?[A-Z]+)|', array( &$this, '_normalize_tag' ), $post_excerpt);
-		$post_excerpt = str_replace('<br>', '<br />', $post_excerpt);
-		$post_excerpt = str_replace('<hr>', '<hr />', $post_excerpt);
-
-		$post_content = $this->get_tag( $post, 'content:encoded' );
-		$post_content = preg_replace_callback('|<(/?[A-Z]+)|', array( &$this, '_normalize_tag' ), $post_content);
-		$post_content = str_replace('<br>', '<br />', $post_content);
-		$post_content = str_replace('<hr>', '<hr />', $post_content);
-
-		preg_match_all('|<category domain="tag">(.*?)</category>|is', $post, $tags);
-		$tags = $tags[1];
-
-		$tag_index = 0;
-		foreach ($tags as $tag) {
-			$tags[$tag_index] = $wpdb->escape( html_entity_decode( str_replace(array( '<![CDATA[', ']]>' ), '', $tag ) ) );
-			$tag_index++;
-		}
-
-		preg_match_all('|<category>(.*?)</category>|is', $post, $categories);
-		$categories = $categories[1];
-
-		$cat_index = 0;
-		foreach ($categories as $category) {
-			$categories[$cat_index] = $wpdb->escape( html_entity_decode( str_replace( array( '<![CDATA[', ']]>' ), '', $category ) ) );
-			$cat_index++;
-		}
-
-		$post_exists = post_exists($post_title, '', $post_date);
-
-		if ( $post_exists ) {
-			echo '<li>';
-			printf(__('Post <em>%s</em> already exists.', 'wordpress-importer'), stripslashes($post_title));
-			$comment_post_ID = $post_id = $post_exists;
-		} else {
-
-			// If it has parent, process parent first.
-			$post_parent = (int) $post_parent;
-			if ($post_parent) {
-				// if we already know the parent, map it to the local ID
-				if ( isset( $this->post_ids_processed[$post_parent] ) ) {
-					$post_parent = $this->post_ids_processed[$post_parent];  // new ID of the parent
+				if ( is_wp_error( $post_id ) ) {
+					echo __( 'Error importing post object:', 'wordpress-importer' ) . ' ' . esc_html( $post_id->get_error_message() ) . '<br />';
+					continue;
 				}
-				else {
-					// record the parent for later
-					$this->orphans[intval($post_ID)] = $post_parent;
-				}
-			}
 
-			echo '<li>';
-
-			$post_author = $this->checkauthor($post_author); //just so that if a post already exists, new users are not created by checkauthor
-
-			$postdata = compact('post_author', 'post_date', 'post_date_gmt', 'post_content', 'post_excerpt', 'post_title', 'post_status', 'post_name', 'comment_status', 'ping_status', 'guid', 'post_parent', 'menu_order', 'post_type', 'post_password');
-			$postdata['import_id'] = $post_ID;
-			if ($post_type == 'attachment') {
-				$remote_url = $this->get_tag( $post, 'wp:attachment_url' );
-				if ( !$remote_url )
-					$remote_url = $guid;
-
-				$comment_post_ID = $post_id = $this->process_attachment($postdata, $remote_url);
-				if ( !$post_id or is_wp_error($post_id) )
-					return $post_id;
-			}
-			else {
-				printf(__('Importing post <em>%s</em>...', 'wordpress-importer') . "\n", stripslashes($post_title));
-				$comment_post_ID = $post_id = wp_insert_post($postdata);
-				if ( $post_id && $is_sticky == 1 )
+				if ( $post['is_sticky'] == 1 )
 					stick_post( $post_id );
-
 			}
 
-			if ( is_wp_error( $post_id ) )
-				return $post_id;
+			// map pre-import ID to local ID
+			$this->processed_posts[intval($post['post_id'])] = (int) $post_id;
 
-			// Memorize old and new ID.
-			if ( $post_id && $post_ID ) {
-				$this->post_ids_processed[intval($post_ID)] = intval($post_id);
+			// add categories, tags and other terms
+			if ( ! empty( $post['terms'] ) ) {
+				foreach ( $post['terms'] as $term ) {
+					// back compat with WXR 1.0 map 'tag' to 'post_tag'
+					$taxonomy = ( 'tag' == $term['domain'] ) ? 'post_tag' : $term['domain'];
+					$term_exists = term_exists( $term['slug'], $taxonomy );
+					$term_id = is_array( $term_exists ) ? $term_exists['term_id'] : $term_exists;
+					if ( ! $term_id ) {
+						$t = wp_insert_term( $term['name'], $taxonomy, array( 'slug' => $term['slug'] ) );
+						if ( ! is_wp_error( $t ) ) {
+							$term_id = $t['term_id'];
+						} else {
+							echo __( 'Error importing term:', 'wordpress-importer' ) . ' ' . esc_html( $t->get_error_message() ) . '<br />';
+							continue;
+						}
+					}
+					$terms_to_set[$taxonomy][] = intval( $term_id );
+				}
+
+				foreach ( ( isset( $terms_to_set ) ? $terms_to_set : array() ) as $tax => $ids ) { // unset when every term above failed
+					$tt_ids = wp_set_post_terms( $post_id, $ids, $tax );
+				}
+				unset( $post['terms'], $terms_to_set );
 			}
 
-			// Add categories.
-			if (count($categories) > 0) {
-				$post_cats = array();
-				foreach ($categories as $category) {
-					if ( '' == $category )
-						continue;
-					$slug = sanitize_term_field('slug', $category, 0, 'category', 'db');
-					$cat = get_term_by('slug', $slug, 'category');
-					$cat_ID = 0;
-					if ( ! empty($cat) )
-						$cat_ID = $cat->term_id;
-					if ($cat_ID == 0) {
-						$category = $wpdb->escape($category);
-						$cat_ID = wp_insert_category(array('cat_name' => $category));
-						if ( is_wp_error($cat_ID) )
-							continue;
+			// add/update comments
+			if ( ! empty( $post['comments'] ) ) {
+				$num_comments = 0;
+				$inserted_comments = array();
+				foreach ( $post['comments'] as $comment ) {
+					$comment_id	= $comment['comment_id'];
+					$newcomments[$comment_id]['comment_post_ID']      = $comment_post_ID;
+					$newcomments[$comment_id]['comment_author']       = $comment['comment_author'];
+					$newcomments[$comment_id]['comment_author_email'] = $comment['comment_author_email'];
+					$newcomments[$comment_id]['comment_author_IP']    = $comment['comment_author_IP'];
+					$newcomments[$comment_id]['comment_author_url']   = $comment['comment_author_url'];
+					$newcomments[$comment_id]['comment_date']         = $comment['comment_date'];
+					$newcomments[$comment_id]['comment_date_gmt']     = $comment['comment_date_gmt'];
+					$newcomments[$comment_id]['comment_content']      = $comment['comment_content'];
+					$newcomments[$comment_id]['comment_approved']     = $comment['comment_approved'];
+					$newcomments[$comment_id]['comment_type']         = ! empty( $comment['comment_type'] ) ? $comment['comment_type'] : 'comment';
+					$newcomments[$comment_id]['comment_parent'] 	  = $comment['comment_parent'];
+				}
+				ksort( $newcomments );
+
+				foreach ( $newcomments as $key => $comment ) {
+					// if this is a new post we can skip the comment_exists() check
+					if ( ! $post_exists || ! comment_exists( $comment['comment_author'], $comment['comment_date'] ) ) {
+						if ( isset( $inserted_comments[$comment['comment_parent']] ) )
+							$comment['comment_parent'] = $inserted_comments[$comment['comment_parent']];
+						$comment = wp_filter_comment( $comment );
+						$inserted_comments[$key] = wp_insert_comment( $comment );
+						$num_comments++;
 					}
-					$post_cats[] = $cat_ID;
 				}
-				wp_set_post_categories($post_id, $post_cats);
+				unset( $newcomments, $inserted_comments, $post['comments'] );
 			}
 
-			// Add tags.
-			if (count($tags) > 0) {
-				$post_tags = array();
-				foreach ($tags as $tag) {
-					if ( '' == $tag )
-						continue;
-					$slug = sanitize_term_field('slug', $tag, 0, 'post_tag', 'db');
-					$tag_obj = get_term_by('slug', $slug, 'post_tag');
-					$tag_id = 0;
-					if ( ! empty($tag_obj) )
-						$tag_id = $tag_obj->term_id;
-					if ( $tag_id == 0 ) {
-						$tag = $wpdb->escape($tag);
-						$tag_id = wp_insert_term($tag, 'post_tag');
-						if ( is_wp_error($tag_id) )
-							continue;
-						$tag_id = $tag_id['term_id'];
+			// add/update post meta
+			if ( isset( $post['postmeta'] ) ) {
+				foreach ( $post['postmeta'] as $meta ) {
+					$key = apply_filters( 'import_post_meta_key', $meta['key'] );
+					if ( $key ) {
+						update_post_meta( $post_id, $key, $meta['value'] );
+						do_action( 'import_post_meta', $post_id, $key, $meta['value'] );
 					}
-					$post_tags[] = intval($tag_id);
 				}
-				wp_set_post_tags($post_id, $post_tags);
 			}
 		}
+	}
 
-		// Now for comments
-		preg_match_all('|<wp:comment>(.*?)</wp:comment>|is', $post, $comments);
-		$comments = $comments[1];
-		$num_comments = 0;
-		$inserted_comments = array();
-		if ( $comments) {
-			foreach ($comments as $comment) {
-				$comment_id	= $this->get_tag( $comment, 'wp:comment_id');
-				$newcomments[$comment_id]['comment_post_ID']      = $comment_post_ID;
-				$newcomments[$comment_id]['comment_author']       = $this->get_tag( $comment, 'wp:comment_author');
-				$newcomments[$comment_id]['comment_author_email'] = $this->get_tag( $comment, 'wp:comment_author_email');
-				$newcomments[$comment_id]['comment_author_IP']    = $this->get_tag( $comment, 'wp:comment_author_IP');
-				$newcomments[$comment_id]['comment_author_url']   = $this->get_tag( $comment, 'wp:comment_author_url');
-				$newcomments[$comment_id]['comment_date']         = $this->get_tag( $comment, 'wp:comment_date');
-				$newcomments[$comment_id]['comment_date_gmt']     = $this->get_tag( $comment, 'wp:comment_date_gmt');
-				$newcomments[$comment_id]['comment_content']      = $this->get_tag( $comment, 'wp:comment_content');
-				$newcomments[$comment_id]['comment_approved']     = $this->get_tag( $comment, 'wp:comment_approved');
-				$newcomments[$comment_id]['comment_type']         = $this->get_tag( $comment, 'wp:comment_type');
-				$newcomments[$comment_id]['comment_parent'] 	  = $this->get_tag( $comment, 'wp:comment_parent');
+	function process_menu_item( $item ) { // Recreate one nav menu item in its menu, remapping object and parent IDs to local ones; defers items whose target is not imported yet.
+		$menu_slug = false;
+		// loop through terms, assume first nav_menu term is correct menu
+		foreach ( ( isset( $item['terms'] ) ? $item['terms'] : array() ) as $term ) {
+			if ( 'nav_menu' == $term['domain'] ) {
+				$menu_slug = $term['slug'];
+				break;
 			}
-			// Sort by comment ID, to make sure comment parents exist (if there at all)
-			ksort($newcomments);
-			foreach ($newcomments as $key => $comment) {
-				// if this is a new post we can skip the comment_exists() check
-				if ( !$post_exists || !comment_exists($comment['comment_author'], $comment['comment_date']) ) {
-					if (isset($inserted_comments[$comment['comment_parent']]))
-						$comment['comment_parent'] = $inserted_comments[$comment['comment_parent']];
-					$comment = wp_filter_comment($comment);
-					$inserted_comments[$key] = wp_insert_comment($comment);
-					$num_comments++;
-				}
-			}
 		}
 
-		if ( $num_comments )
-			printf(' '._n('(%s comment)', '(%s comments)', $num_comments, 'wordpress-importer'), $num_comments);
+		// no nav_menu term associated with this menu item
+		if ( ! $menu_slug ) {
+			_e( 'Menu item skipped due to missing menu slug', 'wordpress-importer' );
+			echo '<br />';
+			return;
+		}
 
-		// Now for post meta
-		preg_match_all('|<wp:postmeta>(.*?)</wp:postmeta>|is', $post, $postmeta);
-		$postmeta = $postmeta[1];
-		if ( $postmeta) { foreach ($postmeta as $p) {
-			$key   = $this->get_tag( $p, 'wp:meta_key' );
-			$value = $this->get_tag( $p, 'wp:meta_value' );
+		$menu_id = term_exists( $menu_slug, 'nav_menu' );
+		if ( ! $menu_id ) {
+			printf( __( 'Menu item skipped due to invalid menu slug: %s', 'wordpress-importer' ), esc_html( $menu_slug ) );
+			echo '<br />';
+			return;
+		} else {
+			$menu_id = is_array( $menu_id ) ? $menu_id['term_id'] : $menu_id;
+		}
 
-			$this->process_post_meta($post_id, $key, $value);
+		foreach ( $item['postmeta'] as $meta )
+			${$meta['key']} = $meta['value']; // one local variable per meta key (e.g. $_menu_item_type); braces keep this parse on PHP 7+
 
-		} }
+		if ( 'taxonomy' == $_menu_item_type && isset( $this->processed_terms[intval($_menu_item_object_id)] ) ) {
+			$_menu_item_object_id = $this->processed_terms[intval($_menu_item_object_id)];
+		} else if ( 'post_type' == $_menu_item_type && isset( $this->processed_posts[intval($_menu_item_object_id)] ) ) {
+			$_menu_item_object_id = $this->processed_posts[intval($_menu_item_object_id)];
+		} else if ( 'custom' != $_menu_item_type ) {
+			// associated object is missing or not imported yet, we'll retry later
+			$this->missing_menu_items[] = $item;
+			return;
+		}
 
-		do_action('import_post_added', $post_id);
-		print "</li>\n";
-	}
-
-	function process_post_meta($post_id, $key, $value) {
-		// the filter can return false to skip a particular metadata key
-		$_key = apply_filters('import_post_meta_key', $key);
-		if ( $_key ) {
-			add_post_meta( $post_id, $_key, $value );
-			do_action('import_post_meta', $post_id, $_key, $value);
+		if ( isset( $this->processed_menu_items[intval($_menu_item_menu_item_parent)] ) ) {
+			$_menu_item_menu_item_parent = $this->processed_menu_items[intval($_menu_item_menu_item_parent)];
+		} else if ( $_menu_item_menu_item_parent ) { // parent not imported yet: remember it and attach at top level for now
+			$this->menu_item_orphans[intval($item['post_id'])] = (int) $_menu_item_menu_item_parent;
+			$_menu_item_menu_item_parent = 0;
 		}
-	}
 
-	function process_attachment($postdata, $remote_url) {
-		if ($this->fetch_attachments and $remote_url) {
-			printf( __('Importing attachment <em>%s</em>... ', 'wordpress-importer'), htmlspecialchars($remote_url) );
+		$args = array(
+			'menu-item-object-id' => $_menu_item_object_id,
+			'menu-item-object' => $_menu_item_object,
+			'menu-item-parent-id' => $_menu_item_menu_item_parent,
+			'menu-item-position' => intval( $item['menu_order'] ),
+			'menu-item-type' => $_menu_item_type,
+			'menu-item-title' => $item['post_title'],
+			'menu-item-url' => $_menu_item_url,
+			'menu-item-description' => $item['post_content'],
+			'menu-item-attr-title' => $item['post_excerpt'],
+			'menu-item-target' => $_menu_item_target,
+			'menu-item-classes' => $_menu_item_classes,
+			'menu-item-xfn' => $_menu_item_xfn,
+			'menu-item-status' => $item['status']
+		);
 
-			// If the URL is absolute, but does not contain http, upload it assuming the base_site_url variable
-			if ( preg_match('/^\/[\w\W]+$/', $remote_url) )
-				$remote_url = rtrim($this->base_url,'/').$remote_url;
+		$id = wp_update_nav_menu_item( $menu_id, 0, $args );
+		if ( $id && ! is_wp_error( $id ) )
+			$this->processed_menu_items[intval($item['post_id'])] = (int) $id;
+	}
 
-			$upload = $this->fetch_remote_file($postdata, $remote_url);
-			if ( is_wp_error($upload) ) {
-				printf( __('Remote file error: %s', 'wordpress-importer'), htmlspecialchars($upload->get_error_message()) );
-				return $upload;
-			}
-			else {
-				print '('.size_format(filesize($upload['file'])).')';
-			}
+	function process_attachment( $post, $url ) {
+		if ( ! ( $this->fetch_attachments && $url ) )
+			return new WP_Error( 'attachment_processing_error',
+				__( 'Fetching attachments is not allowed or an empty URL was provided', 'wordpress-importer' ) );
 
-			if ( 0 == filesize( $upload['file'] ) ) {
-				print __( "Zero length file, deleting" , 'wordpress-importer') . "\n";
-				unlink( $upload['file'] );
-				return;
-			}
+		// if the URL is only a path (it starts with '/'), prepend base_site_url to make it a full URL
+		if ( preg_match( '|^/[\w\W]+$|', $url ) )
+			$url = rtrim( $this->base_url, '/' ) . $url;
 
-			if ( $info = wp_check_filetype($upload['file']) ) {
-				$postdata['post_mime_type'] = $info['type'];
-			}
-			else {
-				print __('Invalid file type', 'wordpress-importer');
-				return;
-			}
+		$upload = $this->fetch_remote_file( $url, $post );
+		if ( is_wp_error( $upload ) )
+			return $upload;
 
-			$postdata['guid'] = $upload['url'];
+		if ( $info = wp_check_filetype( $upload['file'] ) )
+			$post['post_mime_type'] = $info['type'];
+		else
+			return new WP_Error( 'attachment_processing_error', __('Invalid file type', 'wordpress-importer') );
 
-			// as per wp-admin/includes/upload.php
-			$post_id = wp_insert_attachment($postdata, $upload['file']);
-			wp_update_attachment_metadata( $post_id, wp_generate_attachment_metadata( $post_id, $upload['file'] ) );
+		$post['guid'] = $upload['url'];
 
-			// remap the thumbnail url.  this isn't perfect because we're just guessing the original url.
-			if ( preg_match('@^image/@', $info['type']) && $thumb_url = wp_get_attachment_thumb_url($post_id) ) {
-				$parts = pathinfo($remote_url);
-				$ext = $parts['extension'];
-				$name = basename($parts['basename'], ".{$ext}");
-				$this->url_remap[$parts['dirname'] . '/' . $name . '.thumbnail.' . $ext] = $thumb_url;
-			}
+		// as per wp-admin/includes/upload.php
+		$post_id = wp_insert_attachment( $post, $upload['file'] );
+		wp_update_attachment_metadata( $post_id, wp_generate_attachment_metadata( $post_id, $upload['file'] ) );
 
-			return $post_id;
+		// remap the thumbnail url.  this isn't perfect because we're just guessing the original url.
+		if ( preg_match( '@^image/@', $info['type'] ) && $thumb_url = wp_get_attachment_thumb_url( $post_id ) ) {
+			$parts = pathinfo( $url );
+			$ext = $parts['extension'];
+			$name = basename($parts['basename'], ".{$ext}");
+			$this->url_remap[$parts['dirname'] . '/' . $name . '.thumbnail.' . $ext] = $thumb_url;
 		}
-		else {
-			printf( __('Skipping attachment <em>%s</em>', 'wordpress-importer'), htmlspecialchars($remote_url) );
-		}
+
+		return $post_id;
 	}
 
-	function fetch_remote_file( $post, $url ) {
+	function fetch_remote_file( $url, $post ) {
 		add_filter( 'http_request_timeout', array( &$this, 'bump_request_timeout' ) );
 
-		$upload = wp_upload_dir($post['post_date']);
-
 		// extract the file name and extension from the url
-		$file_name = basename($url);
+		$file_name = basename( $url );
 
-		// get placeholder file in the upload dir with a unique sanitized filename
-		$upload = wp_upload_bits( $file_name, 0, '', $post['post_date']);
-		if ( $upload['error'] ) {
-			echo $upload['error'];
+		// get placeholder file in the upload dir with a unique, sanitized filename
+		$upload = wp_upload_bits( $file_name, 0, '', $post['post_date'] );
+		if ( $upload['error'] )
 			return new WP_Error( 'upload_dir_error', $upload['error'] );
-		}
 
 		// fetch the remote url and write it to the placeholder file
-		$headers = wp_get_http($url, $upload['file']);
+		$headers = wp_get_http( $url, $upload['file'] );
 
-		//Request failed
+		// request failed
 		if ( ! $headers ) {
-			@unlink($upload['file']);
+			@unlink( $upload['file'] );
 			return new WP_Error( 'import_file_error', __('Remote server did not respond', 'wordpress-importer') );
 		}
 
 		// make sure the fetch was successful
 		if ( $headers['response'] != '200' ) {
-			@unlink($upload['file']);
-			return new WP_Error( 'import_file_error', sprintf(__('Remote file returned error response %1$d %2$s', 'wordpress-importer'), $headers['response'], get_status_header_desc($headers['response']) ) );
+			@unlink( $upload['file'] );
+			return new WP_Error( 'import_file_error', sprintf( __('Remote server returned error response %1$d %2$s', 'wordpress-importer'), $headers['response'], get_status_header_desc($headers['response']) ) );
 		}
-		elseif ( isset($headers['content-length']) && filesize($upload['file']) != $headers['content-length'] ) {
-			@unlink($upload['file']);
+
+		$filesize = filesize( $upload['file'] );
+
+		if ( isset( $headers['content-length'] ) && $filesize != $headers['content-length'] ) {
+			@unlink( $upload['file'] );
 			return new WP_Error( 'import_file_error', __('Remote file is incorrect size', 'wordpress-importer') );
 		}
 
-		$max_size = $this->max_attachment_size();
-		if ( !empty($max_size) and filesize($upload['file']) > $max_size ) {
-			@unlink($upload['file']);
+		if ( 0 == $filesize ) {
+			@unlink( $upload['file'] );
+			return new WP_Error( 'import_file_error', __('Zero size file downloaded', 'wordpress-importer') );
+		}
+
+		$max_size = (int) $this->max_attachment_size();
+		if ( ! empty( $max_size ) && $filesize > $max_size ) {
+			@unlink( $upload['file'] );
 			return new WP_Error( 'import_file_error', sprintf(__('Remote file is too large, limit is %s', size_format($max_size), 'wordpress-importer')) );
 		}
 
@@ -736,170 +642,111 @@
 		$this->url_remap[$url] = $upload['url'];
 		$this->url_remap[$post['guid']] = $upload['url'];
 		// if the remote url is redirected somewhere else, keep track of the destination too
-		if ( $headers['x-final-location'] != $url )
+		if ( isset($headers['x-final-location']) && $headers['x-final-location'] != $url )
 			$this->url_remap[$headers['x-final-location']] = $upload['url'];
 
 		return $upload;
-
 	}
 
-	/**
-	 * Bump up the request timeout for http requests
-	 *
-	 * @param int $val
-	 * @return int
-	 */
-	function bump_request_timeout( $val ) {
-		return 60;
-	}
+	function backfill_parents() {
+		global $wpdb;
 
-	// sort by strlen, longest string first
-	function cmpr_strlen($a, $b) {
-		return strlen($b) - strlen($a);
+		// find parents for post orphans
+		foreach ( $this->post_orphans as $child_id => $parent_id ) {
+			$local_child_id = $local_parent_id = false;
+			if ( isset( $this->processed_posts[$child_id] ) )
+				$local_child_id = $this->processed_posts[$child_id];
+			if ( isset( $this->processed_posts[$parent_id] ) )
+				$local_parent_id = $this->processed_posts[$parent_id];
+
+			if ( $local_child_id && $local_parent_id )
+				$wpdb->update( $wpdb->posts, array( 'post_parent' => $local_parent_id ), array( 'ID' => $local_child_id ), '%d', '%d' );
+		}
+
+		// now that all other posts/terms are imported, retry the menu items whose associated object was missing
+		$missing_menu_items = $this->missing_menu_items;
+		foreach ( $missing_menu_items as $item )
+			$this->process_menu_item( $item );
+
+		// find parents for menu item orphans
+		foreach ( $this->menu_item_orphans as $child_id => $parent_id ) {
+			$local_child_id = $local_parent_id = 0;
+			if ( isset( $this->processed_menu_items[$child_id] ) )
+				$local_child_id = $this->processed_menu_items[$child_id];
+			if ( isset( $this->processed_menu_items[$parent_id] ) )
+				$local_parent_id = $this->processed_menu_items[$parent_id];
+
+			if ( $local_child_id && $local_parent_id )
+				update_post_meta( $local_child_id, '_menu_item_menu_item_parent', (int) $local_parent_id );
+		}
 	}
 
-	// update url references in post bodies to point to the new local files
 	function backfill_attachment_urls() {
+		global $wpdb;
 
 		// make sure we do the longest urls first, in case one is a substring of another
-		uksort($this->url_remap, array(&$this, 'cmpr_strlen'));
+		uksort( $this->url_remap, array(&$this, 'cmpr_strlen') );
 
-		global $wpdb;
-		foreach ($this->url_remap as $from_url => $to_url) {
+		foreach ( $this->url_remap as $from_url => $to_url ) {
 			// remap urls in post_content
-			$wpdb->query( $wpdb->prepare("UPDATE {$wpdb->posts} SET post_content = REPLACE(post_content, '%s', '%s')", $from_url, $to_url) );
+			$wpdb->query( $wpdb->prepare("UPDATE {$wpdb->posts} SET post_content = REPLACE(post_content, %s, %s)", $from_url, $to_url) );
 			// remap enclosure urls
-			$result = $wpdb->query( $wpdb->prepare("UPDATE {$wpdb->postmeta} SET meta_value = REPLACE(meta_value, '%s', '%s') WHERE meta_key='enclosure'", $from_url, $to_url) );
+			$result = $wpdb->query( $wpdb->prepare("UPDATE {$wpdb->postmeta} SET meta_value = REPLACE(meta_value, %s, %s) WHERE meta_key='enclosure'", $from_url, $to_url) );
 		}
 	}
 
-	// update the post_parent of orphans now that we know the local id's of all parents
-	function backfill_parents() {
-		global $wpdb;
+	function parse( $file ) {
+		$parser = new WXR_Parser();
+		return $parser->parse( $file );
+	}
 
-		foreach ($this->orphans as $child_id => $parent_id) {
-			$local_child_id = $local_parent_id = false;
-			if ( isset( $this->post_ids_processed[$child_id] ) )
-				$local_child_id = $this->post_ids_processed[$child_id];
-			if ( isset( $this->post_ids_processed[$parent_id] ) )
-				$local_parent_id = $this->post_ids_processed[$parent_id];
+	function header() {
+		echo '<div class="wrap">';
+		screen_icon();
+		echo '<h2>' . __( 'Import WordPress', 'wordpress-importer' ) . '</h2>';
+	}
 
-			if ($local_child_id and $local_parent_id) {
-				$wpdb->update($wpdb->posts, array('post_parent' => $local_parent_id), array('ID' => $local_child_id) );
-			}
-		}
+	function footer() {
+		echo '</div>';
 	}
 
-	function is_valid_meta_key($key) {
+	function greet() {
+		echo '<div class="narrow">';
+		echo '<p>'.__( 'Howdy! Upload your WordPress eXtended RSS (WXR) file and we&#8217;ll import the posts, pages, comments, custom fields, categories, and tags into this site.', 'wordpress-importer' ).'</p>';
+		echo '<p>'.__( 'Choose a WXR file to upload, then click Upload file and import.', 'wordpress-importer' ).'</p>';
+		wp_import_upload_form( 'admin.php?import=wordpress&amp;step=1' );
+		echo '</div>';
+	}
+
+	function is_valid_meta_key( $key ) {
 		// skip attachment metadata since we'll regenerate it from scratch
-		if ( $key == '_wp_attached_file' || $key == '_wp_attachment_metadata' )
+		// also skip _edit_lock and _edit_last, which are not useful to import
+		if ( in_array( $key, array( '_wp_attached_file', '_wp_attachment_metadata', '_edit_lock', '_edit_last' ) ) )
 			return false;
 		return $key;
 	}
 
 	// give the user the option of creating new users to represent authors in the import file?
 	function allow_create_users() {
-		return apply_filters('import_allow_create_users', true);
+		return apply_filters( 'import_allow_create_users', true );
 	}
 
 	// give the user the option of downloading and importing attached files
 	function allow_fetch_attachments() {
-		return apply_filters('import_allow_fetch_attachments', true);
+		return apply_filters( 'import_allow_fetch_attachments', true );
 	}
 
-	function max_attachment_size() {
-		// can be overridden with a filter - 0 means no limit
-		return apply_filters('import_attachment_size_limit', 0);
+	function bump_request_timeout() {
+		return 60;
 	}
 
-	function import_start() {
-		wp_defer_term_counting(true);
-		wp_defer_comment_counting(true);
-		do_action('import_start');
+	function max_attachment_size() {
+		return apply_filters( 'import_attachment_size_limit', 0 );
 	}
 
-	function import_end() {
-		do_action('import_end');
-
-		// clear the caches after backfilling
-		foreach ($this->post_ids_processed as $post_id)
-			clean_post_cache($post_id);
-
-		wp_defer_term_counting(false);
-		wp_defer_comment_counting(false);
+	function cmpr_strlen( $a, $b ) {
+		return strlen($b) - strlen($a);
 	}
-
-	function import($id, $fetch_attachments = false) {
-		$this->id = (int) $id;
-		$this->fetch_attachments = ($this->allow_fetch_attachments() && (bool) $fetch_attachments);
-
-		add_filter('import_post_meta_key', array($this, 'is_valid_meta_key'));
-		$file = get_attached_file($this->id);
-		$this->import_file($file);
-	}
-
-	function import_file($file) {
-		$this->file = $file;
-
-		$this->import_start();
-		$this->get_authors_from_post();
-		wp_suspend_cache_invalidation(true);
-		$this->get_entries();
-		$this->process_categories();
-		$this->process_tags();
-		$this->process_terms();
-		$result = $this->process_posts();
-		wp_suspend_cache_invalidation(false);
-		$this->backfill_parents();
-		$this->backfill_attachment_urls();
-		$this->import_end();
-
-		if ( is_wp_error( $result ) )
-			return $result;
-	}
-
-	function handle_upload() {
-		$file = wp_import_handle_upload();
-		if ( isset($file['error']) ) {
-			echo '<p>'.__('Sorry, there has been an error.', 'wordpress-importer').'</p>';
-			echo '<p><strong>' . $file['error'] . '</strong></p>';
-			return false;
-		}
-		$this->file = $file['file'];
-		$this->id = (int) $file['id'];
-		return true;
-	}
-
-	function dispatch() {
-		if (empty ($_GET['step']))
-			$step = 0;
-		else
-			$step = (int) $_GET['step'];
-
-		$this->header();
-		switch ($step) {
-			case 0 :
-				$this->greet();
-				break;
-			case 1 :
-				check_admin_referer('import-upload');
-				if ( $this->handle_upload() )
-					$this->select_authors();
-				break;
-			case 2:
-				check_admin_referer('import-wordpress');
-				$fetch_attachments = ! empty( $_POST['attachments'] );
-				$result = $this->import( $_GET['id'], $fetch_attachments);
-				if ( is_wp_error( $result ) )
-					echo $result->get_error_message();
-				break;
-		}
-		$this->footer();
-	}
-
-	function WP_Import() {
-		// Nothing.
-	}
 }
 
 /**
