Index: uninstall.php
===================================================================
--- uninstall.php	(revision 0)
+++ uninstall.php	(revision 0)
@@ -0,0 +1,5 @@
+<?php
+// Only run when WordPress itself is performing the uninstall: bail if either constant is missing.
+if ( ! defined( 'ABSPATH' ) || ! defined( 'WP_UNINSTALL_PLUGIN' ) )
+	exit;
+delete_option( 'rss_importer' ); // remove the option this plugin stores
\ No newline at end of file
Index: parsers.php
===================================================================
--- parsers.php	(revision 0)
+++ parsers.php	(revision 0)
@@ -0,0 +1,224 @@
+<?php
+/**
+ * WordPress RSS file parser implementations
+ *
+ * @package WordPress
+ * @subpackage Importer
+ */
+
+/**
+ * WordPress Importer class for managing parsing of RSS files.
+ */
+class RSS_Parser {
+	/**
+	 * Parse an RSS file with SimpleXML when that extension is loaded, otherwise with the XML Parser extension.
+	 *
+	 * @param string $file Path to the RSS file.
+	 * @return array|WP_Error List of post arrays, or the parser's WP_Error on failure.
+	 */
+	function parse( $file ) {
+		if ( extension_loaded( 'simplexml' ) )
+			$parser = new RSS_Parser_SimpleXML;
+		else
+			$parser = new RSS_Parser_XML;
+		$result = $parser->parse( $file );
+
+		// Success, or an invalid (non RSS 2.0) file: nothing more to report here
+		if ( ! is_wp_error( $result ) || ! in_array( $result->get_error_code(), array( 'SimpleXML_parse_error', 'XML_parse_error' ), true ) )
+			return $result;
+
+		// Malformed XML: show the low-level parser errors when debugging
+		if ( defined( 'IMPORT_DEBUG' ) && IMPORT_DEBUG ) {
+			echo '<pre>';
+			if ( 'SimpleXML_parse_error' == $result->get_error_code() ) {
+				foreach  ( $result->get_error_data() as $error )
+					echo $error->line . ':' . $error->column . ' ' . esc_html( $error->message ) . "\n";
+			} else {
+				$error = $result->get_error_data();
+				echo $error[0] . ':' . $error[1] . ' ' . esc_html( $error[2] );
+			}
+			echo '</pre>';
+		}
+
+		// There is no further fallback parser, so hand the error back instead of returning null
+		return $result;
+	}
+}
+
+/**
+ * RSS Parser that makes use of the SimpleXML PHP extension.
+ */
+class RSS_Parser_SimpleXML {
+	/**
+	 * Parse an RSS 2.0 file into a list of post arrays.
+	 *
+	 * @param string $file Path to the RSS file.
+	 * @return array|WP_Error Post arrays, or a WP_Error for unreadable XML or a non-2.0 feed.
+	 */
+	function parse( $file ) {
+		// Collect libxml errors instead of emitting warnings; the previous setting is restored below
+		$previous = libxml_use_internal_errors( true );
+		$posts = array();
+		$xml = simplexml_load_file( $file );
+		$errors = libxml_get_errors();
+		libxml_clear_errors();
+		libxml_use_internal_errors( $previous );
+		// halt if loading produces an error
+		if ( false === $xml )
+			return new WP_Error( 'SimpleXML_parse_error', __( 'There was an error when reading this RSS file.', 'rss-importer' ), $errors );
+
+		$attributes = $xml->attributes();
+		if ( ! isset( $attributes['version'] ) || strval( $attributes['version'] ) != '2.0' )
+			return new WP_Error( 'RSS_parse_error', __( 'This does not appear to be a RSS 2.0 file, missing/invalid RSS version number.', 'rss-importer' ) );
+
+		// grab posts
+		foreach ( $xml->channel->item as $item ) {
+			$post = array();
+			$dc = $item->children( 'http://purl.org/dc/elements/1.1/' );
+			$content = $item->children( 'http://purl.org/rss/1.0/modules/content/' );
+			$post['post_title'] = (string) $item->title;
+			if ( isset( $item->guid ) )
+				$post['guid'] = (string) $item->guid;
+
+			// attempt to find publication date: pubDate first, dc:date otherwise
+			$published = false;
+			if ( isset( $item->pubDate ) )
+				$published = strtotime( (string) $item->pubDate );
+			else if ( isset( $dc->date ) ) {
+				// drop the colon from a trailing timezone offset and swap the 'T' separator for a space
+				$published = preg_replace( '|([-+])([0-9]+):([0-9]+)$|', '\1\2\3', (string) $dc->date );
+				$published = str_replace( 'T', ' ', $published );
+				$published = strtotime( $published );
+			}
+			if ( $published ) {
+				$post['post_date_gmt'] = gmdate( 'Y-m-d H:i:s', $published );
+				$post['post_date'] = get_date_from_gmt( $post['post_date_gmt'] );
+			}
+
+			// categories come from both <category> and <dc:subject>
+			$post['categories'] = array();
+			foreach ( $item->category as $c )
+				$post['categories'][] = html_entity_decode( (string) $c );
+			foreach ( $dc->subject as $c )
+				$post['categories'][] = html_entity_decode( (string) $c );
+
+			// if no content, use the description
+			$post['post_content'] = isset( $content->encoded ) ? (string) $content->encoded : (string) $item->description;
+			$post['post_excerpt'] = isset( $content->encoded ) ? (string) $item->description : '';
+			if ( isset( $dc->creator ) )
+				$post['imported_author_name'] = (string) $dc->creator;	// for later
+			$posts[] = $post;
+		}
+		return $posts;
+	}
+}
+
+/**
+ * RSS Parser that makes use of the XML Parser PHP extension.
+ */
+class RSS_Parser_XML {
+	var $cdata = '';          // text buffered since the last closing tag
+	var $posts = array();     // completed post arrays
+	var $post = array();      // post currently being assembled
+	var $in_item = false;     // true between <item> and </item>
+	var $rss_version = false; // version attribute of <rss>, false if absent
+
+	/**
+	 * Parse an RSS 2.0 file into a list of post arrays.
+	 *
+	 * @param string $file Path to the RSS file.
+	 * @return array|WP_Error Post arrays, or a WP_Error for unreadable XML or a non-2.0 feed.
+	 */
+	function parse( $file ) {
+		$this->cdata = '';
+		$this->posts = array();
+		$this->post = array( 'categories' => array() );
+		$this->in_item = false;
+		$this->rss_version = false;
+
+		$xml = xml_parser_create( 'UTF-8' );
+		xml_parser_set_option( $xml, XML_OPTION_SKIP_WHITE, 1 );
+		xml_parser_set_option( $xml, XML_OPTION_CASE_FOLDING, 0 );
+		xml_set_object( $xml, $this );
+		xml_set_character_data_handler( $xml, 'cdata' );
+		xml_set_element_handler( $xml, 'tag_open', 'tag_close' );
+
+		$parsed = xml_parse( $xml, (string) file_get_contents( $file ), true );
+		$error = array( xml_get_current_line_number( $xml ), xml_get_current_column_number( $xml ), xml_error_string( xml_get_error_code( $xml ) ) );
+		xml_parser_free( $xml ); // release the parser on the error path too
+		if ( ! $parsed )
+			return new WP_Error( 'XML_parse_error', __( 'There was an error when reading this RSS file.', 'rss-importer' ), $error );
+		if ( '2.0' != $this->rss_version )
+			return new WP_Error( 'RSS_parse_error', __( 'This does not appear to be a RSS 2.0 file, missing/invalid RSS version number.', 'rss-importer' ) );
+		return $this->posts;
+	}
+
+	// Element-start handler: records the <rss> version and notes when an <item> begins
+	function tag_open( $parse, $tag, $attr ) {
+		if ( 'rss' == $tag )
+			$this->rss_version = isset( $attr['version'] ) ? $attr['version'] : false;
+		if ( 'item' == $tag )
+			$this->in_item = true;
+	}
+
+	// Character-data handler: can fire several times for one text node, so buffer the text untrimmed
+	function cdata( $parser, $cdata ) {
+		$this->cdata .= $cdata;
+	}
+
+	// Element-end handler: stores the buffered text on the current post, then clears the buffer
+	function tag_close( $parser, $tag ) {
+		$text = trim( $this->cdata );
+		$this->cdata = '';
+		if ( ! $this->in_item )
+			return;
+
+		switch ( $tag ) {
+			case 'title':
+				$this->post['post_title'] = $text;
+				break;
+			case 'guid':
+				$this->post['guid'] = $text;
+				break;
+			case 'pubDate':
+			case 'dc:date':
+				if ( isset( $this->post['post_date'] ) )
+					break;
+				if ( 'dc:date' == $tag ) {
+					// drop the colon from a trailing timezone offset and swap the 'T' separator for a space
+					$text = preg_replace( '|([-+])([0-9]+):([0-9]+)$|', '\1\2\3', $text );
+					$text = str_replace( 'T', ' ', $text );
+				}
+				$published = strtotime( $text );
+				if ( ! $published )
+					break; // unparseable date: leave it unset rather than storing the epoch
+				$this->post['post_date_gmt'] = gmdate( 'Y-m-d H:i:s', $published );
+				$this->post['post_date'] = get_date_from_gmt( $this->post['post_date_gmt'] );
+				break;
+			case 'category':
+			case 'dc:subject':
+				$this->post['categories'][] = html_entity_decode( $text ); // same sources and decoding as RSS_Parser_SimpleXML
+				break;
+			case 'content:encoded':
+				$this->post['post_content'] = $text;
+				break;
+			case 'description':
+				$this->post['post_excerpt'] = $text;
+				break;
+			case 'dc:creator':
+				$this->post['imported_author_name'] = $text;
+				break;
+			case 'item':
+				// if no content, use the description
+				if ( empty( $this->post['post_content'] ) && ! empty( $this->post['post_excerpt'] ) ) {
+					$this->post['post_content'] = $this->post['post_excerpt'];
+					unset( $this->post['post_excerpt'] );
+				}
+				// every post carries title and content keys, even when the feed omitted them
+				$this->posts[] = $this->post + array( 'post_title' => '', 'post_content' => '' );
+				$this->post = array( 'categories' => array() );
+				$this->in_item = false;
+				break;
+		}
+	}
+}
\ No newline at end of file
Index: rss-importer.php
===================================================================
--- rss-importer.php	(revision 368063)
+++ rss-importer.php	(working copy)
@@ -5,23 +5,28 @@
 Description: Import posts from an RSS feed.
 Author: wordpressdotorg
 Author URI: http://wordpress.org/
-Version: 0.2
+Version: 0.3b
 Stable tag: 0.2
 License: GPL version 2 or later - http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
 */
 
-if ( !defined('WP_LOAD_IMPORTERS') )
+if ( ! defined( 'WP_LOAD_IMPORTERS' ) )
 	return;
 
+// use for debug; guarded so an existing definition of the constant does not trigger a redefinition notice
+if ( ! defined( 'IMPORT_DEBUG' ) ) define( 'IMPORT_DEBUG', false );
+
 // Load Importer API
 require_once ABSPATH . 'wp-admin/includes/import.php';
 
-if ( !class_exists( 'WP_Importer' ) ) {
+if ( ! class_exists( 'WP_Importer' ) ) {
 	$class_wp_importer = ABSPATH . 'wp-admin/includes/class-wp-importer.php';
 	if ( file_exists( $class_wp_importer ) )
-		require_once $class_wp_importer;
+		require $class_wp_importer;
 }
 
+require dirname( __FILE__ ) . '/parsers.php';
+
 /**
  * RSS Importer
  *
@@ -38,187 +43,187 @@
  *
  * @since unknown
  */
-if ( class_exists( 'WP_Importer' ) ) {
+if ( class_exists( 'WP_Importer' ) ) :
 class RSS_Import extends WP_Importer {
-
+	var $id; // attachment ID -- NOTE(review): no method visible in this patch reads or writes it; confirm it is needed
 	var $posts = array ();
 	var $file;
-
-	function header() {
-		echo '<div class="wrap">';
-		screen_icon();
-		echo '<h2>'.__('Import RSS', 'rss-importer').'</h2>';
-	}
-
-	function footer() {
-		echo '</div>';
-	}
-
+	
 	function greet() {
 		echo '<div class="narrow">';
-		echo '<p>'.__('Howdy! This importer allows you to extract posts from an RSS 2.0 file into your WordPress site. This is useful if you want to import your posts from a system that is not handled by a custom import tool. Pick an RSS file to upload and click Import.', 'rss-importer').'</p>';
-		wp_import_upload_form("admin.php?import=rss&amp;step=1");
+		echo '<p>' . __( 'Howdy! This importer allows you to extract posts from an RSS 2.0 file into your WordPress site. This is useful if you want to import your posts from a system that is not handled by a custom import tool. Pick an RSS file to upload and click Import.', 'rss-importer' ) . '</p>';
+		wp_import_upload_form( 'admin.php?import=rss&amp;step=1' ); // the upload form targets step 1, which dispatch() routes to handle_upload()
 		echo '</div>';
 	}
 
-	function _normalize_tag( $matches ) {
-		return '<' . strtolower( $matches[1] );
-	}
-
-	function get_posts() {
-		global $wpdb;
-
-		set_magic_quotes_runtime(0);
-		$datalines = file($this->file); // Read the file into an array
-		$importdata = implode('', $datalines); // squish it
-		$importdata = str_replace(array ("\r\n", "\r"), "\n", $importdata);
-
-		preg_match_all('|<item>(.*?)</item>|is', $importdata, $this->posts);
-		$this->posts = $this->posts[1];
-		$index = 0;
-		foreach ($this->posts as $post) {
-			preg_match('|<title>(.*?)</title>|is', $post, $post_title);
-			$post_title = str_replace(array('<![CDATA[', ']]>'), '', $wpdb->escape( trim($post_title[1]) ));
-
-			preg_match('|<pubdate>(.*?)</pubdate>|is', $post, $post_date_gmt);
-
-			if ($post_date_gmt) {
-				$post_date_gmt = strtotime($post_date_gmt[1]);
-			} else {
-				// if we don't already have something from pubDate
-				preg_match('|<dc:date>(.*?)</dc:date>|is', $post, $post_date_gmt);
-				$post_date_gmt = preg_replace('|([-+])([0-9]+):([0-9]+)$|', '\1\2\3', $post_date_gmt[1]);
-				$post_date_gmt = str_replace('T', ' ', $post_date_gmt);
-				$post_date_gmt = strtotime($post_date_gmt);
-			}
-
-			$post_date_gmt = gmdate('Y-m-d H:i:s', $post_date_gmt);
-			$post_date = get_date_from_gmt( $post_date_gmt );
-
-			preg_match_all('|<category>(.*?)</category>|is', $post, $categories);
-			$categories = $categories[1];
-
-			if (!$categories) {
-				preg_match_all('|<dc:subject>(.*?)</dc:subject>|is', $post, $categories);
-				$categories = $categories[1];
-			}
-
-			$cat_index = 0;
-			foreach ($categories as $category) {
-				$categories[$cat_index] = $wpdb->escape( html_entity_decode( $category ) );
-				$cat_index++;
-			}
-
-			preg_match('|<guid.*?>(.*?)</guid>|is', $post, $guid);
-			if ($guid)
-				$guid = $wpdb->escape(trim($guid[1]));
-			else
-				$guid = '';
-
-			preg_match('|<content:encoded>(.*?)</content:encoded>|is', $post, $post_content);
-			$post_content = str_replace(array ('<![CDATA[', ']]>'), '', $wpdb->escape(trim($post_content[1])));
-
-			if (!$post_content) {
-				// This is for feeds that put content in description
-				preg_match('|<description>(.*?)</description>|is', $post, $post_content);
-				$post_content = $wpdb->escape( html_entity_decode( trim( $post_content[1] ) ) );
-			}
-
-			// Clean up content
-			$post_content = preg_replace_callback('|<(/?[A-Z]+)|', array( &$this, '_normalize_tag' ), $post_content);
-			$post_content = str_replace('<br>', '<br />', $post_content);
-			$post_content = str_replace('<hr>', '<hr />', $post_content);
-
-			$post_author = 1;
-			$post_status = 'publish';
-			$this->posts[$index] = compact('post_author', 'post_date', 'post_date_gmt', 'post_content', 'post_title', 'post_status', 'guid', 'categories');
-			$index++;
-		}
-	}
-
-	function import_posts() {
+	/** Inserts each parsed post as the current user and stores the author name => post IDs map in the 'rss_importer' option. */
+	function import_posts( $posts ) {
+		$authors = array();	// author name => array (post_ids)
 		echo '<ol>';
+		foreach ( $posts as $post ) {
+			echo '<li>' . __( 'Importing post...', 'rss-importer' );
 
-		foreach ($this->posts as $post) {
-			echo "<li>".__('Importing post...', 'rss-importer');
-
-			extract($post);
-
-			if ($post_id = post_exists($post_title, $post_content, $post_date)) {
-				_e('Post already imported', 'rss-importer');
+			$post['post_author'] = get_current_user_id();
+			$post['post_status'] = 'publish';
+			$post = add_magic_quotes( $post + array( 'post_title' => '', 'post_content' => '', 'post_date' => '' ) ); // fill missing keys; core expects slashed data
+			if ( $post_id = post_exists( $post['post_title'], $post['post_content'], $post['post_date'] ) ) {
+				_e( 'Post already imported.', 'rss-importer' );
 			} else {
-				$post_id = wp_insert_post($post);
+				$post_id = wp_insert_post( $post );
+
 				if ( is_wp_error( $post_id ) )
 					return $post_id;
-				if (!$post_id) {
-					_e('Couldn&#8217;t get post ID', 'rss-importer');
+
+				if ( ! $post_id ) {
+					_e( 'Couldn&#8217;t get post ID', 'rss-importer' );
 					return;
 				}
 
-				if (0 != count($categories))
-					wp_create_categories($categories, $post_id);
+				if ( ! empty( $post['categories'] ) )
+					wp_create_categories( $post['categories'], $post_id );
+
 				_e('Done!', 'rss-importer');
 			}
+
+			if ( isset( $post['imported_author_name'] ) ) {
+				$n = stripslashes( $post['imported_author_name'] ); // key the map by the name as it appeared in the feed
+				if ( isset( $authors[$n] ) )
+					$authors[$n][] = (int) $post_id;
+				else
+					$authors[$n] = array( (int) $post_id );
+			}
+
 			echo '</li>';
 		}
 
 		echo '</ol>';
-
+
+		// always overwrite, so a map left over from an earlier import is never offered again
+		update_option( 'rss_importer', $authors );
 	}
 
-	function import() {
+	/**
+	 * Handles the RSS upload: parses the file, imports its posts and then
+	 * displays the author mapping form
+	 *
+	 * @return bool|WP_Error False if error uploading or invalid file, the WP_Error from import_posts() if it returned one, true otherwise
+	 */
+	function handle_upload() {
+		check_admin_referer( 'import-upload' );
 		$file = wp_import_handle_upload();
-		if ( isset($file['error']) ) {
-			echo $file['error'];
-			return;
+
+		if ( isset( $file['error'] ) ) {
+			echo '<p><strong>' . __( 'Sorry, there has been an error.', 'rss-importer' ) . '</strong><br />';
+			echo esc_html( $file['error'] ) . '</p>';
+			return false;
 		}
 
-		$this->file = $file['file'];
-		$this->get_posts();
-		$result = $this->import_posts();
+		$parser = new RSS_Parser();
+		$posts = $parser->parse( $file['file'] );
+
+		if ( ! is_array( $posts ) ) { // a WP_Error, or no result at all from the parser
+			echo '<p><strong>' . __( 'Sorry, there has been an error.', 'rss-importer' ) . '</strong><br />' . esc_html( is_wp_error( $posts ) ? $posts->get_error_message() : __( 'There was an error when reading this RSS file.', 'rss-importer' ) ) . '</p>';
+			return false;
+		}
+
+		$result = $this->import_posts( $posts );
 		if ( is_wp_error( $result ) )
 			return $result;
-		wp_import_cleanup($file['id']);
-		do_action('import_done', 'rss');
+
+		wp_import_cleanup( $file['id'] );
+		do_action( 'import_done', 'rss' );
 
-		echo '<h3>';
-		printf(__('All done. <a href="%s">Have fun!</a>', 'rss-importer'), get_option('home'));
-		echo '</h3>';
+		echo '<p>' . __('Import complete.', 'rss-importer') . '</p>';
+		$this->get_author_form();
+		return true;
 	}
 
+	/**
+	 * Registered callback function for the WordPress Importer
+	 *
+	 * Manages the three stages of the import: upload form (0), upload and import (1), author mapping (2)
+	 */
 	function dispatch() {
-		if (empty ($_GET['step']))
-			$step = 0;
-		else
-			$step = (int) $_GET['step'];
+		echo '<div class="wrap">';
+		screen_icon();
+		echo '<h2>'.__('Import RSS', 'rss-importer').'</h2>';
 
-		$this->header();
-
-		switch ($step) {
-			case 0 :
+		$step = empty( $_GET['step'] ) ? 0 : (int) $_GET['step'];
+		switch ( $step ) {
+			case 0:
 				$this->greet();
 				break;
-			case 1 :
-				check_admin_referer('import-upload');
-				$result = $this->import();
-				if ( is_wp_error( $result ) )
-					echo $result->get_error_message();
+			case 1:
+				$result = $this->handle_upload();
+				if ( is_wp_error( $result ) ) echo '<p>' . esc_html( $result->get_error_message() ) . '</p>'; // error handed back by the import
 				break;
+			case 2:
+				$this->update_authors();
+				break;
 		}
 
-		$this->footer();
+		echo '</div>';
 	}
 
-	function RSS_Import() {
-		// Nothing.
+	/*
+	 * Prints the form that maps each imported author name to a WordPress user; the form posts to step 2 of the importer
+	 */
+	function get_author_form() {
+		$authors = get_option( 'rss_importer', array() );
+		if( empty( $authors ) )
+			return;
+
+		$directions = __( 'All posts were imported with the current user as author. Wordpress detected the following author names in the imported posts. Use this form to assign each imported post to a different WordPress user.', 'rss-importer' );
+		$heading = __( 'Author mapping', 'rss-importer' );
+		$mapthis = __( 'Map this name', 'rss-importer' );
+		$tothis = __( 'To this Wordpress user', 'rss-importer' );
+		$submit = esc_attr( __( 'Save Changes', 'rss-importer' ) );
+
+		$rows= '';
+		$options = '';
+
+		$blog_users = get_users( array( 'blog_id' => get_current_blog_id() ) ) ;
+		foreach ( $blog_users as $user ) {
+			$sel = selected( $user->ID, get_current_user_id(), false );
+			$options .= "<option value='" . (int) $user->ID . "'$sel>" . esc_html( $user->display_name ) . '</option>';
+		}
+
+		foreach ( array_keys( $authors ) as $author ) {
+			$a = esc_attr( $author ); // attribute context; the visible label is escaped separately
+			$rows .= "<tr><td><label for='author-$a'>" . esc_html( $author ) . "</label></td><td><select name='authors[$a]' id='author-$a'>" . $options . "</select></td></tr>";
+		}
+
+		echo '<style type="text/css">#rss_import_authors th, #rss_import_authors td {padding: 3px 10px} </style>';
+		echo "<div class='wrap'><h2>$heading</h2><p>$directions</p><form action='admin.php?import=rss&amp;step=2' method='post'>";
+		wp_nonce_field( 'import-rss' );
+		echo "<table id='rss_import_authors'><thead><tr><th>$mapthis</th><th>$tothis</th></tr></thead><tbody>$rows</tbody></table><input type='submit' class='button-primary' value='$submit' /></form></div>";
 	}
+	
+	/** Step 2: reassigns imported posts to the WordPress users picked in the author mapping form. */
+	function update_authors() {
+		check_admin_referer( 'import-rss' );
+		global $wpdb;
+		$authors = get_option( 'rss_importer', array() );
+		if ( empty( $_POST['authors'] ) || ! is_array( $_POST['authors'] ) || empty( $authors ) )
+			return;
+		$result = true;
+		foreach ( $_POST['authors'] as $imported_name => $user_id_to_assign ) {
+			if ( empty( $authors[$imported_name] ) || ! is_scalar( $user_id_to_assign ) || ! get_userdata( (int) $user_id_to_assign ) ) continue; // unknown name or user
+			$post_ids = implode( ', ', array_map( 'intval', (array) $authors[$imported_name] ) );
+			$result = ( false !== $wpdb->query( $wpdb->prepare( "UPDATE $wpdb->posts SET post_author = %d WHERE ID IN ($post_ids)", $user_id_to_assign ) ) ) && $result;
+		}
+		if ( $result )
+			echo '<p>' . __( 'Post authors updated. All done!', 'rss-importer' ) . ' <a href="' . esc_url( trailingslashit( get_bloginfo( 'url' ) ) ) . '">' . __( 'Have fun!', 'rss-importer' ) . '</a></p>';
+		else
+			echo '<p>' . __( 'An error occurred while trying to reassign post authors. Please try doing it manually.', 'rss-importer' ) . '</p>';
+	}
 }
 
 $rss_import = new RSS_Import();
 
-register_importer('rss', __('RSS', 'rss-importer'), __('Import posts from an RSS feed.', 'rss-importer'), array ($rss_import, 'dispatch'));
+register_importer( 'rss', __( 'RSS', 'rss-importer' ), __( 'Import posts from an RSS feed.', 'rss-importer' ), array ( $rss_import, 'dispatch' ) );
 
-} // class_exists( 'WP_Importer' )
+endif; // class_exists( 'WP_Importer' )
 
 function rss_importer_init() {
     load_plugin_textdomain( 'rss-importer', false, dirname( plugin_basename( __FILE__ ) ) . '/languages' );
