Index: wp-comments-post.php
===================================================================
--- wp-comments-post.php	(revision 7600)
+++ wp-comments-post.php	(working copy)
@@ -29,6 +29,37 @@
 $comment_author_url   = trim($_POST['url']);
 $comment_content      = trim($_POST['comment']);
 
+  // Bug 5998: refuse comment fields containing bytes outside the accepted set.
+  // $re matches one acceptable character as raw bytes (there is no /u modifier),
+  // so deleting every match leaves only the offending bytes behind.
+  $re = "/" .
+        "[\x09\x0A\x0D\x20-\x7E]" .          // tab, LF, CR and printable ASCII
+        "|[\xC2-\xDF][\x80-\xBF]" .          // 2-byte, non-overlong
+        "|\xE0[\xA0-\xBF][\x80-\xBF]" .      // 3-byte E0 lead, excluding overlongs
+        "|[\xE1-\xEC\xEE][\x80-\xBF]{2}" .   // ordinary 3-byte
+        "|\xEF[\x80-\xBE][\x80-\xBF]" .      // 3-byte EF lead up to U+FFBF
+        "|\xEF\xBF[\x80-\xBD]" .             // U+FFC0-U+FFFD; U+FFFE and U+FFFF are refused
+        "|\xED[\x80-\x9F][\x80-\xBF]" .      // 3-byte ED lead, excluding surrogates
+        "|\xF0[\x90-\xBF][\x80-\xBF]{2}" .   // planes 1-3
+        "|[\xF1-\xF3][\x80-\xBF]{3}" .       // planes 4-15
+        "|\xF4[\x80-\x8F][\x80-\xBF]{2}" .   // plane 16
+        "/";
+  // (value, rejection message) pairs in check order; __() keeps literal arguments.
+  $unicode_checks = array(
+    array( $comment_author,       __('Sorry, I cannot let you post that.  You have entered invalid Unicode characters in your Name that could break my site') ),
+    array( $comment_author_email, __('Sorry, I cannot let you post that.  You have entered invalid Unicode characters in your Mail Address that could break my site') ),
+    array( $comment_author_url,   __('Sorry, I cannot let you post that.  You have entered invalid Unicode characters in your Website URL that could break my site') ),
+    array( $comment_content,      __('Sorry, I cannot let you post that.  You have entered invalid Unicode characters in your Comment that could break my site') )
+  );
+  foreach ( $unicode_checks as $unicode_check ) {
+    // preg_replace() yields NULL on a PCRE failure; refuse the post in that case
+    // too, because strlen(NULL) is 0 and would otherwise pass the field as clean.
+    $bad_unicode = preg_replace($re, "", $unicode_check[0]);
+    if( !is_string($bad_unicode) || strlen($bad_unicode) != 0 ) {
+      wp_die( $unicode_check[1] );
+    }
+  }
+
 // If the user is logged in
 $user = wp_get_current_user();
 if ( $user->ID ) {
