Index: formatting.php
===================================================================
--- formatting.php	(revision 29689)
+++ formatting.php	(working copy)
@@ -703,48 +703,89 @@
 /**
  * Checks for invalid UTF8 in a string.
  *
  * @since 2.8.0
  *
  * @param string $string The text which is to be checked.
  * @param boolean $strip Optional. Whether to attempt to strip out invalid UTF8. Default is false.
- * @return string The checked text.
+ * @return string The string unmodified if it is valid UTF-8 or the blog_charset is not UTF-8. Otherwise an empty string, or, when $strip is true and mbstring or iconv is usable, the string stripped of invalid bytes.
  */
 function wp_check_invalid_utf8( $string, $strip = false ) {
 	$string = (string) $string;
 
-	if ( 0 === strlen( $string ) ) {
+	// An empty string has no byte at offset 0; isset() on the offset avoids a strlen() call
+	if ( ! isset( $string[0] ) ) {
 		return '';
 	}
 
-	// Store the site charset as a static to avoid multiple calls to get_option()
-	static $is_utf8;
-	if ( !isset( $is_utf8 ) ) {
-		$is_utf8 = in_array( get_option( 'blog_charset' ), array( 'utf8', 'utf-8', 'UTF8', 'UTF-8' ) );
+	// Cache whether the site charset is UTF-8 and whether PCRE supports the /u modifier
+	static $is_utf8, $pcre_utf8;
+
+	// Computed on the first call only, to avoid repeated get_option() calls and PCRE probes
+	if ( ! isset( $is_utf8 ) ) {
+		$is_utf8 = in_array( get_option( 'blog_charset' ), array( 'utf8', 'utf-8', 'UTF8', 'UTF-8' ), true );
+
+		// preg_match() returns false when the pattern cannot be used, i.e. when /u is unsupported
+		$pcre_utf8 = ( @preg_match( '//u', '' ) !== false );
 	}
-	if ( !$is_utf8 ) {
-		return $string;
-	}
 
-	// Check for support for utf8 in the installed PCRE library once and store the result in a static
-	static $utf8_pcre;
-	if ( !isset( $utf8_pcre ) ) {
-		$utf8_pcre = @preg_match( '/^./u', 'a' );
-	}
-	// We can't demand utf8 in the PCRE installation, so just return the string in those cases
-	if ( !$utf8_pcre ) {
+	// Sites that do not use UTF-8 are not checked; return the string unmodified
+	if ( ! $is_utf8 ) {
 		return $string;
 	}
 
-	// preg_match fails when it encounters invalid UTF8 in $string
-	if ( 1 === @preg_match( '/^./us', $string ) ) {
-		return $string;
+	if ( $pcre_utf8 ) {
+		// With /u, preg_match() returns false when the subject contains invalid UTF-8
+		if ( preg_match( '//u', $string ) !== false ) {
+			return $string;
+		}
+	} else {
+		// PCRE added the (*UTF8) pattern option in version 7.9; probe for it, then use it to validate the string
+		if ( @preg_match( '/(*UTF8)/', '' ) !== false && preg_match( '/(*UTF8)/', $string ) !== false ) {
+			return $string;
+		}
+
+		// Last resort: htmlspecialchars() returns an empty string when the input is not valid in the given charset
+		if ( htmlspecialchars( $string, ENT_COMPAT, 'UTF-8' ) !== '' ) {
+			return $string;
+		}
 	}
 
-	// Attempt to strip the bad chars if requested (not recommended)
-	if ( $strip && function_exists( 'iconv' ) ) {
-		return iconv( 'utf-8', 'utf-8', $string );
+	// Attempt to strip the bad chars if requested
+	if ( $strip ) {
+		// Prefer mbstring: with the substitute character set to 'none', invalid bytes are dropped during conversion
+		if ( function_exists( 'mb_substitute_character' ) && function_exists( 'mb_convert_encoding' ) ) {
+			// Remember the current substitute character so the global mbstring setting can be restored afterwards
+			$previous = mb_substitute_character();
+
+			if ( mb_substitute_character( 'none' ) === true ) {
+				$stripped = mb_convert_encoding( $string, 'UTF-8', 'UTF-8' );
+				mb_substitute_character( $previous );
+
+				if ( is_string( $stripped ) ) {
+					return $stripped;
+				}
+			}
+		}
+
+		// Whether a usable iconv is available; detected once and cached
+		static $iconv;
+
+		if ( ! isset( $iconv ) ) {
+			// iconv may be disabled by compiling using --without-iconv, or may be the wrong iconv lib
+			$iconv = ( function_exists( 'iconv' ) && defined( 'ICONV_IMPL' ) && ICONV_IMPL === 'libiconv' );
+		}
+
+		if ( $iconv ) {
+			// //IGNORE silently discards characters that cannot be represented. Needs '@', see _php_iconv_show_error
+			$stripped = @iconv( 'UTF-8', 'UTF-8//IGNORE', $string );
+
+			if ( false !== $stripped ) {
+				return $stripped;
+			}
+		}
 	}
 
+	// Invalid UTF-8 was found and stripping was not requested or not possible
 	return '';
 }
 
