Index: wp-includes/formatting.php
===================================================================
--- wp-includes/formatting.php	(revision 20556)
+++ wp-includes/formatting.php	(working copy)
@@ -2066,14 +2066,121 @@
 		$more = __( '&hellip;' );
 	$original_text = $text;
 	$text = wp_strip_all_tags( $text );
-	$words_array = preg_split( "/[\n\r\t ]+/", $text, $num_words + 1, PREG_SPLIT_NO_EMPTY );
-	if ( count( $words_array ) > $num_words ) {
-		array_pop( $words_array );
-		$text = implode( ' ', $words_array );
-		$text = $text . $more;
-	} else {
-		$text = implode( ' ', $words_array );
+	
+	/* translators: How would you like to count Latin (English, French, etc.)?  
+		 translate this to 'characters' if you think 'two words' should be counted as 8 (9 if spaces are counted),
+		 translate this to 'words' if you think 'two words' should be counted as 2. 
+		 Default: 'words'. 
+	*/
+	$count_latin_by = 'characters' == _x( 'Count Latin by', 'Latin word count: as words or characters?' ) ? 'characters' : 'words';
+	
+	/* translators: If you translated 'Count Latin by' to 'characters', should spaces be counted?
+		 translate this to 'yes' if you want spaces to be counted, 
+		 translate this to 'no' if you do not want spaces to be counted. 
+		 Default: 'no'.
+	*/
+	$count_spaces = 'yes' == _x( 'Count Latin spaces', 'Latin spaces count: yes or no?' ) ? true : false;
+	
+	/* translators: Would you like to count punctuation marks in East Asian text?
+		 translate this to 'yes' if you want punctuation marks to be counted, 
+		 translate this to 'no' if you do not want punctuation marks to be counted.
+		 Default: 'yes'.
+	*/
+	$count_eastasia_punc = 'no' == _x( 'Count East Asia punctuation marks', 'East Asia punctuation marks count: yes or no?' ) ? false : true;
+	
+	/* translators: Would you like to cut a Latin word apart to fit the word limit? 
+		 translate this to 'yes' if you allow a word to be cut if it is too long, 
+		 translate this to 'no' if you do not allow a word to be cut if it is too long.
+		 Default: 'yes'.
+	*/
+	$break_words = 'yes' == _x( 'Break Latin words when trimming text to a certain number of words', 'For a long word, should I break it apart to fit the word limit: yes or no?' ) ? true : false;
+	
+	@mb_internal_encoding( get_option( 'blog_charset' ) );
+	
+	$subject = $text;
+	$text = '';
+	
+	$preg_eastasia_char = '[\x{3100}-\x{312F}\x{31A0}-\x{31BF}\x{4E00}-\x{9FCF}\x{3400}-\x{4DBF}\x{F900}-\x{FAFF}\x{2F00}-\x{2FDF}\x{2E80}-\x{2EFF}\x{31C0}-\x{31EF}\x{2FF0}-\x{2FFF}\x{1100}-\x{11FF}\x{A960}-\x{A97F}\x{D780}-\x{D7FF}\x{3130}-\x{318F}\x{FFA0}-\x{FFDC}\x{AC00}-\x{D7AF}\x{3040}-\x{309F}\x{30A0}-\x{30FF}\x{31F0}-\x{31FF}\x{FF65}-\x{FF9F}\x{3190}-\x{319F}\x{A4D0}-\x{A4FF}\x{A000}-\x{A48F}\x{A490}-\x{A4CF}]';
+	$preg_eastasia_punc = '[\x{3000}-\x{303F}\x{FE30}-\x{FE4F}\x{FF01}-\x{FF60}\x{FE10}-\x{FE1F}]';
+
+	$preg_latin_punc = '[0-9.(),;:!?%#$¿\'"_+=\\/-]';
+
+	$i = 0;
+	$words = 0;
+	while ( true ) {
+		// Should anything fail, this prevents us from going into an infinite loop
+		if ( $i++ >= $num_words * 10 )
+			break;
+	
+		if ( trim( $subject ) == '' )
+			break;
+	
+		if ( $words >= $num_words )
+			break;
+	
+		$fragment_words = 0;
+	
+		$subject = preg_replace_callback( 
+			"/^(?:$preg_eastasia_char|$preg_eastasia_punc|(?<Latin>[a-z0-9_]+$preg_latin_punc*)|.)\s*/ui",
+			create_function(
+				'$matches',
+				'global $wp_trim_words_fragment_matches; $wp_trim_words_fragment_matches = $matches; return \'\';'
+			),
+			$subject, 
+			1 
+		);
+	
+		// XXX workaround to get matches from preg_replace_callback()
+		global $wp_trim_words_fragment_matches;
+		$fragment = $orig_fragment = $wp_trim_words_fragment_matches[0];
+		$is_fragment_latin = isset( $wp_trim_words_fragment_matches['Latin'] ) ? true : false;
+	
+		if ( $is_fragment_latin && $count_latin_by == 'words' ) {
+			$text .= $orig_fragment;
+			$words++;
+			continue;
+		}
+	
+		if ( ! $count_eastasia_punc )
+			$fragment = preg_replace( "/$preg_eastasia_punc/u", '', $fragment );
+	
+		if ( ! $count_spaces )
+			$fragment = trim( $fragment );
+	
+		$fragment_words += mb_strlen( $fragment );
+	
+		if ( $words + $fragment_words > $num_words ) {
+			if ( ! $break_words || $count_latin_by == 'words' )
+				break;
+		
+			$fragment_chars = preg_split('/(?<!^)(?!$)/u', $orig_fragment );
+		
+			$j = 0;
+			$fragment_words = 0;
+			while ( true ) {
+				if ( ++$j > mb_strlen( $orig_fragment ) )
+					break 2;
+			
+				if ( $words + $fragment_words >= $num_words )
+					break 2;
+			
+				$text .= $fragment_chars[0];
+				$fragment_words++;
+				$words++;
+				array_shift( $fragment_chars );
+			}
+		}
+	
+		$text .= $orig_fragment;
+		$words += $fragment_words;
 	}
+
+	// Prevent other scripts from accessing fragments of users' post
+	unset( $GLOBALS['wp_trim_words_fragment_matches'] );
+	
+	if ( $words > $num_words )
+		$text .= $more;
+	
 	return apply_filters( 'wp_trim_words', $text, $num_words, $more, $original_text );
 }
 
