| 2069 | | $words_array = preg_split( "/[\n\r\t ]+/", $text, $num_words + 1, PREG_SPLIT_NO_EMPTY ); |
| 2070 | | if ( count( $words_array ) > $num_words ) { |
| 2071 | | array_pop( $words_array ); |
| 2072 | | $text = implode( ' ', $words_array ); |
| 2073 | | $text = $text . $more; |
| 2074 | | } else { |
| 2075 | | $text = implode( ' ', $words_array ); |
| | 2069 | |
| | 2070 | /* translators: How would you like to count Latin (English, French, etc.)? |
| | 2071 | translate this to 'characters' if you think 'two words' should be counted as 7, |
| | 2072 | translate this to 'words' if you think 'two words' should be counted as 2. |
| | 2073 | Default: 'words'. |
| | 2074 | */ |
| | 2075 | $count_latin_by = 'characters' == _x( 'Count Latin by', 'Latin word count: as words or characters?' ) ? 'characters' : 'words'; |
| | 2076 | |
| | 2077 | /* translators: If you set countLatinBy to 'characters', would you count spaces? |
| | 2078 | translate this to 'yes' if you want spaces to be counted, |
| | 2079 | translate this to 'no' if you do not want spaces to be counted. |
| | 2080 | Default: 'no'. |
| | 2081 | */ |
| | 2082 | $count_spaces = 'yes' == _x( 'Count Latin spaces', 'Latin spaces count: yes or no?' ) ? true : false; |
| | 2083 | |
| | 2084 | /* translators: Would you like to count punctuation marks for East Asia text? |
| | 2085 | translate this to 'yes' if you want punctuation marks to be counted, |
| | 2086 | translate this to 'no' if you do not want punctuation marks to be counted. |
| | 2087 | Default: 'yes'. |
| | 2088 | */ |
| | 2089 | $count_eastasia_punc = 'no' == _x( 'Count East Asia punctuation marks', 'East Asia punctuation marks count: yes or no?' ) ? false : true; |
| | 2090 | |
| | 2091 | /* translators: Would you like to cut a Latin word apart to fit the word limit? |
| | 2092 | translate this to 'yes' if you allow a word to be cut if it is too long, |
| | 2093 | translate this to 'no' if you do not allow a word to be cut if it is too long. |
| | 2094 | Default: 'no'. |
| | 2095 | */ |
| | 2096 | $break_words = 'yes' == _x( 'Break Latin words when trimming text to a certain number of words', 'For a long word, should I break it apart to fit the word limit: yes or no?' ) ? true : false; |
| | 2097 | |
| | 2098 | @mb_internal_encoding( get_option( 'blog_charset' ) ); |
| | 2099 | |
| | 2100 | $subject = $text; |
| | 2101 | $text = ''; |
| | 2102 | |
| | 2103 | $preg_eastasia_char = '[\x{3100}-\x{312F}\x{31A0}-\x{31BF}\x{4E00}-\x{9FCF}\x{3400}-\x{4DBF}\x{F900}-\x{FAFF}\x{2F00}-\x{2FDF}\x{2E80}-\x{2EFF}\x{31C0}-\x{31EF}\x{2FF0}-\x{2FFF}\x{1100}-\x{11FF}\x{A960}-\x{A97F}\x{D780}-\x{D7FF}\x{3130}-\x{318F}\x{FFA0}-\x{FFDC}\x{AC00}-\x{D7AF}\x{3040}-\x{309F}\x{30A0}-\x{30FF}\x{31F0}-\x{31FF}\x{FF65}-\x{FF9F}\x{3190}-\x{319F}\x{A4D0}-\x{A4FF}\x{A000}-\x{A48F}\x{A490}-\x{A4CF}]'; |
| | 2104 | $preg_eastasia_punc = '[\x{3000}-\x{303F}\x{FE30}-\x{FE4F}\x{FF01}-\x{FF60}\x{FE10}-\x{FE1F}]'; |
| | 2105 | $preg_latin = '[A-Za-z0-9\x{0080}-\x{00FF}\x{0100}-\x{017F}\x{0180}-\x{024F}\x{2C60}-\x{2C7F}\x{A720}-\x{A7FF}\x{1E00}-\x{1EFF}\x{0027}\x{2019}\x{2010}-\x{2015}-]'; |
| | 2106 | $preg_latin_punc = '[0-9.(),;:!?%#$¿\'"_+=\\/-]'; |
| | 2107 | |
| | 2108 | $i = 0; |
| | 2109 | $words = 0; |
| | 2110 | while ( true ) { |
| | 2111 | // Should anything fail, this prevents us from going into an infinite loop |
| | 2112 | if ( $i++ >= $num_words * 10 ) |
| | 2113 | break; |
| | 2114 | |
| | 2115 | if ( trim( $subject ) == '' ) |
| | 2116 | break; |
| | 2117 | |
| | 2118 | if ( $words >= $num_words ) |
| | 2119 | break; |
| | 2120 | |
| | 2121 | $fragment_words = 0; |
| | 2122 | |
| | 2123 | $subject = preg_replace_callback( |
| | 2124 | "/^(?:$preg_eastasia_char|$preg_eastasia_punc|(?<Latin>$preg_latin+$preg_latin_punc*)|.)\s*/u", |
| | 2125 | create_function( |
| | 2126 | '$matches', |
| | 2127 | 'global $wp_trim_words_fragment_matches; $wp_trim_words_fragment_matches = $matches; return \'\';' |
| | 2128 | ), |
| | 2129 | $subject, |
| | 2130 | 1 |
| | 2131 | ); |
| | 2132 | |
| | 2133 | // XXX workaround to get matches from preg_replace_callback() |
| | 2134 | global $wp_trim_words_fragment_matches; |
| | 2135 | $fragment = $orig_fragment = $wp_trim_words_fragment_matches[0]; |
| | 2136 | $is_fragment_latin = isset( $wp_trim_words_fragment_matches['Latin'] ) ? true : false; |
| | 2137 | |
| | 2138 | if ( $is_fragment_latin && $count_latin_by == 'words' ) { |
| | 2139 | $text .= $orig_fragment; |
| | 2140 | $words++; |
| | 2141 | continue; |
| | 2142 | } |
| | 2143 | |
| | 2144 | if ( ! $count_eastasia_punc ) |
| | 2145 | $fragment = preg_replace( "/$preg_eastasia_punc/u", '', $fragment ); |
| | 2146 | |
| | 2147 | if ( ! $count_spaces ) |
| | 2148 | $fragment = trim( $fragment ); |
| | 2149 | |
| | 2150 | $fragment_words += mb_strlen( $fragment ); |
| | 2151 | |
| | 2152 | if ( $words + $fragment_words > $num_words ) { |
| | 2153 | if ( ! $break_words || $count_latin_by == 'words' ) |
| | 2154 | break; |
| | 2155 | |
| | 2156 | $fragment_chars = preg_split('/(?<!^)(?!$)/u', $orig_fragment ); |
| | 2157 | |
| | 2158 | $j = 0; |
| | 2159 | $fragment_words = 0; |
| | 2160 | while ( true ) { |
| | 2161 | if ( ++$j > mb_strlen( $orig_fragment ) ) |
| | 2162 | break 2; |
| | 2163 | |
| | 2164 | if ( $words + $fragment_words >= $num_words ) |
| | 2165 | break 2; |
| | 2166 | |
| | 2167 | $text .= $fragment_chars[0]; |
| | 2168 | $fragment_words++; |
| | 2169 | $words++; |
| | 2170 | array_shift( $fragment_chars ); |
| | 2171 | } |
| | 2172 | } |
| | 2173 | |
| | 2174 | $text .= $orig_fragment; |
| | 2175 | $words += $fragment_words; |