2069 | | $words_array = preg_split( "/[\n\r\t ]+/", $text, $num_words + 1, PREG_SPLIT_NO_EMPTY ); |
2070 | | if ( count( $words_array ) > $num_words ) { |
2071 | | array_pop( $words_array ); |
2072 | | $text = implode( ' ', $words_array ); |
2073 | | $text = $text . $more; |
2074 | | } else { |
2075 | | $text = implode( ' ', $words_array ); |
| 2069 | |
| 2070 | /* translators: How would you like to count Latin (English, French, etc.)? |
| 2071 | translate this to 'characters' if you think 'two words' should be counted as 8 (9 if spaces are counted), |
| 2072 | translate this to 'words' if you think 'two words' should be counted as 2. |
| 2073 | Default: 'words'. |
| 2074 | */ |
| 2075 | $count_latin_by = 'characters' == _x( 'Count Latin by', 'Latin word count: as words or characters?' ) ? 'characters' : 'words'; |
| 2076 | |
| 2077 | /* translators: If you set countLatinBy to 'characters', would you count spaces? |
| 2078 | translate this to 'yes' if you want spaces to be counted, |
| 2079 | translate this to 'no' if you do not want spaces to be counted. |
| 2080 | Default: 'no'. |
| 2081 | */ |
| 2082 | $count_spaces = 'yes' == _x( 'Count Latin spaces', 'Latin spaces count: yes or no?' ) ? true : false; |
| 2083 | |
| 2084 | /* translators: Would you like to count punctuation marks for East Asia text? |
| 2085 | translate this to 'yes' if you want punctuation marks to be counted, |
| 2086 | translate this to 'no' if you do not want punctuation marks to be counted. |
| 2087 | Default: 'yes'. |
| 2088 | */ |
| 2089 | $count_eastasia_punc = 'no' == _x( 'Count East Asia punctuation marks', 'East Asia punctuation marks count: yes or no?' ) ? false : true; |
| 2090 | |
| 2091 | /* translators: Would you like to cut a Latin word apart to fit the word limit? |
| 2092 | translate this to 'yes' if you allow a word to be cut if it is too long, |
| 2093 | translate this to 'no' if you do not allow a word to be cut if it is too long. |
| 2094 | Default: 'yes'. |
| 2095 | */ |
| 2096 | $break_words = 'yes' == _x( 'Break Latin words when trimming text to a certain number of words', 'For a long word, should I break it apart to fit the word limit: yes or no?' ) ? true : false; |
| 2097 | |
| 2098 | @mb_internal_encoding( get_option( 'blog_charset' ) ); |
| 2099 | |
| 2100 | $subject = $text; |
| 2101 | $text = ''; |
| 2102 | |
| 2103 | $preg_eastasia_char = '[\x{3100}-\x{312F}\x{31A0}-\x{31BF}\x{4E00}-\x{9FCF}\x{3400}-\x{4DBF}\x{F900}-\x{FAFF}\x{2F00}-\x{2FDF}\x{2E80}-\x{2EFF}\x{31C0}-\x{31EF}\x{2FF0}-\x{2FFF}\x{1100}-\x{11FF}\x{A960}-\x{A97F}\x{D780}-\x{D7FF}\x{3130}-\x{318F}\x{FFA0}-\x{FFDC}\x{AC00}-\x{D7AF}\x{3040}-\x{309F}\x{30A0}-\x{30FF}\x{31F0}-\x{31FF}\x{FF65}-\x{FF9F}\x{3190}-\x{319F}\x{A4D0}-\x{A4FF}\x{A000}-\x{A48F}\x{A490}-\x{A4CF}]'; |
| 2104 | $preg_eastasia_punc = '[\x{3000}-\x{303F}\x{FE30}-\x{FE4F}\x{FF01}-\x{FF60}\x{FE10}-\x{FE1F}]'; |
| 2105 | |
| 2106 | $preg_latin_punc = '[0-9.(),;:!?%#$¿\'"_+=\\/-]'; |
| 2107 | |
| 2108 | $i = 0; |
| 2109 | $words = 0; |
| 2110 | while ( true ) { |
| 2111 | // Should anything fail, this prevents us from going into an infinite loop |
| 2112 | if ( $i++ >= $num_words * 10 ) |
| 2113 | break; |
| 2114 | |
| 2115 | if ( trim( $subject ) == '' ) |
| 2116 | break; |
| 2117 | |
| 2118 | if ( $words >= $num_words ) |
| 2119 | break; |
| 2120 | |
| 2121 | $fragment_words = 0; |
| 2122 | |
| 2123 | $subject = preg_replace_callback( |
| 2124 | "/^(?:$preg_eastasia_char|$preg_eastasia_punc|(?<Latin>[a-z0-9_]+$preg_latin_punc*)|.)\s*/ui", |
| 2125 | create_function( |
| 2126 | '$matches', |
| 2127 | 'global $wp_trim_words_fragment_matches; $wp_trim_words_fragment_matches = $matches; return \'\';' |
| 2128 | ), |
| 2129 | $subject, |
| 2130 | 1 |
| 2131 | ); |
| 2132 | |
| 2133 | // XXX workaround to get matches from preg_replace_callback() |
| 2134 | global $wp_trim_words_fragment_matches; |
| 2135 | $fragment = $orig_fragment = $wp_trim_words_fragment_matches[0]; |
| 2136 | $is_fragment_latin = isset( $wp_trim_words_fragment_matches['Latin'] ) ? true : false; |
| 2137 | |
| 2138 | if ( $is_fragment_latin && $count_latin_by == 'words' ) { |
| 2139 | $text .= $orig_fragment; |
| 2140 | $words++; |
| 2141 | continue; |
| 2142 | } |
| 2143 | |
| 2144 | if ( ! $count_eastasia_punc ) |
| 2145 | $fragment = preg_replace( "/$preg_eastasia_punc/u", '', $fragment ); |
| 2146 | |
| 2147 | if ( ! $count_spaces ) |
| 2148 | $fragment = trim( $fragment ); |
| 2149 | |
| 2150 | $fragment_words += mb_strlen( $fragment ); |
| 2151 | |
| 2152 | if ( $words + $fragment_words > $num_words ) { |
| 2153 | if ( ! $break_words || $count_latin_by == 'words' ) |
| 2154 | break; |
| 2155 | |
| 2156 | $fragment_chars = preg_split('/(?<!^)(?!$)/u', $orig_fragment ); |
| 2157 | |
| 2158 | $j = 0; |
| 2159 | $fragment_words = 0; |
| 2160 | while ( true ) { |
| 2161 | if ( ++$j > mb_strlen( $orig_fragment ) ) |
| 2162 | break 2; |
| 2163 | |
| 2164 | if ( $words + $fragment_words >= $num_words ) |
| 2165 | break 2; |
| 2166 | |
| 2167 | $text .= $fragment_chars[0]; |
| 2168 | $fragment_words++; |
| 2169 | $words++; |
| 2170 | array_shift( $fragment_chars ); |
| 2171 | } |
| 2172 | } |
| 2173 | |
| 2174 | $text .= $orig_fragment; |
| 2175 | $words += $fragment_words; |