Changeset 11414
- Timestamp: 05/20/2009 09:13:14 PM (16 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/wp-includes/formatting.php
--- trunk/wp-includes/formatting.php	(r11394)
+++ trunk/wp-includes/formatting.php	(r11414)
@@ -173,24 +173,27 @@
  * Checks to see if a string is utf8 encoded.
  *
- * @author bmorel at ssi dot fr
- *
+ * NOTE: This function checks for 5-Byte sequences, UTF8
+ *       has Bytes Sequences with a maximum length of 4.
+ *
+ * @author bmorel at ssi dot fr (modified)
  * @since 1.2.1
  *
- * @param string $Str The string to be checked
- * @return bool True if $Str fits a UTF-8 model, false otherwise.
- */
-function seems_utf8($Str) { # by bmorel at ssi dot fr
-	$length = strlen($Str);
+ * @param string $str The string to be checked
+ * @return bool True if $str fits a UTF-8 model, false otherwise.
+ */
+function seems_utf8(&$str) {
+	$length = strlen($str);
 	for ($i=0; $i < $length; $i++) {
-		if (ord($Str[$i]) < 0x80) continue; # 0bbbbbbb
-		elseif ((ord($Str[$i]) & 0xE0) == 0xC0) $n=1; # 110bbbbb
-		elseif ((ord($Str[$i]) & 0xF0) == 0xE0) $n=2; # 1110bbbb
-		elseif ((ord($Str[$i]) & 0xF8) == 0xF0) $n=3; # 11110bbb
-		elseif ((ord($Str[$i]) & 0xFC) == 0xF8) $n=4; # 111110bb
-		elseif ((ord($Str[$i]) & 0xFE) == 0xFC) $n=5; # 1111110b
+		$c = ord($str[$i]);
+		if ($c < 0x80) $n = 0; # 0bbbbbbb
+		elseif (($c & 0xE0) == 0xC0) $n=1; # 110bbbbb
+		elseif (($c & 0xF0) == 0xE0) $n=2; # 1110bbbb
+		elseif (($c & 0xF8) == 0xF0) $n=3; # 11110bbb
+		elseif (($c & 0xFC) == 0xF8) $n=4; # 111110bb
+		elseif (($c & 0xFE) == 0xFC) $n=5; # 1111110b
 		else return false; # Does not match any model
 		for ($j=0; $j<$n; $j++) { # n bytes matching 10bbbbbb follow ?
-			if ((++$i == $length) || ((ord($Str[$i]) & 0xC0) != 0x80))
-			return false;
+			if ((++$i == $length) || ((ord($str[$i]) & 0xC0) != 0x80))
+				return false;
 		}
 	}
Note: See TracChangeset for help on using the changeset viewer.