### Eclipse Workspace Patch 1.0
#P wordpress-trunk
|
|
|
246 | 246 | /** |
247 | 247 | * Checks to see if a string is utf8 encoded. |
248 | 248 | * |
249 | | * NOTE: This function checks for 5-Byte sequences, UTF8 |
| 249 | * NOTE: This function accepts sequences of up to 6 bytes, although UTF8
250 | 250 | * has Bytes Sequences with a maximum length of 4. |
251 | 251 | * |
252 | 252 | * @author bmorel at ssi dot fr (modified) |
… |
… |
|
255 | 255 | * @param string $str The string to be checked |
256 | 256 | * @return bool True if $str fits a UTF-8 model, false otherwise. |
257 | 257 | */ |
258 | | function seems_utf8($str) { |
259 | | $length = strlen($str); |
260 | | for ($i=0; $i < $length; $i++) { |
261 | | $c = ord($str[$i]); |
262 | | if ($c < 0x80) $n = 0; # 0bbbbbbb |
263 | | elseif (($c & 0xE0) == 0xC0) $n=1; # 110bbbbb |
264 | | elseif (($c & 0xF0) == 0xE0) $n=2; # 1110bbbb |
265 | | elseif (($c & 0xF8) == 0xF0) $n=3; # 11110bbb |
266 | | elseif (($c & 0xFC) == 0xF8) $n=4; # 111110bb |
267 | | elseif (($c & 0xFE) == 0xFC) $n=5; # 1111110b |
| 258 | function seems_utf8( $str ) { |
| 259 | $str = (string) $str; |
| 260 | $length = strlen( $str ); |
| 261 | for ( $i = 0; $i < $length; $i++ ) { |
| 262 | $c = ord( $str[$i] ); |
| 263 | if ( $c < 0x80 ) $n = 0; # 0bbbbbbb |
| 264 | elseif ( ($c & 0xE0) == 0xC0 ) $n = 1; # 110bbbbb |
| 265 | elseif ( ($c & 0xF0) == 0xE0 ) $n = 2; # 1110bbbb |
| 266 | elseif ( ($c & 0xF8) == 0xF0 ) $n = 3; # 11110bbb |
| 267 | elseif ( ($c & 0xFC) == 0xF8 ) $n = 4; # 111110bb // invalid UTF-8, in here |
| 268 | elseif ( ($c & 0xFE) == 0xFC ) $n = 5; # 1111110b // for backcompat reasons |
268 | 269 | else return false; # Does not match any model |
269 | | for ($j=0; $j<$n; $j++) { # n bytes matching 10bbbbbb follow ? |
270 | | if ((++$i == $length) || ((ord($str[$i]) & 0xC0) != 0x80)) |
| 270 | for ( $j = 0; $j < $n; $j++ ) { # n bytes matching 10bbbbbb follow ? |
| 271 | if ( (++$i == $length) || ( (ord( $str[$i] ) & 0xC0) != 0x80) ) |
271 | 272 | return false; |
272 | 273 | } |
273 | 274 | } |