### Eclipse Workspace Patch 1.0
#P wordpress-trunk
|
|
|
|
| 246 | 246 | /** |
| 247 | 247 | * Checks to see if a string is utf8 encoded. |
| 248 | 248 | * |
| 249 | | * NOTE: This function checks for 5-Byte sequences, UTF8 |
| | 249 | * NOTE: This function checks for 6-Byte sequences, UTF8 |
| 250 | 250 | * has Bytes Sequences with a maximum length of 4. |
| 251 | 251 | * |
| 252 | 252 | * @author bmorel at ssi dot fr (modified) |
| … |
… |
|
| 255 | 255 | * @param string $str The string to be checked |
| 256 | 256 | * @return bool True if $str fits a UTF-8 model, false otherwise. |
| 257 | 257 | */ |
| 258 | | function seems_utf8($str) { |
| 259 | | $length = strlen($str); |
| 260 | | for ($i=0; $i < $length; $i++) { |
| 261 | | $c = ord($str[$i]); |
| 262 | | if ($c < 0x80) $n = 0; # 0bbbbbbb |
| 263 | | elseif (($c & 0xE0) == 0xC0) $n=1; # 110bbbbb |
| 264 | | elseif (($c & 0xF0) == 0xE0) $n=2; # 1110bbbb |
| 265 | | elseif (($c & 0xF8) == 0xF0) $n=3; # 11110bbb |
| 266 | | elseif (($c & 0xFC) == 0xF8) $n=4; # 111110bb |
| 267 | | elseif (($c & 0xFE) == 0xFC) $n=5; # 1111110b |
| | 258 | function seems_utf8( $str ) { |
| | 259 | $str = (string) $str; |
| | 260 | $length = strlen( $str ); |
| | 261 | for ( $i = 0; $i < $length; $i++ ) { |
| | 262 | $c = ord( $str[$i] ); |
| | 263 | if ( $c < 0x80 ) $n = 0; # 0bbbbbbb |
| | 264 | elseif ( ($c & 0xE0) == 0xC0 ) $n = 1; # 110bbbbb |
| | 265 | elseif ( ($c & 0xF0) == 0xE0 ) $n = 2; # 1110bbbb |
| | 266 | elseif ( ($c & 0xF8) == 0xF0 ) $n = 3; # 11110bbb |
| | 267 | elseif ( ($c & 0xFC) == 0xF8 ) $n = 4; # 111110bb // invalid UTF-8, in here |
| | 268 | elseif ( ($c & 0xFE) == 0xFC ) $n = 5; # 1111110b // for backcompat reasons |
| 268 | 269 | else return false; # Does not match any model |
| 269 | | for ($j=0; $j<$n; $j++) { # n bytes matching 10bbbbbb follow ? |
| 270 | | if ((++$i == $length) || ((ord($str[$i]) & 0xC0) != 0x80)) |
| | 270 | for ( $j = 0; $j < $n; $j++ ) { # n bytes matching 10bbbbbb follow ? |
| | 271 | if ( (++$i == $length) || ( (ord( $str[$i] ) & 0xC0) != 0x80) ) |
| 271 | 272 | return false; |
| 272 | 273 | } |
| 273 | 274 | } |