Changeset 60950
- Timestamp:
- 10/16/2025 11:17:14 PM (5 months ago)
- Location:
- trunk
- Files:
-
- 1 added
- 2 edited
-
src/wp-includes/compat-utf8.php (modified) (1 diff)
-
src/wp-includes/compat.php (modified) (1 diff)
-
tests/phpunit/tests/formatting/deprecatedUtfEncodeDecode.php (added)
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/wp-includes/compat-utf8.php
// r60949 r60950 — unchanged diff context: the preceding function closes with `return $count; }`.

/**
 * Re-encodes ISO-8859-1 text as UTF-8, staying backwards compatible with
 * the deprecated function of the same purpose from the PHP standard library.
 *
 * Runs of US-ASCII bytes are copied through untouched; every other byte is
 * expanded into a two-byte UTF-8 sequence. Input containing only US-ASCII
 * bytes is returned as-is (after the string cast).
 *
 * @since 6.9.0
 * @access private
 *
 * @see \utf8_encode()
 *
 * @param string $iso_8859_1_text Text treated as ISO-8859-1 (latin1) bytes.
 * @return string Text converted into UTF-8.
 */
function _wp_utf8_encode_fallback( $iso_8859_1_text ) {
	$latin1 = (string) $iso_8859_1_text;
	$length = strlen( $latin1 );

	// US-ASCII bytes (0x00–0x7F) are identical in ISO-8859-1 and UTF-8.
	$ascii_bytes =
		"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" .
		"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" .
		" !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f";

	$output    = '';
	$offset    = 0;
	$copied_to = 0; // Everything before this offset has already been written to $output.

	while ( $offset < $length ) {
		// Skip over any run of ASCII; it is copied in bulk on the next flush.
		$ascii_run = strspn( $latin1, $ascii_bytes, $offset );
		if ( $ascii_run > 0 ) {
			$offset += $ascii_run;
			continue;
		}

		// Flush the pending ASCII span, then expand this byte into two UTF-8 bytes.
		$byte    = ord( $latin1[ $offset ] );
		$output .= substr( $latin1, $copied_to, $offset - $copied_to );
		$output .= chr( 0xC0 | ( $byte >> 6 ) ) . chr( 0x80 | ( $byte & 0x3F ) );

		++$offset;
		$copied_to = $offset;
	}

	// Nothing was flushed, so no byte needed conversion.
	if ( 0 === $copied_to ) {
		return $latin1;
	}

	$output .= substr( $latin1, $copied_to );
	return $output;
}

/**
 * Re-encodes UTF-8 text as ISO-8859-1, staying backwards compatible with
 * the deprecated function of the same purpose from the PHP standard library.
 *
 * Runs of US-ASCII bytes are copied through untouched. A two-byte sequence
 * whose code point is not greater than U+FF becomes the single matching
 * byte; any other scanned code point becomes `?`. Input containing only
 * US-ASCII bytes is returned as-is (after the string cast).
 *
 * @since 6.9.0
 * @access private
 *
 * @see \utf8_decode()
 *
 * @param string $utf8_text Text treated as UTF-8 bytes.
 * @return string Text converted into ISO-8859-1.
 */
function _wp_utf8_decode_fallback( $utf8_text ) {
	$source = (string) $utf8_text;
	$length = strlen( $source );

	// US-ASCII bytes (0x00–0x7F) are identical in ISO-8859-1 and UTF-8.
	$ascii_bytes =
		"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" .
		"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" .
		" !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f";

	$output    = '';
	$offset    = 0;
	$copied_to = 0; // Everything before this offset has already been written to $output.

	while ( $offset < $length ) {
		// Skip over any run of ASCII; it is copied in bulk on the next flush.
		$ascii_run = strspn( $source, $ascii_bytes, $offset );
		if ( $ascii_run > 0 ) {
			$offset += $ascii_run;
			continue;
		}

		/*
		 * NOTE(review): `_wp_scan_utf8()` is defined outside this view. It is
		 * presumably asked for at most one code point, moving `$scan_end` past
		 * it and reporting the size of a following invalid span through
		 * `$invalid_bytes` — confirm against its definition.
		 */
		$scan_end      = $offset;
		$invalid_bytes = 0;
		$scanned       = _wp_scan_utf8( $source, $scan_end, $invalid_bytes, null, 1 );
		$char_width    = $scan_end - $offset;

		if ( 1 === $scanned ) {
			// Anything that cannot be represented is replaced by a question mark.
			$replacement = '?';

			// All convertible code points are two bytes long…
			$lead = ord( $source[ $offset ] );
			if ( 0xC0 === ( $lead & 0xE0 ) ) {
				// …and are not greater than U+FF.
				$trail      = ord( $source[ $offset + 1 ] );
				$code_point = ( ( $lead & 0x1F ) << 6 ) | ( $trail & 0x3F );
				if ( $code_point <= 0xFF ) {
					$replacement = chr( $code_point );
				}
			}
		} elseif ( $invalid_bytes > 0 ) {
			// No code point was scanned; only the invalid span below is replaced.
			$replacement = '';
		} else {
			// Nothing scanned and nothing invalid: stop converting here.
			break;
		}

		// Flush the pending ASCII span followed by the converted byte (if any).
		$output   .= substr( $source, $copied_to, $offset - $copied_to );
		$output   .= $replacement;
		$offset   += $char_width;
		$copied_to = $offset;

		// A reported invalid span collapses into a single question mark.
		if ( $invalid_bytes > 0 ) {
			$output   .= '?';
			$offset   += $invalid_bytes;
			$copied_to = $offset;
		}
	}

	// Nothing was flushed, so no byte needed conversion.
	if ( 0 === $copied_to ) {
		return $source;
	}

	$output .= substr( $source, $copied_to );
	return $output;
}
trunk/src/wp-includes/compat.php
// r60949 r60950 — unchanged diff context: the preceding block closes with `}`.

if ( ! function_exists( 'utf8_encode' ) ) {
	if ( extension_loaded( 'mbstring' ) ) {
		/**
		 * Converts a string from ISO-8859-1 to UTF-8.
		 *
		 * Defined only when PHP does not already provide the function and
		 * the mbstring extension is loaded.
		 *
		 * @deprecated Use {@see \mb_convert_encoding()} instead.
		 *
		 * @since 6.9.0
		 *
		 * @param string $iso_8859_1_text Text treated as ISO-8859-1 (latin1) bytes.
		 * @return string Text converted into UTF-8.
		 */
		function utf8_encode( $iso_8859_1_text ): string {
			_deprecated_function( __FUNCTION__, '6.9.0', 'mb_convert_encoding' );

			$converted = mb_convert_encoding( $iso_8859_1_text, 'UTF-8', 'ISO-8859-1' );
			return $converted;
		}
	} else {
		/**
		 * Variant used when the mbstring extension is not loaded; delegates
		 * the conversion to _wp_utf8_encode_fallback().
		 *
		 * @ignore
		 * @private
		 *
		 * @since 6.9.0
		 */
		function utf8_encode( $iso_8859_1_text ): string {
			_deprecated_function( __FUNCTION__, '6.9.0', 'mb_convert_encoding' );

			$converted = _wp_utf8_encode_fallback( $iso_8859_1_text );
			return $converted;
		}
	}
}

if ( ! function_exists( 'utf8_decode' ) ) {
	if ( extension_loaded( 'mbstring' ) ) {
		/**
		 * Converts a string from UTF-8 to ISO-8859-1.
		 *
		 * Defined only when PHP does not already provide the function and
		 * the mbstring extension is loaded.
		 *
		 * @deprecated Use {@see \mb_convert_encoding()} instead.
		 *
		 * @since 6.9.0
		 *
		 * @param string $utf8_text Text treated as UTF-8.
		 * @return string Text converted into ISO-8859-1.
		 */
		function utf8_decode( $utf8_text ): string {
			_deprecated_function( __FUNCTION__, '6.9.0', 'mb_convert_encoding' );

			$converted = mb_convert_encoding( $utf8_text, 'ISO-8859-1', 'UTF-8' );
			return $converted;
		}
	} else {
		/**
		 * Variant used when the mbstring extension is not loaded; delegates
		 * the conversion to _wp_utf8_decode_fallback().
		 *
		 * @ignore
		 * @private
		 *
		 * @since 6.9.0
		 */
		function utf8_decode( $utf8_text ): string {
			_deprecated_function( __FUNCTION__, '6.9.0', 'mb_convert_encoding' );

			$converted = _wp_utf8_decode_fallback( $utf8_text );
			return $converted;
		}
	}
}

// sodium_crypto_box() was introduced in PHP 7.2.
// Unchanged diff context continues (truncated in this view): `if ( ! function_exists( 'sodium_crypto_box' ) ) {`
Note: See TracChangeset for help on using the changeset viewer.