719 | | // Store the site charset as a static to avoid multiple calls to get_option() |
720 | | static $is_utf8; |
721 | | if ( !isset( $is_utf8 ) ) { |
722 | | $is_utf8 = in_array( get_option( 'blog_charset' ), array( 'utf8', 'utf-8', 'UTF8', 'UTF-8' ) ); |
| 722 | // Cache whether the site charset is UTF-8 and whether PCRE supports the /u modifier |
| 723 | static $is_utf8, $pcre_utf8; |
| 724 | |
| 725 | // On the first call, cache in a static whether the site charset is UTF-8 |
| 726 | if ( ! isset( $is_utf8 ) ) { |
| 727 | $is_utf8 = get_option( 'blog_charset' ); |
| 728 | $is_utf8 = ( 'UTF-8' == $is_utf8 || 'UTF8' == $is_utf8 || 'utf-8' == $is_utf8 || 'utf8' == $is_utf8 ); |
| 729 | |
| 730 | // Check once whether the installed PCRE library supports the UTF-8 /u modifier |
| 731 | $pcre_utf8 = ( @preg_match( '//u', '' ) !== false ); |
738 | | // preg_match fails when it encounters invalid UTF8 in $string |
739 | | if ( 1 === @preg_match( '/^./us', $string ) ) { |
740 | | return $string; |
| 739 | // If pcre_utf support is available and string is valid, return string |
| 740 | if ( $pcre_utf8 ) { |
| 741 | if ( preg_match( '//u', $string ) !== false ) { |
| 742 | return $string; |
| 743 | } |
| 744 | } else { |
| 745 | // If pattern option is available then test the string and return if valid. PCRE added (*UTF8) in Version 7.9 11-Apr-09 |
| 746 | if ( @preg_match( '/(*UTF8)/', '' ) !== false && preg_match( '/(*UTF8)/', $string ) !== false ) { |
| 747 | return $string; |
| 748 | } |
| 749 | |
| 750 | // If PCRE offers no UTF-8 support, fall back to htmlspecialchars(): an empty return is treated as invalid UTF-8, so a non-empty result means the string is returned as valid |
| 751 | if ( htmlspecialchars( $string, null, 'utf-8' ) != '' ) { |
| 752 | return $string; |
| 753 | } |
743 | | // Attempt to strip the bad chars if requested (not recommended) |
744 | | if ( $strip && function_exists( 'iconv' ) ) { |
745 | | return iconv( 'utf-8', 'utf-8', $string ); |
| 756 | // Attempt to strip the bad chars if requested |
| 757 | if ( $strip ) { |
| 758 | // whether mb_convert_encoding should be used (preferred over iconv) |
| 759 | static $mb_convert; |
| 760 | |
| 761 | if ( ! isset( $mb_convert ) ) { |
| 762 | // True if the mbstring extension is present and setting the substitute character to 'none' succeeds |
| 763 | $mb_convert = ( function_exists( 'mb_substitute_character' ) && mb_substitute_character( 'none' ) === true ); |
| 764 | |
| 765 | if ( ! $mb_convert ) { |
| 766 | // Whether iconv is available |
| 767 | static $iconv; |
| 768 | |
| 769 | // This extension is enabled by default, although it may be disabled by compiling using --without-iconv, or may be the wrong iconv lib |
| 770 | $iconv = ( function_exists( 'iconv' ) && defined( ICONV_IMPL ) && ICONV_IMPL == 'libiconv' ); |
| 771 | } |
| 772 | } |
| 773 | |
| 774 | // Use mb_convert_encoding, return string minus invalid utf |
| 775 | if ( $mb_convert ) { |
| 776 | return mb_convert_encoding( $string, 'UTF-8', 'UTF-8' ); |
| 777 | } elseif ( $iconv ) { |
| 778 | // Characters that cannot be represented in the target charset are silently discarded. Needs '@' see _php_iconv_show_error |
| 779 | return @iconv( 'UTF-8', 'UTF-8//IGNORE', $string ); |
| 780 | } |