| 713 | | if ( !isset( $is_utf8 ) ) { |
| 714 | | $is_utf8 = in_array( get_option( 'blog_charset' ), array( 'utf8', 'utf-8', 'UTF8', 'UTF-8' ) ); |
| 715 | | } |
| 716 | | if ( !$is_utf8 ) { |
| | 713 | |
| | 714 | // if first time this function is called, save if utf enabled or not static |
| | 715 | if ( ! isset( $is_utf8 ) ) |
| | 716 | $is_utf8 = ( stripos( get_option( 'blog_charset' ), 'utf' ) !== false ); |
| | 717 | |
| | 718 | // if utf not used return the string |
| | 719 | if ( ! $is_utf8 ) |
| 721 | | static $utf8_pcre; |
| 722 | | if ( !isset( $utf8_pcre ) ) { |
| 723 | | $utf8_pcre = @preg_match( '/^./u', 'a' ); |
| 724 | | } |
| 725 | | // We can't demand utf8 in the PCRE installation, so just return the string in those cases |
| 726 | | if ( !$utf8_pcre ) { |
| | 726 | if ( ! isset( $pcre_utf8 ) ) |
| | 727 | $pcre_utf8 = ( @preg_match( '//u', '' ) !== false ); |
| | 728 | |
| | 729 | // If pcre_utf /u modifier allowed and the utf is valid, return string |
| | 730 | if ( $pcre_utf8 && @preg_match( '//u', $string ) !== false ) |
| 730 | | // preg_match fails when it encounters invalid UTF8 in $string |
| 731 | | if ( 1 === @preg_match( '/^./us', $string ) ) { |
| 732 | | return $string; |
| | 733 | // just means that the /u modifier is disallowed, so try the pattern option |
| | 734 | if ( ! $pcre_utf8 ) { |
| | 735 | |
| | 736 | // Use the pattern option for pcre and return string if valid. Else use the regex |
| | 737 | if ( @preg_match( '/(*UTF8)/', '' ) !== false ) { |
| | 738 | return $string; |
| | 739 | } else { |
| | 740 | |
| | 741 | // PCRE was compiled explicitly without UTF support, so use a regex to check (third time's the charm) |
| | 742 | $pattern = '/( |
| | 743 | [\xC0-\xC1] # Invalid UTF-8 Bytes |
| | 744 | | [\xF5-\xFF] # Invalid UTF-8 Bytes |
| | 745 | | \xE0[\x80-\x9F] # Overlong encoding of prior code point |
| | 746 | | \xF0[\x80-\x8F] # Overlong encoding of prior code point |
| | 747 | | [\xC2-\xDF](?![\x80-\xBF]) # Invalid UTF-8 Sequence Start |
| | 748 | | [\xE0-\xEF](?![\x80-\xBF]{2}) # Invalid UTF-8 Sequence Start |
| | 749 | | [\xF0-\xF4](?![\x80-\xBF]{3}) # Invalid UTF-8 Sequence Start |
| | 750 | | (?<=[\x0-\x7F\xF5-\xFF])[\x80-\xBF] # Invalid UTF-8 Sequence Middle |
| | 751 | | (?<![\xC2-\xDF]|[\xE0-\xEF]|[\xE0-\xEF][\x80-\xBF]|[\xF0-\xF4]|[\xF0-\xF4][\x80-\xBF]|[\xF0-\xF4][\x80-\xBF]{2})[\x80-\xBF] # Overlong Sequence |
| | 752 | | (?<=[\xE0-\xEF])[\x80-\xBF](?![\x80-\xBF]) # Short 3 byte sequence |
| | 753 | | (?<=[\xF0-\xF4])[\x80-\xBF](?![\x80-\xBF]{2}) # Short 4 byte sequence |
| | 754 | | (?<=[\xF0-\xF4][\x80-\xBF])[\x80-\xBF](?![\x80-\xBF]) # Short 4 byte sequence (2) |
| | 755 | )/x'; |
| | 756 | |
| | 757 | // if the utf is valid return the string |
| | 758 | if ( @preg_match( $pattern , $string ) === 1 ) |
| | 759 | return $string; |
| | 760 | } |
| 736 | | if ( $strip && function_exists( 'iconv' ) ) { |
| 737 | | return iconv( 'utf-8', 'utf-8', $string ); |
| | 764 | if ( $strip ) { |
| | 765 | // try to use iconv if exists |
| | 766 | if ( function_exists( 'iconv' ) ) |
| | 767 | return @iconv( 'utf-8', 'utf-8//ignore', $string ); |
| | 768 | |
| | 769 | // otherwise try to use mb_convert_encoding, setting the substitute_character to none to mimic strip |
| | 770 | if ( function_exists( 'mb_convert_encoding' ) ) { |
| | 771 | @ini_set( 'mbstring.substitute_character', 'none' ); |
| | 772 | return @mb_convert_encoding( $string, 'utf-8', 'utf-8' ); |
| | 773 | } |