Changeset 32391 for branches/3.7/src/wp-includes/compat.php
- Timestamp:
- 05/06/2015 07:16:41 PM (8 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/3.7/src/wp-includes/compat.php
// Post-change (r32391) state of the region shown in this changeset; it replaces
// the r29388 definitions of mb_substr() and _mb_substr().

/**
 * Returns whether PCRE/u (PCRE_UTF8 modifier) is available for use.
 *
 * The probe result is cached in a static variable, so preg_match() runs at
 * most once per request unless the cache is reset.
 *
 * @ignore
 * @since 4.2.2
 * @access private
 *
 * @param mixed $set - Used for testing only
 *             null   : default - get PCRE/u capability
 *             false  : Used for testing - return false for future calls to this function
 *             'reset': Used for testing - restore default behavior of this function
 * @return mixed The cached preg_match() probe result (truthy when the /u modifier works),
 *               or the override value previously supplied through $set.
 */
function _wp_can_use_pcre_u( $set = null ) {
	static $utf8_pcre = 'reset';

	if ( null !== $set ) {
		$utf8_pcre = $set;
	}

	if ( 'reset' === $utf8_pcre ) {
		// Errors are silenced on purpose: a failing probe just means "not available".
		$utf8_pcre = @preg_match( '/^./u', 'a' );
	}

	return $utf8_pcre;
}

if ( ! function_exists( 'mb_substr' ) ) :
	/**
	 * Compat definition of mb_substr(), declared only when no mb_substr() exists yet.
	 *
	 * @see _mb_substr()
	 */
	function mb_substr( $str, $start, $length = null, $encoding = null ) {
		return _mb_substr( $str, $start, $length, $encoding );
	}
endif;

/**
 * Returns part of a string, counted in characters rather than bytes.
 *
 * Only understands UTF-8 and 8bit. All other character sets will be treated as 8bit.
 * For $encoding === UTF-8, the $str input is expected to be a valid UTF-8 byte sequence.
 * The behavior of this function for invalid inputs is undefined.
 *
 * @access private
 *
 * @param string      $str      The string to extract the substring from.
 * @param int         $start    Position of the first character to use (passed to substr()/array_slice()).
 * @param int|null    $length   Maximum number of characters to return. Default null (to the end).
 * @param string|null $encoding Character encoding. Default null, which reads the 'blog_charset' option.
 * @return string The extracted substring.
 */
function _mb_substr( $str, $start, $length = null, $encoding = null ) {
	if ( null === $encoding ) {
		$encoding = get_option( 'blog_charset' );
	}

	// The solution below works only for UTF-8,
	// so in case of a different charset just use built-in substr().
	// Strict comparison: loose in_array() would let non-string values such as true match 'utf8'.
	if ( ! in_array( $encoding, array( 'utf8', 'utf-8', 'UTF8', 'UTF-8' ), true ) ) {
		return is_null( $length ) ? substr( $str, $start ) : substr( $str, $start, $length );
	}

	if ( _wp_can_use_pcre_u() ) {
		// Use the regex unicode support to separate the UTF-8 characters into an array
		preg_match_all( '/./us', $str, $match );
		$chars = is_null( $length ) ? array_slice( $match[0], $start ) : array_slice( $match[0], $start, $length );
		return implode( '', $chars );
	}

	$regex = '/(
		  [\x00-\x7F]                  # single-byte sequences   0xxxxxxx
		| [\xC2-\xDF][\x80-\xBF]       # double-byte sequences   110xxxxx 10xxxxxx
		| \xE0[\xA0-\xBF][\x80-\xBF]   # triple-byte sequences   1110xxxx 10xxxxxx * 2
		| [\xE1-\xEC][\x80-\xBF]{2}
		| \xED[\x80-\x9F][\x80-\xBF]
		| [\xEE-\xEF][\x80-\xBF]{2}
		| \xF0[\x90-\xBF][\x80-\xBF]{2} # four-byte sequences   11110xxx 10xxxxxx * 3
		| [\xF1-\xF3][\x80-\xBF]{3}
		| \xF4[\x80-\x8F][\x80-\xBF]{2}
	)/x';

	$chars = array( '' ); // Start with 1 element instead of 0 since the first thing we do is pop
	do {
		// We had some string left over from the last round, but we counted it in that last round.
		array_pop( $chars );

		// Split by UTF-8 character, limit to 1000 characters (last array element will contain the rest of the string)
		$pieces = preg_split( $regex, $str, 1000, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );

		$chars = array_merge( $chars, $pieces );
	} while ( count( $pieces ) > 1 && $str = array_pop( $pieces ) ); // If there's anything left over, repeat the loop.

	return implode( '', array_slice( $chars, $start, $length ) );
}

if ( ! function_exists( 'mb_strlen' ) ) :
	/**
	 * Compat definition of mb_strlen(), declared only when no mb_strlen() exists yet.
	 *
	 * @see _mb_strlen()
	 */
	function mb_strlen( $str, $encoding = null ) {
		return _mb_strlen( $str, $encoding );
	}
endif;

/**
 * Returns the length of a string, counted in characters rather than bytes.
 *
 * Only understands UTF-8 and 8bit. All other character sets will be treated as 8bit.
 * For $encoding === UTF-8, the $str input is expected to be a valid UTF-8 byte sequence.
 * The behavior of this function for invalid inputs is undefined.
 *
 * @access private
 *
 * @param string      $str      The string to measure.
 * @param string|null $encoding Character encoding. Default null, which reads the 'blog_charset' option.
 * @return int Number of characters for UTF-8, number of bytes otherwise.
 */
function _mb_strlen( $str, $encoding = null ) {
	if ( null === $encoding ) {
		$encoding = get_option( 'blog_charset' );
	}

	// The solution below works only for UTF-8,
	// so in case of a different charset just use built-in strlen().
	// Strict comparison: loose in_array() would let non-string values such as true match 'utf8'.
	if ( ! in_array( $encoding, array( 'utf8', 'utf-8', 'UTF8', 'UTF-8' ), true ) ) {
		return strlen( $str );
	}

	if ( _wp_can_use_pcre_u() ) {
		// Use the regex unicode support to separate the UTF-8 characters into an array
		preg_match_all( '/./us', $str, $match );
		return count( $match[0] );
	}

	$regex = '/(?:
		  [\x00-\x7F]                  # single-byte sequences   0xxxxxxx
		| [\xC2-\xDF][\x80-\xBF]       # double-byte sequences   110xxxxx 10xxxxxx
		| \xE0[\xA0-\xBF][\x80-\xBF]   # triple-byte sequences   1110xxxx 10xxxxxx * 2
		| [\xE1-\xEC][\x80-\xBF]{2}
		| \xED[\x80-\x9F][\x80-\xBF]
		| [\xEE-\xEF][\x80-\xBF]{2}
		| \xF0[\x90-\xBF][\x80-\xBF]{2} # four-byte sequences   11110xxx 10xxxxxx * 3
		| [\xF1-\xF3][\x80-\xBF]{3}
		| \xF4[\x80-\x8F][\x80-\xBF]{2}
	)/x';

	// preg_split() stops splitting once it has produced this many pieces.
	$limit = 1000;
	$count = 0;

	do {
		// Split by UTF-8 character; when the limit is reached, the last piece holds the unsplit rest of the string.
		$pieces = preg_split( $regex, $str, $limit );
		$found  = count( $pieces );

		// Fencepost: N pieces means N - 1 separators, and every separator is one character.
		$count += $found - 1;

		$str = array_pop( $pieces );

		// Only a round that hit the limit can have left characters unsplit. Comparing against ''
		// (instead of relying on truthiness) keeps a remainder of "0" from ending the loop early,
		// and the limit check stops unsplittable bytes from being retried forever.
	} while ( $limit === $found && '' !== $str );

	return $count;
}
Note: See TracChangeset
for help on using the changeset viewer.