Changeset 11278 for trunk/wp-admin/import/dotclear.php
- Timestamp: 05/11/2009 08:26:27 PM (16 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/wp-admin/import/dotclear.php
r11204 r11278
  49   49   }
  50   50
  51      - /*
  52      - Identify UTF-8 text
  53      - Taken from http://www.php.net/manual/fr/function.mb-detect-encoding.php#50087
  54      - */
  55      - //
  56      - //    utf8 encoding validation developed based on Wikipedia entry at:
  57      - //    http://en.wikipedia.org/wiki/UTF-8
  58      - //
  59      - //    Implemented as a recursive descent parser based on a simple state machine
  60      - //    copyright 2005 Maarten Meijer
  61      - //
  62      - //    This cries out for a C-implementation to be included in PHP core
  63      - //
  64      -
  65   51   /**
  66      -  * @package WordPress
  67      -  * @subpackage Dotclear_Import
  68      -  *
  69      -  * @param string $char
  70      -  * @return string
  71      -  */
  72      - function valid_1byte($char) {
  73      -     if(!is_int($char)) return false;
  74      -     return ($char & 0x80) == 0x00;
  75      - }
  76      -
  77      - /**
  78      -  * @package WordPress
  79      -  * @subpackage Dotclear_Import
  80      -  *
  81      -  * @param string $char
  82      -  * @return string
  83      -  */
  84      - function valid_2byte($char) {
  85      -     if(!is_int($char)) return false;
  86      -     return ($char & 0xE0) == 0xC0;
  87      - }
  88      -
  89      - /**
  90      -  * @package WordPress
  91      -  * @subpackage Dotclear_Import
  92      -  *
  93      -  * @param string $char
  94      -  * @return string
  95      -  */
  96      - function valid_3byte($char) {
  97      -     if(!is_int($char)) return false;
  98      -     return ($char & 0xF0) == 0xE0;
  99      - }
 100      -
 101      - /**
 102      -  * @package WordPress
 103      -  * @subpackage Dotclear_Import
 104      -  *
 105      -  * @param string $char
 106      -  * @return string
 107      -  */
 108      - function valid_4byte($char) {
 109      -     if(!is_int($char)) return false;
 110      -     return ($char & 0xF8) == 0xF0;
 111      - }
 112      -
 113      - /**
 114      -  * @package WordPress
 115      -  * @subpackage Dotclear_Import
 116      -  *
 117      -  * @param string $char
 118      -  * @return string
 119      -  */
 120      - function valid_nextbyte($char) {
 121      -     if(!is_int($char)) return false;
 122      -     return ($char & 0xC0) == 0x80;
 123      - }
 124      -
 125      - /**
 126      -  * @package WordPress
 127      -  * @subpackage Dotclear_Import
 128      -  *
 129      -  * @param string $string
 130      -  * @return string
 131      -  */
 132      - function valid_utf8($string) {
 133      -     $len = strlen($string);
 134      -     $i = 0;
 135      -     while( $i < $len ) {
 136      -         $char = ord(substr($string, $i++, 1));
 137      -         if(valid_1byte($char)) { // continue
 138      -             continue;
 139      -         } else if(valid_2byte($char)) { // check 1 byte
 140      -             if(!valid_nextbyte(ord(substr($string, $i++, 1))))
 141      -                 return false;
 142      -         } else if(valid_3byte($char)) { // check 2 bytes
 143      -             if(!valid_nextbyte(ord(substr($string, $i++, 1))))
 144      -                 return false;
 145      -             if(!valid_nextbyte(ord(substr($string, $i++, 1))))
 146      -                 return false;
 147      -         } else if(valid_4byte($char)) { // check 3 bytes
 148      -             if(!valid_nextbyte(ord(substr($string, $i++, 1))))
 149      -                 return false;
 150      -             if(!valid_nextbyte(ord(substr($string, $i++, 1))))
 151      -                 return false;
 152      -             if(!valid_nextbyte(ord(substr($string, $i++, 1))))
 153      -                 return false;
 154      -         } // goto next char
 155      -     }
 156      -     return true; // done
 157      - }
 158      -
 159      - /**
        52 +  * Convert from dotclear charset to utf8 if required
        53 +  *
 160   54    * @package WordPress
 161   55    * @subpackage Dotclear_Import
   …    …
 165   59    */
 166   60   function csc ($s) {
 167      -     if ( valid_utf8 ($s)) {
        61 +     if (seems_utf8 ($s)) {
 168   62           return $s;
 169   63       } else {
Note: See TracChangeset for help on using the changeset viewer.