WordPress.org

Make WordPress Core

Changeset 11278


Ignore:
Timestamp:
05/11/2009 08:26:27 PM (9 years ago)
Author:
westi
Message:

Switch dotclear importer to use builtin UTF8 testing functionality. Fixes #9771 props hakre.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/wp-admin/import/dotclear.php

    r11204 r11278  
    49 49 }
    50 50
    51 /*
    52  Identify UTF-8 text
    53  Taken from http://www.php.net/manual/fr/function.mb-detect-encoding.php#50087
    54 */
    55 //
    56 //    utf8 encoding validation developed based on Wikipedia entry at:
    57 //    http://en.wikipedia.org/wiki/UTF-8
    58 //
    59 //    Implemented as a recursive descent parser based on a simple state machine
    60 //    copyright 2005 Maarten Meijer
    61 //
    62 //    This cries out for a C-implementation to be included in PHP core
    63 //
    64 
    65 51 /**
    66  * @package WordPress
    67  * @subpackage Dotclear_Import
    68  *
    69  * @param string $char
    70  * @return string
    71  */
    72 function valid_1byte($char) {
    73     if(!is_int($char)) return false;
    74         return ($char & 0x80) == 0x00;
    75 }
    76 
    77 /**
    78  * @package WordPress
    79  * @subpackage Dotclear_Import
    80  *
    81  * @param string $char
    82  * @return string
    83  */
    84 function valid_2byte($char) {
    85     if(!is_int($char)) return false;
    86         return ($char & 0xE0) == 0xC0;
    87 }
    88 
    89 /**
    90  * @package WordPress
    91  * @subpackage Dotclear_Import
    92  *
    93  * @param string $char
    94  * @return string
    95  */
    96 function valid_3byte($char) {
    97     if(!is_int($char)) return false;
    98         return ($char & 0xF0) == 0xE0;
    99 }
    100 
    101 /**
    102  * @package WordPress
    103  * @subpackage Dotclear_Import
    104  *
    105  * @param string $char
    106  * @return string
    107  */
    108 function valid_4byte($char) {
    109     if(!is_int($char)) return false;
    110         return ($char & 0xF8) == 0xF0;
    111 }
    112 
    113 /**
    114  * @package WordPress
    115  * @subpackage Dotclear_Import
    116  *
    117  * @param string $char
    118  * @return string
    119  */
    120 function valid_nextbyte($char) {
    121     if(!is_int($char)) return false;
    122         return ($char & 0xC0) == 0x80;
    123 }
    124 
    125 /**
    126  * @package WordPress
    127  * @subpackage Dotclear_Import
    128  *
    129  * @param string $string
    130  * @return string
    131  */
    132 function valid_utf8($string) {
    133     $len = strlen($string);
    134     $i = 0;
    135     while( $i < $len ) {
    136         $char = ord(substr($string, $i++, 1));
    137         if(valid_1byte($char)) {    // continue
    138             continue;
    139         } else if(valid_2byte($char)) { // check 1 byte
    140             if(!valid_nextbyte(ord(substr($string, $i++, 1))))
    141                 return false;
    142         } else if(valid_3byte($char)) { // check 2 bytes
    143             if(!valid_nextbyte(ord(substr($string, $i++, 1))))
    144                 return false;
    145             if(!valid_nextbyte(ord(substr($string, $i++, 1))))
    146                 return false;
    147         } else if(valid_4byte($char)) { // check 3 bytes
    148             if(!valid_nextbyte(ord(substr($string, $i++, 1))))
    149                 return false;
    150             if(!valid_nextbyte(ord(substr($string, $i++, 1))))
    151                 return false;
    152             if(!valid_nextbyte(ord(substr($string, $i++, 1))))
    153                 return false;
    154         } // goto next char
    155     }
    156     return true; // done
    157 }
    158 
    159 /**
     52 * Convert from dotclear charset to utf8 if required
     53 *
    160 54  * @package WordPress
    161 55  * @subpackage Dotclear_Import
     
    165 59  */
    166 60 function csc ($s) {
    167     if (valid_utf8 ($s)) {
     61    if (seems_utf8 ($s)) {
    168 62         return $s;
    169 63     } else {
Note: See TracChangeset for help on using the changeset viewer.