| | 79 | * clean invalid chars from css class names |
| | 80 | * |
| | 81 | * invalid chars will be replaced with '_', if the class |
| | 82 | * should not be treated as a suffix for an existing class, |
| | 83 | * then invalid classes will be fixed with an additional '_' |
| | 84 | * to validate again. |
| | 85 | * |
| | 86 | * @since 2.8.bleeding |
| | 87 | * |
| | 88 | * @param array|string $class array or string with class(es) |
| | 89 | * @param bool $suffix optional whether or not class(es) |
| | 90 | * are treated as being a suffix |
| | 91 | * (defaults to true) |
| | 92 | * @return array|string classname(es) cleaned |
| | 93 | */ |
function clean_css_classnames($class, $suffix = true)
{
	// Purpose: replace every character that is not allowed in a CSS class
	// name with '_'. When $suffix is false the result is additionally forced
	// to start like a CSS identifier (-?{nmstart}) by inserting a '_'.
	// Accepts a single class or an array of classes and returns the same shape.

	if (is_array($class)) { // handle multiple classes recursively
		$return = array();
		foreach ($class as $single) {
			$return[] = clean_css_classnames($single, $suffix);
		}
		return $return;
	}

	$class = (string) $class;

	// ASCII characters allowed inside a name (nmchar without {nonascii}).
	// The previous pattern additionally whitelisted \0-\177, i.e. all of
	// ASCII, so invalid ASCII characters were never replaced.
	$patternInvalid = '|[^_a-zA-Z0-9-]|';

	// split_utf8() is documented to return false when splitting fails.
	$partsUtf8 = split_utf8($class);

	if (is_array($partsUtf8)) {
		// Valid UTF-8: multi-byte sequences are non-ASCII and therefore
		// allowed as-is; only single-byte parts need to be checked.
		foreach ($partsUtf8 as $i => $part) {
			if (strlen($part) == 1) {
				$partsUtf8[$i] = preg_replace($patternInvalid, '_', $part);
			}
		}
		$class = implode('', $partsUtf8); // rebuild class string
	} else {
		// Not splittable as UTF-8 (SBCS assumed): filter byte by byte. As
		// before, bytes >= 0x80 are replaced as well, so the result never
		// carries an invalid UTF-8 sequence.
		$class = preg_replace($patternInvalid, '_', $class);
	}

	// Validate the start of the class name (only if it is not a suffix),
	// which must match -?{nmstart}.
	if (!$suffix) {
		// locate nmstart: skip one optional leading '-'
		$indexValidate = (strlen($class) > 0 && $class[0] == '-') ? 1 : 0;

		// An empty string or a lone '-' has no nmstart position; such input
		// is returned unchanged instead of reading an undefined offset.
		if (strlen($class) > $indexValidate) {
			// A byte >= 0x80 here is the lead byte of a multi-byte sequence
			// and therefore a valid {nonascii} nmstart.
			$patternValid = '/[_a-zA-Z\200-\377]/';
			if (!preg_match($patternValid, $class[$indexValidate])) {
				// Work on the already filtered string so that earlier
				// replacements are not lost.
				$class = substr($class, 0, $indexValidate) . '_' . substr($class, $indexValidate);
			}
		}
	}

	return $class;
}
| | 141 | |
| | 142 | /** |
| 169 | | function seems_utf8($Str) { # by bmorel at ssi dot fr |
| 170 | | $length = strlen($Str); |
function seems_utf8($str) {
	// Reports whether $str can be split into UTF-8 sequences: true when
	// split_utf8() yields an array, false otherwise.
	//
	// The argument is taken by value (not by reference) so that callers may
	// keep passing literals and expressions, as the previous signature
	// allowed; the string is never modified here.
	return is_array(split_utf8($str));
}
| | 236 | |
| | 237 | /** |
| | 238 | * splits a binary string into its UTF-8 entities |
| | 239 | * |
| | 240 | * NOTE: this function does not conform to RFC 3629 because |
| | 241 | * it has been chosen to be compatible with seems_utf8() |
| | 242 | * which uses an invalid maximum length of 5 bytes in- |
| | 243 | * stead of a maximum of 4 bytes (or theoretical maximum |
| | 244 | * of 8 bytes). |
| | 245 | * |
| | 246 | * @since 2.8.bleeding |
| | 247 | * |
| | 248 | * @see seems_utf8() |
| | 249 | * @param string $str The string to be split |
| | 250 | * @return array Sequence of UTF-8 entities. False on Failure. |
| | 251 | */ |
| | 252 | function split_utf8(&$str) { |
| | 253 | $parts = array(); |
| | 254 | $length = strlen($str); |
| 172 | | if (ord($Str[$i]) < 0x80) continue; # 0bbbbbbb |
| 173 | | elseif ((ord($Str[$i]) & 0xE0) == 0xC0) $n=1; # 110bbbbb |
| 174 | | elseif ((ord($Str[$i]) & 0xF0) == 0xE0) $n=2; # 1110bbbb |
| 175 | | elseif ((ord($Str[$i]) & 0xF8) == 0xF0) $n=3; # 11110bbb |
| 176 | | elseif ((ord($Str[$i]) & 0xFC) == 0xF8) $n=4; # 111110bb |
| 177 | | elseif ((ord($Str[$i]) & 0xFE) == 0xFC) $n=5; # 1111110b |
| | 256 | $c = ord($str[$i]); |
| | 257 | if ($c < 0x80) $n = 0; # 0bbbbbbb |
| | 258 | elseif (($c & 0xE0) == 0xC0) $n=1; # 110bbbbb |
| | 259 | elseif (($c & 0xF0) == 0xE0) $n=2; # 1110bbbb |
| | 260 | elseif (($c & 0xF8) == 0xF0) $n=3; # 11110bbb |
| | 261 | elseif (($c & 0xFC) == 0xF8) $n=4; # 111110bb |
| | 262 | elseif (($c & 0xFE) == 0xFC) $n=5; # 1111110b |