| | 1 | <?php |
| | 2 | /* |
| | 3 | Copyright (c) 2011, Andrea Ercolino (http://noteslog.com) |
| | 4 | All rights reserved. |
| | 5 | |
| | 6 | Redistribution and use in source and binary forms, with or without |
| | 7 | modification, are permitted provided that the following conditions are met: |
| | 8 | * Redistributions of source code must retain the above copyright |
| | 9 | notice, this list of conditions and the following disclaimer. |
| | 10 | * Redistributions in binary form must reproduce the above copyright |
| | 11 | notice, this list of conditions and the following disclaimer in the |
| | 12 | documentation and/or other materials provided with the distribution. |
| | 13 | * Neither the name of the <organization> nor the |
| | 14 | names of its contributors may be used to endorse or promote products |
| | 15 | derived from this software without specific prior written permission. |
| | 16 | |
| | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
| | 18 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
| | 19 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| | 20 | DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY |
| | 21 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
| | 22 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| | 23 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
| | 24 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| | 25 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
| | 26 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| | 27 | */ |
| | 28 | |
| | 29 | |
| | 30 | |
/**
 * Exception type thrown by Ando_Utf8 for invalid input or options.
 *
 * It adds nothing to the base Exception; it exists only so that callers can
 * catch failures of this package separately from other exceptions.
 *
 * @package Ando_Utf8
 */
class Ando_Utf8_Exception extends Exception
{
}
| | 36 | |
| | 37 | |
| | 38 | |
/**
 * Basic UTF-8 support: escape UTF-8 text into a code point notation
 * (by default \uXXXX) and unescape such notation back into UTF-8.
 *
 * @link http://noteslog.com/
 * @link http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
 *
 * @package Ando_Utf8
 */
class Ando_Utf8
{
    /**
     * Escape UTF-8 characters using the given options
     *
     * About the write.callback option
     * -- it receives
     * -- -- the given write.arguments
     * -- -- the unicode of the current UTF-8 character
     * -- -- the current (unescaped) UTF-8 character
     * -- it must return the current escaped UTF-8 character
     *
     * The 'write' option may be given partially (e.g. only 'arguments'): the
     * missing entries fall back to their defaults.
     *
     * @link http://noteslog.com/post/escaping-and-unescaping-utf-8-characters-in-php/
     *
     * @param string $value
     * @param array $options
     *     'escapeControlChars'   => boolean (default: TRUE), bytes 0x00..0x1F
     *     'escapePrintableASCII' => boolean (default: FALSE), bytes 0x20..0x7F
     *     'write' => array(
     *         'callback'  => callable (default: 'sprintf'),
     *         'arguments' => array    (default: array('\u%04x')),
     *     ),
     *     'extendedUseSurrogate' => boolean (default: TRUE), write code points
     *         above U+FFFF as two UTF-16 surrogates instead of one code point
     *     'filters' => array(
     *         'before-write' => array('callback' => callable, 'arguments' => array),
     *     ) (optional), applied to $value before escaping
     *
     * @throws Ando_Utf8_Exception If an option is malformed or $value contains
     *     a byte sequence that is not a valid UTF-8 character
     * @return string
     */
    public static function escape($value, array $options = array())
    {
        $options = self::mergeOptions(array(
            'escapeControlChars'   => true,
            'escapePrintableASCII' => false,
            'write' => array(
                'callback'  => 'sprintf',
                'arguments' => array('\u%04x'),
            ),
            'extendedUseSurrogate' => true,
        ), $options, 'write');

        if (! self::isCallable($options['write']))
        {
            throw new Ando_Utf8_Exception('Expected a valid write handler (callable, array).');
        }
        if (self::validateFilters($options) && isset($options['filters']['before-write']))
        {
            $value = self::call($options['filters']['before-write'], $value);
        }

        $writer = $options['write'];
        $result = '';
        $length = strlen($value);
        $i = 0;
        while ($i < $length)
        {
            $byte = $value[$i];
            $lead = ord($byte);

            if ($lead < 0x80)
            {
                // single byte: control chars 0x00..0x1F, other ASCII 0x20..0x7F
                $mustEscape = $lead < 0x20
                    ? $options['escapeControlChars']
                    : $options['escapePrintableASCII'];
                $result .= $mustEscape
                    ? self::call($writer, array($lead, $byte))
                    : $byte;
                $i += 1;
                continue;
            }

            // the lead byte announces how many bytes the character spans
            if (($lead & 0xE0) == 0xC0)
            {
                $size = 2; // 110yyyyy 10xxxxxx
            }
            elseif (($lead & 0xF0) == 0xE0)
            {
                $size = 3; // 1110zzzz 10yyyyyy 10xxxxxx
            }
            elseif (($lead & 0xF8) == 0xF0)
            {
                $size = 4; // 11110www 10zzzzzz 10yyyyyy 10xxxxxx
            }
            else
            {
                // stray continuation byte or a lead byte outside unicode
                throw new Ando_Utf8_Exception('Expected a valid UTF-8 character.');
            }

            // a truncated tail yields a shorter string, rejected by the decoder
            $utf8Char = substr($value, $i, $size);
            $i += $size;

            if (4 == $size && $options['extendedUseSurrogate'])
            {
                list($upper, $lower) = self::utf8CharToSurrogatePair($utf8Char);
                $result .= self::call($writer, array($upper, $utf8Char));
                $result .= self::call($writer, array($lower, $utf8Char));
            }
            else
            {
                $code = self::utf8CharToCodePoint($utf8Char);
                $result .= self::call($writer, array($code, $utf8Char));
            }
        }

        return $result;
    }

    /**
     * Compute the code point of a given UTF-8 character
     *
     * The sequence is decoded and validated here, without relying on the
     * mbstring extension, so that results do not depend on the installation.
     * Rejected input: wrong length for the lead byte, malformed continuation
     * bytes, overlong encodings, code points above U+10FFFF, UTF-16
     * surrogates (U+D800..U+DFFF) and the reserved U+FFFE / U+FFFF.
     *
     * @link http://noteslog.com/post/escaping-and-unescaping-utf-8-characters-in-php/
     *
     * @param string $utf8Char exactly one UTF-8 encoded character (1 to 4 bytes)
     * @throws Ando_Utf8_Exception If $utf8Char is not a valid UTF-8 character
     * @return integer
     */
    public static function utf8CharToCodePoint($utf8Char)
    {
        $utf8Char = (string) $utf8Char;
        $size = strlen($utf8Char);
        if ($size < 1 || $size > 4)
        {
            //no more cases in unicode, whose range is 0x00000000 - 0x0010FFFF
            throw new Ando_Utf8_Exception('Expected a valid UTF-8 character.');
        }

        $lead = ord($utf8Char[0]);
        switch ($size)
        {
            case 1:
                //Code points U+0000..U+007F, mask 0xxxxxxx
                $leadIsValid = $lead < 0x80;
                $result = $lead;
                $minimum = 0x00;
                break;

            case 2:
                //Code points U+0080..U+07FF, mask 110yyyyy 10xxxxxx
                $leadIsValid = ($lead & 0xE0) == 0xC0;
                $result = $lead & 0x1F;
                $minimum = 0x80;
                break;

            case 3:
                //Code points U+0800..U+FFFF, mask 1110zzzz 10yyyyyy 10xxxxxx
                $leadIsValid = ($lead & 0xF0) == 0xE0;
                $result = $lead & 0x0F;
                $minimum = 0x800;
                break;

            default:
                //Code points U+10000..U+10FFFF, mask 11110www 10zzzzzz 10yyyyyy 10xxxxxx
                $leadIsValid = ($lead & 0xF8) == 0xF0;
                $result = $lead & 0x07;
                $minimum = 0x10000;
                break;
        }
        if (! $leadIsValid)
        {
            throw new Ando_Utf8_Exception('Expected a valid UTF-8 character.');
        }

        for ($i = 1; $i < $size; $i++)
        {
            $byte = ord($utf8Char[$i]);
            if (($byte & 0xC0) != 0x80)
            {
                //every continuation byte must look like 10xxxxxx
                throw new Ando_Utf8_Exception('Expected a valid UTF-8 character.');
            }
            $result = ($result << 6) | ($byte & 0x3F);
        }

        if ($result < $minimum || $result > 0x10FFFF)
        {
            //overlong encoding (fits in fewer bytes) or beyond unicode
            throw new Ando_Utf8_Exception('Expected a valid UTF-8 character.');
        }
        if (0xD800 <= $result && $result <= 0xDFFF)
        {
            //reserved for UTF-16 surrogates
            throw new Ando_Utf8_Exception('Expected a valid UTF-8 character.');
        }
        if (0xFFFE == $result || 0xFFFF == $result)
        {
            //reserved
            throw new Ando_Utf8_Exception('Expected a valid UTF-8 character.');
        }

        return $result;
    }

    /**
     * Compute the surrogate pair of a given extended UTF-8 character
     *
     * @link http://noteslog.com/post/escaping-and-unescaping-utf-8-characters-in-php/
     * @link http://en.wikipedia.org/wiki/UTF-16/UCS-2
     *
     * @param string $utf8Char
     * @throws Ando_Utf8_Exception If the code point of $utf8Char is not extended unicode
     * @return array array(upper surrogate, lower surrogate), both integers
     */
    public static function utf8CharToSurrogatePair($utf8Char)
    {
        $codePoint = self::utf8CharToCodePoint($utf8Char);
        if ($codePoint < 0x10000)
        {
            throw new Ando_Utf8_Exception('Expected an extended UTF-8 character.');
        }

        //20 significant bits: the top 10 go to the upper surrogate, the rest to the lower
        $offset = $codePoint - 0x10000;

        return array(
            0xD800 + ($offset >> 10),
            0xDC00 + ($offset & 0x03FF),
        );
    }

    /**
     * Unescape UTF-8 characters from a given escape format
     *
     * About the read.callback option
     * -- it receives
     * -- -- the given read.arguments
     * -- -- the current match of the pattern with all submatches
     * -- it must return the current unicode integer
     *
     * The 'read' option may be given partially (e.g. only 'pattern'): the
     * missing entries fall back to their defaults.
     *
     * @link http://noteslog.com/post/escaping-and-unescaping-utf-8-characters-in-php/
     *
     * @param string $value
     * @param array $options
     *     'read' => array(
     *         'pattern'   => preg     (default: '@\\\\u([0-9A-Fa-f]{4})@'),
     *         'callback'  => callable (default: array('Ando_Utf8', 'hexToCodePoint')),
     *         'arguments' => array    (default: array()),
     *     ),
     *     'extendedUseSurrogate' => boolean (default: TRUE), read an upper
     *         surrogate immediately followed by a lower one as one character
     *     'filters' => array(
     *         'after-read' => array('callback' => callable, 'arguments' => array),
     *     ) (optional), applied to the unescaped result
     *
     * @throws Ando_Utf8_Exception If an option is malformed, a code point is
     *     not unicode, or an upper surrogate is not directly followed by a
     *     lower surrogate
     *
     * @return string
     */
    public static function unescape($value, array $options = array())
    {
        $options = self::mergeOptions(array(
            'read' => array(
                'pattern'   => '@\\\\u([0-9A-Fa-f]{4})@',
                // a plain static method instead of create_function(), which
                // was removed in PHP 8.0
                'callback'  => array(__CLASS__, 'hexToCodePoint'),
                'arguments' => array(),
            ),
            'extendedUseSurrogate' => true,
        ), $options, 'read');

        if (! self::isCallable($options['read']))
        {
            throw new Ando_Utf8_Exception('Expected a valid read handler (callable, array).');
        }
        $thereAreFilters = self::validateFilters($options);

        $reader = $options['read'];
        $pattern = $reader['pattern'];
        $result = '';
        while (preg_match($pattern, $value, $matches, PREG_OFFSET_CAPTURE))
        {
            $unicode = self::eatUpMatches($result, $value, $matches, $reader);
            if ($options['extendedUseSurrogate'] && (0xD800 <= $unicode && $unicode < 0xDC00))
            {
                $upperSurrogate = $unicode;
                // the lower surrogate must start right where the upper one
                // ended (offset 0 of the rest), otherwise the text in between
                // would be silently moved in front of the character
                $found = preg_match($pattern, $value, $matches, PREG_OFFSET_CAPTURE);
                if (! $found || 0 !== $matches[0][1])
                {
                    throw new Ando_Utf8_Exception('Expected an extended UTF-8 character.');
                }
                $unicode = self::eatUpMatches($result, $value, $matches, $reader);
                $utf8Char = self::utf8CharFromSurrogatePair(array($upperSurrogate, $unicode));
            }
            else
            {
                $utf8Char = self::utf8CharFromCodePoint($unicode);
            }
            $result .= $utf8Char;
        }
        // whatever is left contains no more escapes
        $result .= $value;

        if ($thereAreFilters && isset($options['filters']['after-read']))
        {
            $result = self::call($options['filters']['after-read'], $result);
        }

        return $result;
    }

    /**
     * Default read callback of unescape(): convert the hexadecimal digits
     * captured by the default pattern into a code point
     *
     * @param string $all the whole matched escape sequence (unused)
     * @param string $code the captured hexadecimal digits
     * @return integer|float what hexdec() returns for $code
     */
    public static function hexToCodePoint($all, $code)
    {
        return hexdec($code);
    }

    /**
     * Compute the UTF-8 character of a given code point
     *
     * The character is encoded here, without relying on the mbstring
     * extension, so that validation does not depend on the installation.
     *
     * @link http://noteslog.com/post/escaping-and-unescaping-utf-8-characters-in-php/
     *
     * @param integer $codePoint
     * @throws Ando_Utf8_Exception if the code point is not unicode, i.e. it is
     *     not numeric, outside U+0000..U+10FFFF, a UTF-16 surrogate, or one
     *     of the reserved U+FFFE / U+FFFF
     * @return string
     */
    public static function utf8CharFromCodePoint($codePoint)
    {
        if (! is_numeric($codePoint) || $codePoint < 0 || $codePoint > 0x10FFFF)
        {
            //unicode range is 0x00000000 - 0x0010FFFF
            throw new Ando_Utf8_Exception('Expected a valid code point.');
        }
        $codePoint = (int) $codePoint;

        if (0xD800 <= $codePoint && $codePoint <= 0xDFFF)
        {
            //reserved for UTF-16 surrogates
            throw new Ando_Utf8_Exception('Expected a valid code point.');
        }
        if (0xFFFE == $codePoint || 0xFFFF == $codePoint)
        {
            //reserved
            throw new Ando_Utf8_Exception('Expected a valid code point.');
        }

        if ($codePoint < 0x80)
        {
            //Code points U+0000..U+007F, mask 0xxxxxxx
            return chr($codePoint);
        }
        if ($codePoint < 0x800)
        {
            //Code points U+0080..U+07FF, mask 110yyyyy 10xxxxxx
            return chr(0xC0 | ($codePoint >> 6))
                . chr(0x80 | ($codePoint & 0x3F));
        }
        if ($codePoint < 0x10000)
        {
            //Code points U+0800..U+FFFF, mask 1110zzzz 10yyyyyy 10xxxxxx
            return chr(0xE0 | ($codePoint >> 12))
                . chr(0x80 | (($codePoint >> 6) & 0x3F))
                . chr(0x80 | ($codePoint & 0x3F));
        }

        //Code points U+10000..U+10FFFF, mask 11110www 10zzzzzz 10yyyyyy 10xxxxxx
        return chr(0xF0 | ($codePoint >> 18))
            . chr(0x80 | (($codePoint >> 12) & 0x3F))
            . chr(0x80 | (($codePoint >> 6) & 0x3F))
            . chr(0x80 | ($codePoint & 0x3F));
    }

    /**
     * Compute the extended UTF-8 character of a given surrogate pair
     *
     * @link http://noteslog.com/post/escaping-and-unescaping-utf-8-characters-in-php/
     * @link http://en.wikipedia.org/wiki/UTF-16/UCS-2
     *
     * @param array $surrogatePair array(upper surrogate, lower surrogate)
     * @throws Ando_Utf8_Exception If the surrogate pair is not extended unicode
     * @return string
     */
    public static function utf8CharFromSurrogatePair($surrogatePair)
    {
        list($upperSurrogate, $lowerSurrogate) = $surrogatePair;

        $upperIsValid = 0xD800 <= $upperSurrogate && $upperSurrogate < 0xDC00;
        if (! $upperIsValid)
        {
            throw new Ando_Utf8_Exception('Expected an extended UTF-8 character.');
        }
        $lowerIsValid = 0xDC00 <= $lowerSurrogate && $lowerSurrogate < 0xE000;
        if (! $lowerIsValid)
        {
            throw new Ando_Utf8_Exception('Expected an extended UTF-8 character.');
        }

        //each surrogate carries 10 bits of the code point minus 0x10000
        $codePoint = 0x10000 + ((($upperSurrogate & 0x03FF) << 10) | ($lowerSurrogate & 0x03FF));

        return self::utf8CharFromCodePoint($codePoint);
    }

    /**
     * Validate filters. If there are filters return true, else false
     *
     * @param array $options
     * @throws Ando_Utf8_Exception If there are malformed filters
     * @return boolean
     */
    protected static function validateFilters($options)
    {
        if (! isset($options['filters']))
        {
            return false;
        }
        if (! is_array($options['filters']))
        {
            throw new Ando_Utf8_Exception('Expected valid filters.');
        }
        foreach ($options['filters'] as $key => $value)
        {
            if (! self::isCallable($value))
            {
                throw new Ando_Utf8_Exception("Expected a valid $key handler.");
            }
        }
        return true;
    }

    /**
     * Merge the given options over the defaults, going one level deeper for
     * the handler entry so that a partially specified handler keeps the
     * default values of the entries it does not mention
     *
     * @param array $defaults
     * @param array $options
     * @param string $handlerKey key of the nested handler array ('write' or 'read')
     * @return array
     */
    private static function mergeOptions(array $defaults, array $options, $handlerKey)
    {
        $merged = array_merge($defaults, $options);
        if (isset($options[$handlerKey]) && is_array($options[$handlerKey]))
        {
            $merged[$handlerKey] = array_merge($defaults[$handlerKey], $options[$handlerKey]);
        }
        return $merged;
    }

    /**
     * A little calling interface: validation
     *
     * A handler is valid when it is an array with a callable 'callback' and
     * an array of 'arguments'.
     *
     * @param array $handler
     * @return boolean
     */
    private static function isCallable($handler)
    {
        return is_array($handler)
            && isset($handler['callback'], $handler['arguments'])
            && is_callable($handler['callback'])
            && is_array($handler['arguments']);
    }

    /**
     * A little calling interface: call
     *
     * The callback receives the handler's own arguments followed by $args.
     *
     * @param array $handler
     * @param mixed $args one argument, or an array of arguments
     * @return mixed
     */
    private static function call($handler, $args)
    {
        $extra = is_array($args) ? $args : array($args);
        // array_values: string keys would be taken as named arguments by PHP 8
        $all = array_values(array_merge($handler['arguments'], $extra));
        return call_user_func_array($handler['callback'], $all);
    }

    /**
     * 1: update $processed with the unmatched substring before $matches
     * 2: update $value with the rest of the substring after $matches
     * 3: return unicode read from the matched substring in $matches
     *
     * @param string $processed
     * @param string $value
     * @param array $matches as filled by preg_match with PREG_OFFSET_CAPTURE
     * @param array $handler
     * @throws Ando_Utf8_Exception If the match is empty, because consuming
     *     nothing would make the caller loop forever
     * @return integer
     */
    private static function eatUpMatches(&$processed, &$value, $matches, $handler)
    {
        list($match, $offset) = $matches[0];
        if ('' === $match)
        {
            throw new Ando_Utf8_Exception('Expected a read pattern matching at least one character.');
        }
        $processed .= substr($value, 0, $offset);
        // substr may return false at the very end of the string on old PHP
        $value = (string) substr($value, $offset + strlen($match));

        // keep only the matched strings (whole match first, then submatches)
        $args = array();
        foreach ($matches as $group)
        {
            $args[] = is_array($group) ? $group[0] : $group;
        }

        return self::call($handler, $args);
    }

}