| 1375 | | function make_clickable($ret) { |
| 1376 | | $ret = ' ' . $ret; |
| 1377 | | // in testing, using arrays here was found to be faster |
| 1378 | | $save = @ini_set('pcre.recursion_limit', 10000); |
| 1379 | | $retval = preg_replace_callback('#(?<!=[\'"])(?<=[*\')+.,;:!&$\s>])(\()?([\w]+?://(?:[\w\\x80-\\xff\#%~/?@\[\]-]{1,2000}|[\'*(+.,;:!=&$](?![\b\)]|(\))?([\s]|$))|(?(1)\)(?![\s<.,;:]|$)|\)))+)#is', '_make_url_clickable_cb', $ret); |
| 1380 | | if (null !== $retval ) |
| 1381 | | $ret = $retval; |
| 1382 | | @ini_set('pcre.recursion_limit', $save); |
| | 1383 | function make_clickable( $ret ) { |
| | 1384 | // Long strings might contain expensive edge cases ... |
| | 1385 | if ( 10000 < strlen( $ret ) ) { |
| | 1386 | $r = ''; |
| | 1387 | // ... break it up |
| | 1388 | foreach ( _split_str_by_whitespace( $ret, 2100 ) as $chunk ) { // 2100: Extra room for scheme and leading and trailing paretheses |
| | 1389 | if ( 2101 < strlen( $chunk ) ) { |
| | 1390 | $r .= $chunk; // Too big, no whitespace: bail. |
| | 1391 | } else { |
| | 1392 | $r .= make_clickable( $chunk ); |
| | 1393 | } |
| | 1394 | } |
| | 1395 | return $r; |
| | 1396 | } |
| | 1397 | |
| | 1398 | $ret = " $ret "; // Pad with whitespace to simplify the regexes |
| | 1399 | |
| | 1400 | $url_clickable = '~ |
| | 1401 | ([\\s\\(<]) # 1: Leading whitespace, opening parethesis (for parethesis balancing post processing), or opening angle bracket |
| | 1402 | ( # 2: URL |
| | 1403 | [\\w]{1,20}+:// # Scheme and hier-part prefix |
| | 1404 | (?=\S{1,2000}\s) # Limit to URLs less than about 2000 characters long |
| | 1405 | [\\w\\x80-\\xff#%\\~/@\\[\\]*(+=&$-]*+ # Non-punctuation URL character |
| | 1406 | (?: # Unroll the Loop: Only allow puctuation URL character if followed by a non-punctuation URL character |
| | 1407 | [\'.,;:!?)] # Punctuation URL character |
| | 1408 | [\\w\\x80-\\xff#%\\~/@\\[\\]*(+=&$-]++ # Non-punctuation URL character |
| | 1409 | )* |
| | 1410 | ) |
| | 1411 | (\)?) # 3: Trailing closing parenthesis (for parethesis balancing post processing) |
| | 1412 | ~xS'; // The regex is a non-anchored pattern and does not have a single fixed starting character. |
| | 1413 | // Tell PCRE to spend more time optimizing since, when used on a page load, it will probably be used several times. |
| | 1414 | |
| | 1415 | $ret = preg_replace_callback( $url_clickable, '_make_url_clickable_cb', $ret ); |
| | 1416 | |
| | 1426 | * Breaks a string into chunks by splitting at whitespace characters. |
| | 1427 | * The length of each returned chunk is as close to the specified length goal as possible, |
| | 1428 | * with the caveat that each chunk includes its trailing delimiter. |
| | 1429 | * Chunks longer than the goal are guaranteed to not have any inner whitespace. |
| | 1430 | * |
| | 1431 | * Joining the returned chunks with empty delimiters reconstructs the input string losslessly. |
| | 1432 | * |
| | 1433 | * Input string must have no null characters (or eventual transformations on output chunks must not care about null characters) |
| | 1434 | * |
| | 1435 | * <code> |
| | 1436 | * _split_str_by_whitespace( "1234 67890 1234 67890a cd 1234   890 123456789 1234567890a    45678   1 3 5 7 90 ", 10 ) == |
| | 1437 | * array ( |
| | 1438 | * 0 => '1234 67890 ', // 11 characters: Perfect split |
| | 1439 | * 1 => '1234 ', // 5 characters: '1234 67890a' was too long |
| | 1440 | * 2 => '67890a cd ', // 10 characters: '67890a cd 1234' was too long |
| | 1441 | * 3 => '1234   890 ', // 11 characters: Perfect split |
| | 1442 | * 4 => '123456789 ', // 10 characters: '123456789 1234567890a' was too long |
| | 1443 | * 5 => '1234567890a ', // 12 characters: Too long, but no inner whitespace on which to split |
| | 1444 | * 6 => '   45678   ', // 11 characters: Perfect split |
| | 1445 | * 7 => '1 3 5 7 90 ', // 11 characters: End of $string |
| | 1446 | * ); |
| | 1447 | * </code> |
| | 1448 | * |
| | 1449 | * @param string $string The string to split |
| | 1450 | * @param int $goal The desired chunk length. |
| | 1451 | * |
| | 1452 | * @return array Numeric array of chunks. |
| | 1453 | */ |
function _split_str_by_whitespace( $string, $goal ) {
	// Splits $string into chunks of roughly $goal bytes, cutting only directly after a
	// whitespace byte, so that implode( '', $chunks ) reproduces the input exactly.
	$chunks = array();

	// Shadow copy in which every whitespace byte (CR, LF, tab, VT, FF, space) is replaced
	// by NUL, so a single strpos()/strrpos() call can locate "any whitespace". The copy has
	// the same byte length as $string, so offsets found in it apply to $string directly.
	$string_nullspace = strtr( $string, "\r\n\t\v\f ", "\000\000\000\000\000\000" );

	while ( $goal < strlen( $string_nullspace ) ) {
		// Prefer the last delimiter inside the first $goal + 1 bytes: this gives the longest
		// chunk whose length (including its trailing delimiter) is at most $goal + 1.
		$pos = strrpos( substr( $string_nullspace, 0, $goal + 1 ), "\000" );

		if ( false === $pos ) {
			// No delimiter in that window: the chunk has to overshoot the goal, so cut at
			// the first delimiter after the window instead.
			$pos = strpos( $string_nullspace, "\000", $goal + 1 );
			if ( false === $pos ) {
				// No whitespace left at all; the remainder becomes the final chunk below.
				break;
			}
		}

		// Each chunk keeps its trailing delimiter; both strings advance in lock-step.
		$chunks[]         = substr( $string, 0, $pos + 1 );
		$string           = substr( $string, $pos + 1 );
		$string_nullspace = substr( $string_nullspace, $pos + 1 );
	}

	// Compare against the empty string instead of relying on truthiness: a remainder of
	// "0" is falsy in PHP and would otherwise be dropped, breaking lossless reconstruction.
	// The cast also covers PHP versions where substr() returns false at the end of a string.
	if ( '' !== (string) $string ) {
		$chunks[] = $string;
	}

	return $chunks;
}
| | 1480 | |
| | 1481 | /** |