Changeset 20493 for branches/3.3/wp-includes/formatting.php
- Timestamp:
- 04/17/2012 08:02:49 PM (13 years ago)
- Location:
- branches/3.3
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/3.3
-
branches/3.3/wp-includes/formatting.php
r19187 r20493 1351 1351 function _make_url_clickable_cb($matches) { 1352 1352 $url = $matches[2]; 1353 $suffix = ''; 1354 1355 /** Include parentheses in the URL only if paired **/ 1353 1354 if ( ')' == $matches[3] && strpos( $url, '(' ) ) { 1355 // If the trailing character is a closing parethesis, and the URL has an opening parenthesis in it, add the closing parenthesis to the URL. 1356 // Then we can let the parenthesis balancer do its thing below. 1357 $url .= $matches[3]; 1358 $suffix = ''; 1359 } else { 1360 $suffix = $matches[3]; 1361 } 1362 1363 // Include parentheses in the URL only if paired 1356 1364 while ( substr_count( $url, '(' ) < substr_count( $url, ')' ) ) { 1357 1365 $suffix = strrchr( $url, ')' ) . $suffix; … … 1419 1427 * @since 0.71 1420 1428 * 1421 * @param string $ret Content to convert URIs. 1429 * @param string $text Content to convert URIs. 1422 1430 * @return string Content with converted URIs. 1423 1431 */ 1424 function make_clickable($ret) { 1425 $ret = ' ' . 
$ret; 1426 // in testing, using arrays here was found to be faster 1427 $save = @ini_set('pcre.recursion_limit', 10000); 1428 $retval = preg_replace_callback('#(?<!=[\'"])(?<=[*\')+.,;:!&$\s>])(\()?([\w]+?://(?:[\w\\x80-\\xff\#%~/?@\[\]-]{1,2000}|[\'*(+.,;:!=&$](?![\b\)]|(\))?([\s]|$))|(?(1)\)(?![\s<.,;:]|$)|\)))+)#is', '_make_url_clickable_cb', $ret); 1429 if (null !== $retval ) 1430 $ret = $retval; 1431 @ini_set('pcre.recursion_limit', $save); 1432 $ret = preg_replace_callback('#([\s>])((www|ftp)\.[\w\\x80-\\xff\#$%&~/.\-;:=,?@\[\]+]+)#is', '_make_web_ftp_clickable_cb', $ret); 1433 $ret = preg_replace_callback('#([\s>])([.0-9a-z_+-]+)@(([0-9a-z-]+\.)+[0-9a-z]{2,})#i', '_make_email_clickable_cb', $ret); 1434 // this one is not in an array because we need it to run last, for cleanup of accidental links within links 1435 $ret = preg_replace("#(<a( [^>]+?>|>))<a [^>]+?>([^>]+?)</a></a>#i", "$1$3</a>", $ret); 1436 $ret = trim($ret); 1437 return $ret; 1432 function make_clickable( $text ) { 1433 $r = ''; 1434 $textarr = preg_split( '/(<[^<>]+>)/', $text, -1, PREG_SPLIT_DELIM_CAPTURE ); // split out HTML tags 1435 foreach ( $textarr as $piece ) { 1436 if ( empty( $piece ) || ( $piece[0] == '<' && ! preg_match('|^<\s*[\w]{1,20}+://|', $piece) ) ) { 1437 $r .= $piece; 1438 continue; 1439 } 1440 1441 // Long strings might contain expensive edge cases ... 1442 if ( 10000 < strlen( $piece ) ) { 1443 // ... break it up 1444 foreach ( _split_str_by_whitespace( $piece, 2100 ) as $chunk ) { // 2100: Extra room for scheme and leading and trailing paretheses 1445 if ( 2101 < strlen( $chunk ) ) { 1446 $r .= $chunk; // Too big, no whitespace: bail. 
1447 } else { 1448 $r .= make_clickable( $chunk ); 1449 } 1450 } 1451 } else { 1452 $ret = " $piece "; // Pad with whitespace to simplify the regexes 1453 1454 $url_clickable = '~ 1455 ([\\s(<.,;:!?]) # 1: Leading whitespace, or punctuation 1456 ( # 2: URL 1457 [\\w]{1,20}+:// # Scheme and hier-part prefix 1458 (?=\S{1,2000}\s) # Limit to URLs less than about 2000 characters long 1459 [\\w\\x80-\\xff#%\\~/@\\[\\]*(+=&$-]*+ # Non-punctuation URL character 1460 (?: # Unroll the Loop: Only allow puctuation URL character if followed by a non-punctuation URL character 1461 [\'.,;:!?)] # Punctuation URL character 1462 [\\w\\x80-\\xff#%\\~/@\\[\\]*(+=&$-]++ # Non-punctuation URL character 1463 )* 1464 ) 1465 (\)?) # 3: Trailing closing parenthesis (for parethesis balancing post processing) 1466 ~xS'; // The regex is a non-anchored pattern and does not have a single fixed starting character. 1467 // Tell PCRE to spend more time optimizing since, when used on a page load, it will probably be used several times. 1468 1469 $ret = preg_replace_callback( $url_clickable, '_make_url_clickable_cb', $ret ); 1470 1471 $ret = preg_replace_callback( '#([\s>])((www|ftp)\.[\w\\x80-\\xff\#$%&~/.\-;:=,?@\[\]+]+)#is', '_make_web_ftp_clickable_cb', $ret ); 1472 $ret = preg_replace_callback( '#([\s>])([.0-9a-z_+-]+)@(([0-9a-z-]+\.)+[0-9a-z]{2,})#i', '_make_email_clickable_cb', $ret ); 1473 1474 $ret = substr( $ret, 1, -1 ); // Remove our whitespace padding. 1475 $r .= $ret; 1476 } 1477 } 1478 1479 // Cleanup of accidental links within links 1480 $r = preg_replace( '#(<a( [^>]+?>|>))<a [^>]+?>([^>]+?)</a></a>#i', "$1$3</a>", $r ); 1481 return $r; 1482 } 1483 1484 /** 1485 * Breaks a string into chunks by splitting at whitespace characters. 1486 * The length of each returned chunk is as close to the specified length goal as possible, 1487 * with the caveat that each chunk includes its trailing delimiter. 1488 * Chunks longer than the goal are guaranteed to not have any inner whitespace. 
1489 * 1490 * Joining the returned chunks with empty delimiters reconstructs the input string losslessly. 1491 * 1492 * Input string must have no null characters (or eventual transformations on output chunks must not care about null characters) 1493 * 1494 * <code> 1495 * _split_str_by_whitespace( "1234 67890 1234 67890a cd 1234 890 123456789 1234567890a 45678 1 3 5 7 90 ", 10 ) == 1496 * array ( 1497 * 0 => '1234 67890 ', // 11 characters: Perfect split 1498 * 1 => '1234 ', // 5 characters: '1234 67890a' was too long 1499 * 2 => '67890a cd ', // 10 characters: '67890a cd 1234' was too long 1500 * 3 => '1234 890 ', // 11 characters: Perfect split 1501 * 4 => '123456789 ', // 10 characters: '123456789 1234567890a' was too long 1502 * 5 => '1234567890a ', // 12 characters: Too long, but no inner whitespace on which to split 1503 * 6 => ' 45678 ', // 11 characters: Perfect split 1504 * 7 => '1 3 5 7 9', // 9 characters: End of $string 1505 * ); 1506 * </code> 1507 * 1508 * @since 3.4.0 1509 * @access private 1510 * 1511 * @param string $string The string to split 1512 * @param int $goal The desired chunk length. 1513 * @return array Numeric array of chunks. 1514 */ 1515 function _split_str_by_whitespace( $string, $goal ) { 1516 $chunks = array(); 1517 1518 $string_nullspace = strtr( $string, "\r\n\t\v\f ", "\000\000\000\000\000\000" ); 1519 1520 while ( $goal < strlen( $string_nullspace ) ) { 1521 $pos = strrpos( substr( $string_nullspace, 0, $goal + 1 ), "\000" ); 1522 1523 if ( false === $pos ) { 1524 $pos = strpos( $string_nullspace, "\000", $goal + 1 ); 1525 if ( false === $pos ) { 1526 break; 1527 } 1528 } 1529 1530 $chunks[] = substr( $string, 0, $pos + 1 ); 1531 $string = substr( $string, $pos + 1 ); 1532 $string_nullspace = substr( $string_nullspace, $pos + 1 ); 1533 } 1534 1535 if ( $string ) { 1536 $chunks[] = $string; 1537 } 1538 1539 return $chunks; 1438 1540 } 1439 1541
Note: See TracChangeset
for help on using the changeset viewer.