WordPress.org

Make WordPress Core

Ticket #16892: 16892.7.patch

File 16892.7.patch, 5.9 KB (added by duck_, 6 years ago)
  • wp-includes/formatting.php

     
    13871387 */
    13881388function _make_url_clickable_cb($matches) {
    13891389        $url = $matches[2];
    1390         $suffix = '';
    13911390
    1392         /** Include parentheses in the URL only if paired **/
     1391        if ( ')' == $matches[3] && strpos( $url, '(' ) ) {
     1392                // If the trailing character is a closing parenthesis, and the URL has an opening parenthesis in it, add the closing parenthesis to the URL.
     1393                // Then we can let the parenthesis balancer do its thing below.
     1394                $url .= $matches[3];
     1395                $suffix = '';
     1396        } else {
     1397                $suffix = $matches[3];
     1398        }
     1399
     1400        // Include parentheses in the URL only if paired
    13931401        while ( substr_count( $url, '(' ) < substr_count( $url, ')' ) ) {
    13941402                $suffix = strrchr( $url, ')' ) . $suffix;
    13951403                $url = substr( $url, 0, strrpos( $url, ')' ) );
     
    14581466 * @param string $ret Content to convert URIs.
    14591467 * @return string Content with converted URIs.
    14601468 */
    1461 function make_clickable($ret) {
    1462         $ret = ' ' . $ret;
    1463         // in testing, using arrays here was found to be faster
    1464         $save = @ini_set('pcre.recursion_limit', 10000);
    1465         $retval = preg_replace_callback('#(?<!=[\'"])(?<=[*\')+.,;:!&$\s>])(\()?([\w]+?://(?:[\w\\x80-\\xff\#%~/?@\[\]-]{1,2000}|[\'*(+.,;:!=&$](?![\b\)]|(\))?([\s]|$))|(?(1)\)(?![\s<.,;:]|$)|\)))+)#is', '_make_url_clickable_cb', $ret);
    1466         if (null !== $retval )
    1467                 $ret = $retval;
    1468         @ini_set('pcre.recursion_limit', $save);
     1469function make_clickable( $ret ) {
     1470        // Long strings might contain expensive edge cases ...
     1471        if ( 10000 < strlen( $ret ) ) {
     1472                $r = '';
     1473                // ... break it up
     1474                foreach ( _split_str_by_whitespace( $ret, 2100 ) as $chunk ) { // 2100: Extra room for scheme and leading and trailing parentheses
     1475                        if ( 2101 < strlen( $chunk ) ) {
     1476                                $r .= $chunk; // Too big, no whitespace: bail.
     1477                        } else {
     1478                                $r .= make_clickable( $chunk );
     1479                        }
     1480                }
     1481                return $r;
     1482        }
     1483
     1484        $ret = " $ret "; // Pad with whitespace to simplify the regexes
     1485
     1486        $url_clickable = '~
     1487                ([\\s(<.,;:!?])                                        # 1: Leading whitespace, or punctuation
     1488                (                                                      # 2: URL
     1489                        [\\w]{1,20}+://                                # Scheme and hier-part prefix
     1490                        (?=\S{1,2000}\s)                               # Limit to URLs less than about 2000 characters long
     1491                        [\\w\\x80-\\xff#%\\~/@\\[\\]*(+=&$-]*+         # Non-punctuation URL character
     1492                        (?:                                            # Unroll the Loop: Only allow puctuation URL character if followed by a non-punctuation URL character
     1493                                [\'.,;:!?)]                            # Punctuation URL character
     1494                                [\\w\\x80-\\xff#%\\~/@\\[\\]*(+=&$-]++ # Non-punctuation URL character
     1495                        )*
     1496                )
     1497                (\)?)                                                  # 3: Trailing closing parenthesis (for parethesis balancing post processing)
     1498        ~xS'; // The regex is a non-anchored pattern and does not have a single fixed starting character.
     1499              // Tell PCRE to spend more time optimizing since, when used on a page load, it will probably be used several times.
     1500
     1501        $ret = preg_replace_callback( $url_clickable, '_make_url_clickable_cb', $ret );
     1502
    14691503        $ret = preg_replace_callback('#([\s>])((www|ftp)\.[\w\\x80-\\xff\#$%&~/.\-;:=,?@\[\]+]+)#is', '_make_web_ftp_clickable_cb', $ret);
    14701504        $ret = preg_replace_callback('#([\s>])([.0-9a-z_+-]+)@(([0-9a-z-]+\.)+[0-9a-z]{2,})#i', '_make_email_clickable_cb', $ret);
    1471         // this one is not in an array because we need it to run last, for cleanup of accidental links within links
     1505
     1506        // Cleanup of accidental links within links
    14721507        $ret = preg_replace("#(<a( [^>]+?>|>))<a [^>]+?>([^>]+?)</a></a>#i", "$1$3</a>", $ret);
    1473         $ret = trim($ret);
    1474         return $ret;
     1508        return substr( $ret, 1, -1 ); // Remove our whitespace padding.
    14751509}
    14761510
    /**
     * Breaks a string into chunks by splitting at whitespace characters.
     *
     * The length of each returned chunk is as close to the specified length goal as possible,
     * with the caveat that each chunk includes its trailing delimiter, so a chunk may be
     * one character longer than the goal. Chunks longer than that are guaranteed to not
     * have any inner whitespace.
     *
     * Joining the returned chunks with empty delimiters reconstructs the input string losslessly.
     * This includes inputs whose last chunk is the single character "0".
     *
     * Input string must have no null characters (or eventual transformations on output chunks
     * must not care about null characters): whitespace is located by mapping it to null bytes,
     * so a null byte already present in the input is treated as a split point too.
     *
     * <code>
     * _split_str_by_whitespace( "1234 67890 1234 67890a cd 1234   890 123456789 1234567890a    45678   1 3 5 7 90 ", 10 ) ==
     * array (
     *   0 => '1234 67890 ',  // 11 characters: Perfect split
     *   1 => '1234 ',        //  5 characters: '1234 67890a' was too long
     *   2 => '67890a cd ',   // 10 characters: '67890a cd 1234' was too long
     *   3 => '1234   890 ',  // 11 characters: Perfect split
     *   4 => '123456789 ',   // 10 characters: '123456789 1234567890a' was too long
     *   5 => '1234567890a ', // 12 characters: Too long, but no inner whitespace on which to split
     *   6 => '   45678   ',  // 11 characters: Perfect split
     *   7 => '1 3 5 7 90 ',  // 11 characters: End of $string
     * );
     * </code>
     *
     * @param string $string The string to split
     * @param    int $goal   The desired chunk length.
     *
     * @return array Numeric array of chunks.
     */
    function _split_str_by_whitespace( $string, $goal ) {
            $chunks = array();

            // Shadow copy with every whitespace byte mapped to a null byte, so that a single
            // strpos()/strrpos() call can find any kind of whitespace delimiter.
            $string_nullspace = strtr( $string, "\r\n\t\v\f ", "\000\000\000\000\000\000" );

            while ( $goal < strlen( $string_nullspace ) ) {
                    // Prefer the last delimiter that keeps the chunk within $goal + 1 bytes.
                    $pos = strrpos( substr( $string_nullspace, 0, $goal + 1 ), "\000" );

                    if ( false === $pos ) {
                            // No delimiter in range: take the oversized word up to the next delimiter.
                            $pos = strpos( $string_nullspace, "\000", $goal + 1 );
                            if ( false === $pos ) {
                                    // No whitespace left at all; the remainder becomes the final chunk.
                                    break;
                            }
                    }

                    // Cut both the real string and its shadow copy at the same offset.
                    $chunks[] = substr( $string, 0, $pos + 1 );
                    $string = substr( $string, $pos + 1 );
                    $string_nullspace = substr( $string_nullspace, $pos + 1 );
            }

            // Compare against the empty string rather than relying on truthiness: a remainder
            // of "0" is falsy in PHP and would otherwise be silently dropped. The cast also
            // covers substr() returning false for an exhausted string on older PHP versions.
            if ( '' !== (string) $string ) {
                    $chunks[] = $string;
            }

            return $chunks;
    }
     1566
     1567/**
    14781568 * Adds rel nofollow string to all HTML A elements in content.
    14791569 *
    14801570 * @since 1.5.0