WordPress.org

Make WordPress Core

Ticket #16892: 16892.6.patch

File 16892.6.patch, 6.0 KB (added by mdawaffe, 7 years ago)
  • wp-includes/formatting.php

     
    13011301 */
    13021302function _make_url_clickable_cb($matches) {
    13031303        $url = $matches[2];
    1304         $suffix = '';
    13051304
    1306         /** Include parentheses in the URL only if paired **/
     1305        if ( ')' == $matches[3] && strpos( $url, '(' ) ) {
     1306                // If the trailing character is a closing parenthesis, and the URL has an opening parenthesis in it, add the closing parenthesis to the URL.
     1307                // Then we can let the parenthesis balancer do its thing below.
     1308                $url .= $matches[3];
     1309                $suffix = '';
     1310        } else {
     1311                $suffix = $matches[3];
     1312        }
     1313
     1314        // Include parentheses in the URL only if paired
    13071315        while ( substr_count( $url, '(' ) < substr_count( $url, ')' ) ) {
    13081316                $suffix = strrchr( $url, ')' ) . $suffix;
    13091317                $url = substr( $url, 0, strrpos( $url, ')' ) );
     
    13721380 * @param string $ret Content to convert URIs.
    13731381 * @return string Content with converted URIs.
    13741382 */
    1375 function make_clickable($ret) {
    1376         $ret = ' ' . $ret;
    1377         // in testing, using arrays here was found to be faster
    1378         $save = @ini_set('pcre.recursion_limit', 10000);
    1379         $retval = preg_replace_callback('#(?<!=[\'"])(?<=[*\')+.,;:!&$\s>])(\()?([\w]+?://(?:[\w\\x80-\\xff\#%~/?@\[\]-]{1,2000}|[\'*(+.,;:!=&$](?![\b\)]|(\))?([\s]|$))|(?(1)\)(?![\s<.,;:]|$)|\)))+)#is', '_make_url_clickable_cb', $ret);
    1380         if (null !== $retval )
    1381                 $ret = $retval;
    1382         @ini_set('pcre.recursion_limit', $save);
     1383function make_clickable( $ret ) {
     1384        // Long strings might contain expensive edge cases ...
     1385        if ( 10000 < strlen( $ret ) ) {
     1386                $r = '';
     1387                // ... break it up
     1388                foreach ( _split_str_by_whitespace( $ret, 2100 ) as $chunk ) { // 2100: Extra room for scheme and leading and trailing parentheses
     1389                        if ( 2101 < strlen( $chunk ) ) {
     1390                                $r .= $chunk; // Too big, no whitespace: bail (the chunk is appended unchanged, without link conversion).
     1391                        } else {
     1392                                $r .= make_clickable( $chunk );
     1393                        }
     1394                }
     1395                return $r;
     1396        }
     1397
     1398        $ret = " $ret "; // Pad with whitespace to simplify the regexes
     1399
     1400        $url_clickable = '~
     1401                ([\\s\\(<])                                            # 1: Leading whitespace, opening parethesis (for parethesis balancing post processing), or opening angle bracket
     1402                (                                                      # 2: URL
     1403                        [\\w]{1,20}+://                                # Scheme and hier-part prefix
     1404                        (?=\S{1,2000}\s)                               # Limit to URLs less than about 2000 characters long
     1405                        [\\w\\x80-\\xff#%\\~/@\\[\\]*(+=&$-]*+         # Non-punctuation URL character
     1406                        (?:                                            # Unroll the Loop: Only allow puctuation URL character if followed by a non-punctuation URL character
     1407                                [\'.,;:!?)]                            # Punctuation URL character
     1408                                [\\w\\x80-\\xff#%\\~/@\\[\\]*(+=&$-]++ # Non-punctuation URL character
     1409                        )*
     1410                )
     1411                (\)?)                                                  # 3: Trailing closing parenthesis (for parethesis balancing post processing)
     1412        ~xS'; // The regex is a non-anchored pattern and does not have a single fixed starting character.
     1413              // Tell PCRE to spend more time optimizing since, when used on a page load, it will probably be used several times.
     1414
     1415        $ret = preg_replace_callback( $url_clickable, '_make_url_clickable_cb', $ret );
     1416
    13831417        $ret = preg_replace_callback('#([\s>])((www|ftp)\.[\w\\x80-\\xff\#$%&~/.\-;:=,?@\[\]+]+)#is', '_make_web_ftp_clickable_cb', $ret);
    13841418        $ret = preg_replace_callback('#([\s>])([.0-9a-z_+-]+)@(([0-9a-z-]+\.)+[0-9a-z]{2,})#i', '_make_email_clickable_cb', $ret);
    1385         // this one is not in an array because we need it to run last, for cleanup of accidental links within links
     1419
     1420        // Cleanup of accidental links within links
    13861421        $ret = preg_replace("#(<a( [^>]+?>|>))<a [^>]+?>([^>]+?)</a></a>#i", "$1$3</a>", $ret);
    1387         $ret = trim($ret);
    1388         return $ret;
     1422        return substr( $ret, 1, -1 ); // Remove our whitespace padding.
    13891423}
    13901424
    13911425/**
     1426 * Breaks a string into chunks by splitting at whitespace characters.
     1427 * The length of each returned chunk is as close to the specified length goal as possible,
     1428 * with the caveat that each chunk includes its trailing delimiter.
     1429 * Chunks longer than the goal are guaranteed to not have any inner whitespace.
     1430 *
     1431 * Joining the returned chunks with empty delimiters reconstructs the input string losslessly.
     1432 *
     1433 * Input string must have no null characters (or eventual transformations on output chunks must not care about null characters)
     1434 *
     1435 * <code>
     1436 * _split_str_by_whitespace( "1234 67890 1234 67890a cd 1234   890 123456789 1234567890a    45678   1 3 5 7 90 ", 10 ) ==
     1437 * array (
     1438 *   0 => '1234 67890 ',  // 11 characters: Perfect split
     1439 *   1 => '1234 ',        //  5 characters: '1234 67890a' was too long
     1440 *   2 => '67890a cd ',   // 10 characters: '67890a cd 1234' was too long
     1441 *   3 => '1234   890 ',  // 11 characters: Perfect split
     1442 *   4 => '123456789 ',   // 10 characters: '123456789 1234567890a' was too long
     1443 *   5 => '1234567890a ', // 12 characters: Too long, but no inner whitespace on which to split
     1444 *   6 => '   45678   ',  // 11 characters: Perfect split
     1445 *   7 => '1 3 5 7 9',    //  9 characters: End of $string
     1446 * );
     1447 * </code>
     1448 *
     1449 * @param string $string The string to split
     1450 * @param    int $goal   The desired chunk length.
     1451 *
     1452 * @return array Numeric array of chunks.
     1453 */
     1454function _split_str_by_whitespace( $string, $goal ) {
     1455        $chunks = array();
     1456
     1457        $string_nullspace = strtr( $string, "\r\n\t\v\f ", "\000\000\000\000\000\000" );
     1458
     1459        while ( $goal < strlen( $string_nullspace ) ) {
     1460                $pos = strrpos( substr( $string_nullspace, 0, $goal + 1 ), "\000" );
     1461
     1462                if ( false === $pos ) {
     1463                        $pos = strpos( $string_nullspace, "\000", $goal + 1 );
     1464                        if ( false === $pos ) {
     1465                                break;
     1466                        }
     1467                }
     1468
     1469                $chunks[] = substr( $string, 0, $pos + 1 );
     1470                $string = substr( $string, $pos + 1 );
     1471                $string_nullspace = substr( $string_nullspace, $pos + 1 );
     1472        }
     1473
     1474        if ( $string ) {
     1475                $chunks[] = $string;
     1476        }
     1477
     1478        return $chunks;
     1479}
     1480
     1481/**
    13921482 * Adds rel nofollow string to all HTML A elements in content.
    13931483 *
    13941484 * @since 1.5.0