Make WordPress Core


Ignore:
Timestamp:
04/17/2012 08:02:49 PM (13 years ago)
Author:
ryan
Message:

Don't attempt to make links inside attributes clickable.

Location:
branches/3.3
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • branches/3.3

  • branches/3.3/wp-includes/formatting.php

    r19187 r20493  
    13511351function _make_url_clickable_cb($matches) {
    13521352    $url = $matches[2];
    1353     $suffix = '';
    1354 
    1355     /** Include parentheses in the URL only if paired **/
     1353
     1354    if ( ')' == $matches[3] && strpos( $url, '(' ) ) {
     1355        // If the trailing character is a closing parenthesis, and the URL has an opening parenthesis in it, add the closing parenthesis to the URL.
     1356        // Then we can let the parenthesis balancer do its thing below.
     1357        $url .= $matches[3];
     1358        $suffix = '';
     1359    } else {
     1360        $suffix = $matches[3];
     1361    }
     1362
     1363    // Include parentheses in the URL only if paired
    13561364    while ( substr_count( $url, '(' ) < substr_count( $url, ')' ) ) {
    13571365        $suffix = strrchr( $url, ')' ) . $suffix;
     
    14191427 * @since 0.71
    14201428 *
    1421  * @param string $ret Content to convert URIs.
     1429 * @param string $text Content to convert URIs.
    14221430 * @return string Content with converted URIs.
    14231431 */
    1424 function make_clickable($ret) {
    1425     $ret = ' ' . $ret;
    1426     // in testing, using arrays here was found to be faster
    1427     $save = @ini_set('pcre.recursion_limit', 10000);
    1428     $retval = preg_replace_callback('#(?<!=[\'"])(?<=[*\')+.,;:!&$\s>])(\()?([\w]+?://(?:[\w\\x80-\\xff\#%~/?@\[\]-]{1,2000}|[\'*(+.,;:!=&$](?![\b\)]|(\))?([\s]|$))|(?(1)\)(?![\s<.,;:]|$)|\)))+)#is', '_make_url_clickable_cb', $ret);
    1429     if (null !== $retval )
    1430         $ret = $retval;
    1431     @ini_set('pcre.recursion_limit', $save);
    1432     $ret = preg_replace_callback('#([\s>])((www|ftp)\.[\w\\x80-\\xff\#$%&~/.\-;:=,?@\[\]+]+)#is', '_make_web_ftp_clickable_cb', $ret);
    1433     $ret = preg_replace_callback('#([\s>])([.0-9a-z_+-]+)@(([0-9a-z-]+\.)+[0-9a-z]{2,})#i', '_make_email_clickable_cb', $ret);
    1434     // this one is not in an array because we need it to run last, for cleanup of accidental links within links
    1435     $ret = preg_replace("#(<a( [^>]+?>|>))<a [^>]+?>([^>]+?)</a></a>#i", "$1$3</a>", $ret);
    1436     $ret = trim($ret);
    1437     return $ret;
     1432function make_clickable( $text ) {
     1433    $r = '';
     1434    $textarr = preg_split( '/(<[^<>]+>)/', $text, -1, PREG_SPLIT_DELIM_CAPTURE ); // split out HTML tags
     1435    foreach ( $textarr as $piece ) {
     1436        if ( empty( $piece ) || ( $piece[0] == '<' && ! preg_match('|^<\s*[\w]{1,20}+://|', $piece) ) ) {
     1437            $r .= $piece;
     1438            continue;
     1439        }
     1440
     1441        // Long strings might contain expensive edge cases ...
     1442        if ( 10000 < strlen( $piece ) ) {
     1443            // ... break it up
     1444            foreach ( _split_str_by_whitespace( $piece, 2100 ) as $chunk ) { // 2100: Extra room for scheme and leading and trailing parentheses
     1445                if ( 2101 < strlen( $chunk ) ) {
     1446                    $r .= $chunk; // Too big, no whitespace: bail.
     1447                } else {
     1448                    $r .= make_clickable( $chunk );
     1449                }
     1450            }
     1451        } else {
     1452            $ret = " $piece "; // Pad with whitespace to simplify the regexes
     1453
     1454            $url_clickable = '~
     1455                ([\\s(<.,;:!?])                                        # 1: Leading whitespace, or punctuation
     1456                (                                                      # 2: URL
     1457                    [\\w]{1,20}+://                                # Scheme and hier-part prefix
     1458                    (?=\S{1,2000}\s)                               # Limit to URLs less than about 2000 characters long
     1459                    [\\w\\x80-\\xff#%\\~/@\\[\\]*(+=&$-]*+         # Non-punctuation URL character
     1460                    (?:                                            # Unroll the Loop: Only allow puctuation URL character if followed by a non-punctuation URL character
     1461                        [\'.,;:!?)]                            # Punctuation URL character
     1462                        [\\w\\x80-\\xff#%\\~/@\\[\\]*(+=&$-]++ # Non-punctuation URL character
     1463                    )*
     1464                )
     1465                (\)?)                                                  # 3: Trailing closing parenthesis (for parethesis balancing post processing)
     1466            ~xS'; // The regex is a non-anchored pattern and does not have a single fixed starting character.
     1467                  // Tell PCRE to spend more time optimizing since, when used on a page load, it will probably be used several times.
     1468
     1469            $ret = preg_replace_callback( $url_clickable, '_make_url_clickable_cb', $ret );
     1470
     1471            $ret = preg_replace_callback( '#([\s>])((www|ftp)\.[\w\\x80-\\xff\#$%&~/.\-;:=,?@\[\]+]+)#is', '_make_web_ftp_clickable_cb', $ret );
     1472            $ret = preg_replace_callback( '#([\s>])([.0-9a-z_+-]+)@(([0-9a-z-]+\.)+[0-9a-z]{2,})#i', '_make_email_clickable_cb', $ret );
     1473
     1474            $ret = substr( $ret, 1, -1 ); // Remove our whitespace padding.
     1475            $r .= $ret;
     1476        }
     1477    }
     1478
     1479    // Cleanup of accidental links within links
     1480    $r = preg_replace( '#(<a( [^>]+?>|>))<a [^>]+?>([^>]+?)</a></a>#i', "$1$3</a>", $r );
     1481    return $r;
     1482}
     1483
     1484/**
     1485 * Breaks a string into chunks by splitting at whitespace characters.
     1486 * The length of each returned chunk is as close to the specified length goal as possible,
     1487 * with the caveat that each chunk includes its trailing delimiter.
     1488 * Chunks longer than the goal are guaranteed to not have any inner whitespace.
     1489 *
     1490 * Joining the returned chunks with empty delimiters reconstructs the input string losslessly.
     1491 *
     1492 * Input string must have no null characters (or eventual transformations on output chunks must not care about null characters)
     1493 *
     1494 * <code>
     1495 * _split_str_by_whitespace( "1234 67890 1234 67890a cd 1234   890 123456789 1234567890a    45678   1 3 5 7 90 ", 10 ) ==
     1496 * array (
     1497 *   0 => '1234 67890 ',  // 11 characters: Perfect split
     1498 *   1 => '1234 ',        //  5 characters: '1234 67890a' was too long
     1499 *   2 => '67890a cd ',   // 10 characters: '67890a cd 1234' was too long
     1500 *   3 => '1234   890 ',  // 11 characters: Perfect split
     1501 *   4 => '123456789 ',   // 10 characters: '123456789 1234567890a' was too long
     1502 *   5 => '1234567890a ', // 12 characters: Too long, but no inner whitespace on which to split
     1503 *   6 => '   45678   ',  // 11 characters: Perfect split
     1504 *   7 => '1 3 5 7 90 ',  // 11 characters: End of $string
     1505 * );
     1506 * </code>
     1507 *
     1508 * @since 3.4.0
     1509 * @access private
     1510 *
     1511 * @param string $string The string to split
     1512 * @param    int $goal   The desired chunk length.
     1513 * @return array Numeric array of chunks.
     1514 */
     1515function _split_str_by_whitespace( $string, $goal ) {
     1516    $chunks = array();
     1517
     1518    $string_nullspace = strtr( $string, "\r\n\t\v\f ", "\000\000\000\000\000\000" );
     1519
     1520    while ( $goal < strlen( $string_nullspace ) ) {
     1521        $pos = strrpos( substr( $string_nullspace, 0, $goal + 1 ), "\000" );
     1522
     1523        if ( false === $pos ) {
     1524            $pos = strpos( $string_nullspace, "\000", $goal + 1 );
     1525            if ( false === $pos ) {
     1526                break;
     1527            }
     1528        }
     1529
     1530        $chunks[] = substr( $string, 0, $pos + 1 );
     1531        $string = substr( $string, $pos + 1 );
     1532        $string_nullspace = substr( $string_nullspace, $pos + 1 );
     1533    }
     1534
     1535    if ( $string ) {
     1536        $chunks[] = $string;
     1537    }
     1538
     1539    return $chunks;
    14381540}
    14391541
Note: See TracChangeset for help on using the changeset viewer.