Make WordPress Core


Ignore:
Timestamp:
11/20/2014 02:29:03 PM (10 years ago)
Author:
nacin
Message:

Anchor texturize to shortcodes to improve regex efficiency.

For the 4.0 branch; see [30449] for trunk.

props miqrogroove.
see #29557 for segfault issues.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/4.0/src/wp-includes/formatting.php

    --- branches/4.0/src/wp-includes/formatting.php (r29707)
    +++ branches/4.0/src/wp-includes/formatting.php (r30450)
    @@ -29,5 +29,5 @@
      */
     function wptexturize($text, $reset = false) {
    -    global $wp_cockneyreplace;
    +    global $wp_cockneyreplace, $shortcode_tags;
         static $static_characters, $static_replacements, $dynamic_characters, $dynamic_replacements,
             $default_no_texturize_tags, $default_no_texturize_shortcodes, $run_texturize = true;
    @@ -206,8 +206,20 @@
         // Look for shortcodes and HTML elements.
     
    +    $tagnames = array_keys( $shortcode_tags );
    +    $tagregexp = join( '|', array_map( 'preg_quote', $tagnames ) );
    +    $tagregexp = "(?:$tagregexp)(?![\\w-])"; // Excerpt of get_shortcode_regex().
    +
    +    $comment_regex =
    +          '!'           // Start of comment, after the <.
    +        . '(?:'         // Unroll the loop: Consume everything until --> is found.
    +        .     '-(?!->)' // Dash not followed by end of comment.
    +        .     '[^\-]*+' // Consume non-dashes.
    +        . ')*+'         // Loop possessively.
    +        . '-->';        // End of comment.
    +
         $regex =  '/('          // Capture the entire match.
             .   '<'     // Find start of element.
             .   '(?(?=!--)' // Is this a comment?
    -        .       '.+?--\s*>' // Find end of comment
    +        .       $comment_regex  // Find end of comment
             .   '|'
             .       '[^>]+>'    // Find end of element
    @@ -215,10 +227,11 @@
             . '|'
             .   '\['        // Find start of shortcode.
    -        .   '\[?'       // Shortcodes may begin with [[
    +        .   '[\/\[]?'   // Shortcodes may begin with [/ or [[
    +        .   $tagregexp  // Only match registered shortcodes, because performance.
             .   '(?:'
    -        .       '[^\[\]<>]' // Shortcodes do not contain other shortcodes.
    +        .       '[^\[\]<>]+'    // Shortcodes do not contain other shortcodes. Quantifier critical.
             .   '|'
    -        .       '<[^>]+>'   // HTML elements permitted. Prevents matching ] before >.
    -        .   ')++'
    +        .       '<[^\[\]>]*>'   // HTML elements permitted. Prevents matching ] before >.
    +        .   ')*+'       // Possessive critical.
             .   '\]'        // Find end of shortcode.
             .   '\]?'       // Shortcodes may end with ]]
    @@ -242,10 +255,10 @@
                 continue;
     
    -        } elseif ( '[' === $first && 1 === preg_match( '/^\[(?:[^\[\]<>]|<[^>]+>)++\]$/', $curl ) ) {
    +        } elseif ( '[' === $first && 1 === preg_match( '/^\[\/?' . $tagregexp . '(?:[^\[\]<>]+|<[^\[\]>]*>)*+\]$/', $curl ) ) {
                 // This is a shortcode delimiter.
     
                 _wptexturize_pushpop_element( $curl, $no_texturize_shortcodes_stack, $no_texturize_shortcodes );
     
    -        } elseif ( '[' === $first && 1 === preg_match( '/^\[\[?(?:[^\[\]<>]|<[^>]+>)++\]\]?$/', $curl ) ) {
    +        } elseif ( '[' === $first && 1 === preg_match( '/^\[[\/\[]?' . $tagregexp . '(?:[^\[\]<>]+|<[^\[\]>]*>)*+\]\]?$/', $curl ) ) {
                 // This is an escaped shortcode delimiter.
     
Note: See TracChangeset for help on using the changeset viewer.