Make WordPress Core


Ignore:
Timestamp:
10/01/2015 06:04:13 PM (9 years ago)
Author:
wonderboymusic
Message:

Shortcodes: Fix PCRE performance bugs in get_shortcode_regex() and related to wptexturize(), do_shortcode(), and strip_shortcodes().

Alters unit tests.

Props miqrogroove.
Fixes #33517.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/wp-includes/formatting.php

    r34727 r34747  
    217217    // Look for shortcodes and HTML elements.
    218218
    219     $tagnames = array_keys( $shortcode_tags );
    220     $tagregexp = join( '|', array_map( 'preg_quote', $tagnames ) );
    221     $tagregexp = "(?:$tagregexp)(?![\\w-])"; // Excerpt of get_shortcode_regex().
     219    preg_match_all( '@\[/?([^<>&/\[\]\x00-\x20]++)@', $text, $matches );
     220    $tagnames = array_intersect( array_keys( $shortcode_tags ), $matches[1] );
     221    $found_shortcodes = ! empty( $tagnames );
     222    if ( $found_shortcodes ) {
     223        $tagregexp = join( '|', array_map( 'preg_quote', $tagnames ) );
     224        $tagregexp = "(?:$tagregexp)(?![\\w-])"; // Excerpt of get_shortcode_regex().
     225        $shortcode_regex =
     226              '\['              // Find start of shortcode.
     227            . '[\/\[]?'         // Shortcodes may begin with [/ or [[
     228            . $tagregexp        // Only match registered shortcodes, because performance.
     229            . '(?:'
     230            .     '[^\[\]<>]+'  // Shortcodes do not contain other shortcodes. Quantifier critical.
     231            . '|'
     232            .     '<[^\[\]>]*>' // HTML elements permitted. Prevents matching ] before >.
     233            . ')*+'             // Possessive critical.
     234            . '\]'              // Find end of shortcode.
     235            . '\]?';            // Shortcodes may end with ]]
     236    }
    222237
    223238    $comment_regex =
     
    229244        . '(?:-->)?';   // End of comment. If not found, match all input.
    230245
    231     $shortcode_regex =
    232           '\['              // Find start of shortcode.
    233         . '[\/\[]?'         // Shortcodes may begin with [/ or [[
    234         . $tagregexp        // Only match registered shortcodes, because performance.
    235         . '(?:'
    236         .     '[^\[\]<>]+'  // Shortcodes do not contain other shortcodes. Quantifier critical.
     246    $html_regex =            // Needs to be replaced with wp_html_split() per Shortcode API Roadmap.
     247          '<'                // Find start of element.
     248        . '(?(?=!--)'        // Is this a comment?
     249        .     $comment_regex // Find end of comment.
    237250        . '|'
    238         .     '<[^\[\]>]*>' // HTML elements permitted. Prevents matching ] before >.
    239         . ')*+'             // Possessive critical.
    240         . '\]'              // Find end of shortcode.
    241         . '\]?';            // Shortcodes may end with ]]
    242 
    243     $regex =
    244           '/('                   // Capture the entire match.
    245         .     '<'                // Find start of element.
    246         .     '(?(?=!--)'        // Is this a comment?
    247         .         $comment_regex // Find end of comment.
    248         .     '|'
    249         .         '[^>]*>'       // Find end of element.
    250         .     ')'
    251         . '|'
    252         .     $shortcode_regex   // Find shortcodes.
    253         . ')/s';
     251        .     '[^>]*>?'      // Find end of element. If not found, match all input.
     252        . ')';
     253
     254    if ( $found_shortcodes ) {
     255        $regex = '/(' . $html_regex . '|' . $shortcode_regex . ')/s';
     256    } else {
     257        $regex = '/(' . $html_regex . ')/s';
     258    }
    254259
    255260    $textarr = preg_split( $regex, $text, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );
     
    258263        // Only call _wptexturize_pushpop_element if $curl is a delimiter.
    259264        $first = $curl[0];
    260         if ( '<' === $first && '<!--' === substr( $curl, 0, 4 ) ) {
    261             // This is an HTML comment delimiter.
    262 
    263             continue;
    264 
    265         } elseif ( '<' === $first && '>' === substr( $curl, -1 ) ) {
    266             // This is an HTML element delimiter.
    267 
    268             _wptexturize_pushpop_element( $curl, $no_texturize_tags_stack, $no_texturize_tags );
     265        if ( '<' === $first ) {
     266            if ( '<!--' === substr( $curl, 0, 4 ) ) {
     267                // This is an HTML comment delimiter.
     268                continue;
     269            } else {
     270                // This is an HTML element delimiter.
     271                _wptexturize_pushpop_element( $curl, $no_texturize_tags_stack, $no_texturize_tags );
     272            }
    269273
    270274        } elseif ( '' === trim( $curl ) ) {
    271275            // This is a newline between delimiters.  Performance improves when we check this.
    272 
    273276            continue;
    274277
    275         } elseif ( '[' === $first && 1 === preg_match( '/^' . $shortcode_regex . '$/', $curl ) ) {
     278        } elseif ( '[' === $first && $found_shortcodes && 1 === preg_match( '/^' . $shortcode_regex . '$/', $curl ) ) {
    276279            // This is a shortcode delimiter.
    277280
Note: See TracChangeset for help on using the changeset viewer.