Make WordPress Core


Ignore:
Timestamp:
07/22/2015 05:14:50 AM (9 years ago)
Author:
pento
Message:

Shortcodes: Improve the reliability of shortcodes inside HTML tags.

Props miqrogroove.

See #15694.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/wp-includes/shortcodes.php

    r33118 r33359  
    183183 *
    184184 * @param string $content Content to search for shortcodes.
     185 * @param bool $ignore_html When true, shortcodes inside HTML elements will be skipped.
    185186 * @return string Content with shortcodes filtered out.
    186187 */
    187 function do_shortcode($content) {
     188function do_shortcode( $content, $ignore_html = false ) {
    188189    global $shortcode_tags;
    189190
     
    195196        return $content;
    196197
     198    $tagnames = array_keys($shortcode_tags);
     199    $tagregexp = join( '|', array_map('preg_quote', $tagnames) );
     200    $pattern = "/\\[($tagregexp)/s";
     201
     202    if ( 1 !== preg_match( $pattern, $content ) ) {
     203        // Avoids parsing HTML when there are no shortcodes or embeds anyway.
     204        return $content;
     205    }
     206
     207    $content = do_shortcodes_in_html_tags( $content, $ignore_html );
     208
    197209    $pattern = get_shortcode_regex();
    198     return preg_replace_callback( "/$pattern/s", 'do_shortcode_tag', $content );
     210    $content = preg_replace_callback( "/$pattern/s", 'do_shortcode_tag', $content );
     211   
     212    // Always restore square braces so we don't break things like <!--[if IE ]>
     213    $content = unescape_invalid_shortcodes( $content );
     214   
     215    return $content;
    199216}
    200217
     
    297314
    298315/**
     316 * Search only inside HTML elements for shortcodes and process them.
     317 *
     318 * Any [ or ] characters remaining inside elements will be HTML encoded
     319 * to prevent interference with shortcodes that are outside the elements.
     320 * Assumes $content processed by KSES already.  Users with unfiltered_html
     321 * capability may get unexpected output if angle braces are nested in tags.
     322 *
     323 * @since 4.2.3
     324 *
     325 * @param string $content Content to search for shortcodes.
     326 * @param bool $ignore_html When true, all square braces inside elements will be encoded.
     327 * @return string Content with shortcodes inside HTML elements processed (unless $ignore_html is true) and any remaining [ or ] characters inside elements HTML encoded.
     328 */
     329function do_shortcodes_in_html_tags( $content, $ignore_html ) {
     330    // Rewrite any existing &#91; / &#93; entities as &#091; / &#093; so they cannot be mistaken for the placeholders added below.
     331    $trans = array( '&#91;' => '&#091;', '&#93;' => '&#093;' );
     332    $content = strtr( $content, $trans );
     333    $trans = array( '[' => '&#91;', ']' => '&#93;' ); // Placeholder map reused by every encoding step below.
     334   
     335    $pattern = get_shortcode_regex();
     336
     337    $comment_regex =
     338          '!'           // Start of comment, after the <.
     339        . '(?:'         // Unroll the loop: Consume everything until --> is found.
     340        .     '-(?!->)' // Dash not followed by end of comment.
     341        .     '[^\-]*+' // Consume non-dashes.
     342        . ')*+'         // Loop possessively.
     343        . '(?:-->)?';   // End of comment. If not found, match all input.
     344
     345    $regex =
     346          '/('                   // Capture the entire match.
     347        .     '<'                // Find start of element.
     348        .     '(?(?=!--)'        // Is this a comment?
     349        .         $comment_regex // Find end of comment.
     350        .     '|'
     351        .         '[^>]*>?'      // Find end of element. If not found, match all input.
     352        .     ')'
     353        . ')/s';
     354
     355    $textarr = preg_split( $regex, $content, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );
     356
     357    foreach ( $textarr as &$element ) {
     358        if ( '<' !== $element[0] ) {
     359            continue; // Not an element or comment: this piece is left unchanged by the loop.
     360        }
     361
     362        $noopen = false === strpos( $element, '[' );
     363        $noclose = false === strpos( $element, ']' );
     364        if ( $noopen || $noclose ) {
     365            // Without both a [ and a ], this element cannot contain a shortcode.
     366            if ( $noopen xor $noclose ) {
     367                // Exactly one kind of brace is present: encode the stray [ or ] chars.
     368                $element = strtr( $element, $trans );
     369            }
     370            continue;
     371        }
     372
     373        if ( $ignore_html || '<!--' === substr( $element, 0, 4 ) ) {
     374            // Caller asked to ignore HTML, or this is a comment: encode all [ and ] chars.
     375            $element = strtr( $element, $trans );
     376            continue;
     377        }
     378
     379        $attributes = wp_kses_attr_parse( $element );
     380        if ( false === $attributes ) {
     381            // Attribute parsing failed, so this is likely odd unfiltered HTML.  Encode its braces and skip it.
     382            $element = strtr( $element, $trans );
     383            continue;
     384        }
     385       
     386        // Split off the leading and trailing pieces, then extract the element name from the leading piece.
     387        $front = array_shift( $attributes );
     388        $back = array_pop( $attributes );
     389        $matches = array();
     390        preg_match('%[a-zA-Z0-9]+%', $front, $matches);
     391        $elname = $matches[0];
     392       
     393        // Look for shortcodes in each attribute separately.
     394        foreach ( $attributes as &$attr ) {
     395            $open = strpos( $attr, '[' );
     396            $close = strpos( $attr, ']' );
     397            if ( false === $open || false === $close ) {
     398                continue; // Go to next attribute.  Square braces will be escaped at end of loop.
     399            }
     400            $double = strpos( $attr, '"' );
     401            $single = strpos( $attr, "'" );
     402            if ( ( false === $single || $open < $single ) && ( false === $double || $open < $double ) ) {
     403                // $attr like '[shortcode]' or 'name = [shortcode]' implies unfiltered_html.
     404                // In this specific situation we assume KSES did not run because the input
     405                // was written by an administrator, so we should avoid changing the output
     406                // and we do not need to run KSES here.
     407                $attr = preg_replace_callback( "/$pattern/s", 'do_shortcode_tag', $attr );
     408            } else {
     409                // $attr like 'name = "[shortcode]"' or "name = '[shortcode]'"
     410                // We do not know if $content was unfiltered. Assume KSES ran before shortcodes.
     411                $count = 0;
     412                $new_attr = preg_replace_callback( "/$pattern/s", 'do_shortcode_tag', $attr, -1, $count );
     413                if ( $count > 0 ) {
     414                    // Sanitize the shortcode output using KSES.
     415                    $new_attr = wp_kses_one_attr( $new_attr, $elname );
     416                    if ( '' !== $new_attr ) {
     417                        // Non-empty result: use the sanitized, expanded attribute.  Otherwise $attr stays unchanged.
     418                        $attr = $new_attr;
     419                    }
     420                }
     421            }
     422        }
     423        $element = $front . implode( '', $attributes ) . $back;
     424       
     425        // Now encode any remaining [ or ] chars.
     426        $element = strtr( $element, $trans );
     427    }
     428   
     429    $content = implode( '', $textarr );
     430   
     431    return $content;
     432}
     433
     434/**
     435 * Restore the [ and ] characters that do_shortcodes_in_html_tags() encoded as &#91; and &#93; placeholders.
     436 *
     437 * @since 4.2.3
     438 *
     439 * @param string $content Content to search for placeholders.
     440 * @return string Content with placeholders replaced by literal square braces.
     441 */
     442function unescape_invalid_shortcodes( $content ) {
     443        // Replace across the entire string in one pass, which avoids re-parsing HTML.
     444        $trans = array( '&#91;' => '[', '&#93;' => ']' );
     445        $content = strtr( $content, $trans );
     446       
     447        return $content;
     448}
     449
     450/**
    299451 * Retrieve all attributes from the shortcodes tag.
    300452 *
     
    395547        return $content;
    396548
     549    $content = do_shortcodes_in_html_tags( $content, true );
     550
    397551    $pattern = get_shortcode_regex();
    398 
    399     return preg_replace_callback( "/$pattern/s", 'strip_shortcode_tag', $content );
     552    $content = preg_replace_callback( "/$pattern/s", 'strip_shortcode_tag', $content );
     553
     554    // Always restore square braces so we don't break things like <!--[if IE ]>
     555    $content = unescape_invalid_shortcodes( $content );
     556   
     557    return $content;
    400558}
    401559
Note: See TracChangeset for help on using the changeset viewer.