Make WordPress Core


Ignore:
Timestamp:
07/23/2015 04:36:55 AM (9 years ago)
Author:
pento
Message:

Shortcodes: Improve the reliability of shortcodes inside HTML tags.

Merge of [33359] to the 4.1 branch.

Props miqrogroove.

See #15694.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/4.1/src/wp-includes/shortcodes.php

    r30545 r33380  
    182182 *
    183183 * @param string $content Content to search for shortcodes
     184 * @param bool $ignore_html When true, shortcodes inside HTML elements will be skipped.
    184185 * @return string Content with shortcodes filtered out.
    185186 */
    186 function do_shortcode($content) {
     187function do_shortcode( $content, $ignore_html = false ) {
    187188    global $shortcode_tags;
    188189
     
    194195        return $content;
    195196
     197    $tagnames = array_keys($shortcode_tags);
     198    $tagregexp = join( '|', array_map('preg_quote', $tagnames) );
     199    $pattern = "/\\[($tagregexp)/s";
     200
     201    if ( 1 !== preg_match( $pattern, $content ) ) {
     202        // Avoids parsing HTML when there are no shortcodes or embeds anyway.
     203        return $content;
     204    }
     205
     206    $content = do_shortcodes_in_html_tags( $content, $ignore_html );
     207
    196208    $pattern = get_shortcode_regex();
    197     return preg_replace_callback( "/$pattern/s", 'do_shortcode_tag', $content );
     209    $content = preg_replace_callback( "/$pattern/s", 'do_shortcode_tag', $content );
     210   
     211    // Always restore square braces so we don't break things like <!--[if IE ]>
     212    $content = unescape_invalid_shortcodes( $content );
     213   
     214    return $content;
    198215}
    199216
     
    289306
    290307/**
     308 * Search only inside HTML elements for shortcodes and process them.
     309 *
     310 * Any [ or ] characters remaining inside elements will be HTML encoded
     311 * to prevent interference with shortcodes that are outside the elements.
     312 * Assumes $content processed by KSES already.  Users with unfiltered_html
     313 * capability may get unexpected output if angle braces are nested in tags.
     314 *
     315 * @since 4.2.3
     316 *
     317 * @param string $content Content to search for shortcodes
     318 * @param bool $ignore_html When true, all square braces inside elements will be encoded.
     319 * @return string Content with shortcodes filtered out.
     320 */
     321function do_shortcodes_in_html_tags( $content, $ignore_html ) {
     322    // Normalize entities in unfiltered HTML before adding placeholders.
     323    $trans = array( '&#91;' => '&#091;', '&#93;' => '&#093;' );
     324    $content = strtr( $content, $trans );
     325    $trans = array( '[' => '&#91;', ']' => '&#93;' );
     326   
     327    $pattern = get_shortcode_regex();
     328
     329    $comment_regex =
     330          '!'           // Start of comment, after the <.
     331        . '(?:'         // Unroll the loop: Consume everything until --> is found.
     332        .     '-(?!->)' // Dash not followed by end of comment.
     333        .     '[^\-]*+' // Consume non-dashes.
     334        . ')*+'         // Loop possessively.
     335        . '(?:-->)?';   // End of comment. If not found, match all input.
     336
     337    $regex =
     338          '/('                   // Capture the entire match.
     339        .     '<'                // Find start of element.
     340        .     '(?(?=!--)'        // Is this a comment?
     341        .         $comment_regex // Find end of comment.
     342        .     '|'
     343        .         '[^>]*>?'      // Find end of element. If not found, match all input.
     344        .     ')'
     345        . ')/s';
     346
     347    $textarr = preg_split( $regex, $content, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );
     348
     349    foreach ( $textarr as &$element ) {
     350        if ( '<' !== $element[0] ) {
     351            continue;
     352        }
     353
     354        $noopen = false === strpos( $element, '[' );
     355        $noclose = false === strpos( $element, ']' );
     356        if ( $noopen || $noclose ) {
     357            // This element does not contain shortcodes.
     358            if ( $noopen xor $noclose ) {
     359                // Need to encode stray [ or ] chars.
     360                $element = strtr( $element, $trans );
     361            }
     362            continue;
     363        }
     364
     365        if ( $ignore_html || '<!--' === substr( $element, 0, 4 ) ) {
     366            // Encode all [ and ] chars.
     367            $element = strtr( $element, $trans );
     368            continue;
     369        }
     370
     371        $attributes = wp_kses_attr_parse( $element );
     372        if ( false === $attributes ) {
     373            // Looks like we found some crazy unfiltered HTML.  Skipping it for sanity.
     374            $element = strtr( $element, $trans );
     375            continue;
     376        }
     377       
     378        // Get element name
     379        $front = array_shift( $attributes );
     380        $back = array_pop( $attributes );
     381        $matches = array();
     382        preg_match('%[a-zA-Z0-9]+%', $front, $matches);
     383        $elname = $matches[0];
     384       
     385        // Look for shortcodes in each attribute separately.
     386        foreach ( $attributes as &$attr ) {
     387            $open = strpos( $attr, '[' );
     388            $close = strpos( $attr, ']' );
     389            if ( false === $open || false === $close ) {
     390                continue; // Go to next attribute.  Square braces will be escaped at end of loop.
     391            }
     392            $double = strpos( $attr, '"' );
     393            $single = strpos( $attr, "'" );
     394            if ( ( false === $single || $open < $single ) && ( false === $double || $open < $double ) ) {
     395                // $attr like '[shortcode]' or 'name = [shortcode]' implies unfiltered_html.
     396                // In this specific situation we assume KSES did not run because the input
     397                // was written by an administrator, so we should avoid changing the output
     398                // and we do not need to run KSES here.
     399                $attr = preg_replace_callback( "/$pattern/s", 'do_shortcode_tag', $attr );
     400            } else {
     401                // $attr like 'name = "[shortcode]"' or "name = '[shortcode]'"
     402                // We do not know if $content was unfiltered. Assume KSES ran before shortcodes.
     403                $count = 0;
     404                $new_attr = preg_replace_callback( "/$pattern/s", 'do_shortcode_tag', $attr, -1, $count );
     405                if ( $count > 0 ) {
     406                    // Sanitize the shortcode output using KSES.
     407                    $new_attr = wp_kses_one_attr( $new_attr, $elname );
     408                    if ( '' !== $new_attr ) {
     409                        // The shortcode is safe to use now.
     410                        $attr = $new_attr;
     411                    }
     412                }
     413            }
     414        }
     415        $element = $front . implode( '', $attributes ) . $back;
     416       
     417        // Now encode any remaining [ or ] chars.
     418        $element = strtr( $element, $trans );
     419    }
     420   
     421    $content = implode( '', $textarr );
     422   
     423    return $content;
     424}
     425
     426/**
     427 * Remove placeholders added by do_shortcodes_in_html_tags().
     428 *
     429 * @since 4.2.3
     430 *
     431 * @param string $content Content to search for placeholders.
     432 * @return string Content with placeholders removed.
     433 */
     434function unescape_invalid_shortcodes( $content ) {
     435        // Clean up entire string, avoids re-parsing HTML.
     436        $trans = array( '&#91;' => '[', '&#93;' => ']' );
     437        $content = strtr( $content, $trans );
     438       
     439        return $content;
     440}
     441
     442/**
    291443 * Retrieve all attributes from the shortcodes tag.
    292444 *
     
    387539        return $content;
    388540
     541    $content = do_shortcodes_in_html_tags( $content, true );
     542
    389543    $pattern = get_shortcode_regex();
    390 
    391     return preg_replace_callback( "/$pattern/s", 'strip_shortcode_tag', $content );
     544    $content = preg_replace_callback( "/$pattern/s", 'strip_shortcode_tag', $content );
     545
     546    // Always restore square braces so we don't break things like <!--[if IE ]>
     547    $content = unescape_invalid_shortcodes( $content );
     548   
     549    return $content;
    392550}
    393551
Note: See TracChangeset for help on using the changeset viewer.