Make WordPress Core


Ignore:
Timestamp:
07/23/2015 04:49:25 AM (9 years ago)
Author:
pento
Message:

Shortcodes: Improve the reliability of shortcodes inside HTML tags.

Merge of [33359] to the 4.0 branch.

Props miqrogroove.

See #15694.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/4.0/src/wp-includes/shortcodes.php

    r29207 r33381  
    189189 *
    190190 * @param string $content Content to search for shortcodes
     191 * @param bool $ignore_html When true, shortcodes inside HTML elements will be skipped.
    191192 * @return string Content with shortcodes filtered out.
    192193 */
    193 function do_shortcode($content) {
     194function do_shortcode( $content, $ignore_html = false ) {
    194195    global $shortcode_tags;
    195196
     
    201202        return $content;
    202203
     204    $tagnames = array_keys($shortcode_tags);
     205    $tagregexp = join( '|', array_map('preg_quote', $tagnames) );
     206    $pattern = "/\\[($tagregexp)/s";
     207
     208    if ( 1 !== preg_match( $pattern, $content ) ) {
     209        // Avoids parsing HTML when there are no shortcodes or embeds anyway.
     210        return $content;
     211    }
     212
     213    $content = do_shortcodes_in_html_tags( $content, $ignore_html );
     214
    203215    $pattern = get_shortcode_regex();
    204     return preg_replace_callback( "/$pattern/s", 'do_shortcode_tag', $content );
     216    $content = preg_replace_callback( "/$pattern/s", 'do_shortcode_tag', $content );
     217   
     218    // Always restore square braces so we don't break things like <!--[if IE ]>
     219    $content = unescape_invalid_shortcodes( $content );
     220   
     221    return $content;
    205222}
    206223
     
    296313
    297314/**
     315 * Search only inside HTML elements for shortcodes and process them.
     316 *
     317 * Any [ or ] characters remaining inside elements will be HTML encoded
     318 * to prevent interference with shortcodes that are outside the elements.
     319 * Assumes $content processed by KSES already.  Users with unfiltered_html
     320 * capability may get unexpected output if angle braces are nested in tags.
     321 *
     322 * @since 4.2.3
     323 *
     324 * @param string $content Content to search for shortcodes
     325 * @param bool $ignore_html When true, all square braces inside elements will be encoded.
     326 * @return string Content with shortcodes filtered out.
     327 */
     328function do_shortcodes_in_html_tags( $content, $ignore_html ) {
     329    // Normalize entities in unfiltered HTML before adding placeholders.
     330    $trans = array( '&#91;' => '&#091;', '&#93;' => '&#093;' );
     331    $content = strtr( $content, $trans );
     332    $trans = array( '[' => '&#91;', ']' => '&#93;' );
     333   
     334    $pattern = get_shortcode_regex();
     335
     336    $comment_regex =
     337          '!'           // Start of comment, after the <.
     338        . '(?:'         // Unroll the loop: Consume everything until --> is found.
     339        .     '-(?!->)' // Dash not followed by end of comment.
     340        .     '[^\-]*+' // Consume non-dashes.
     341        . ')*+'         // Loop possessively.
     342        . '(?:-->)?';   // End of comment. If not found, match all input.
     343
     344    $regex =
     345          '/('                   // Capture the entire match.
     346        .     '<'                // Find start of element.
     347        .     '(?(?=!--)'        // Is this a comment?
     348        .         $comment_regex // Find end of comment.
     349        .     '|'
     350        .         '[^>]*>?'      // Find end of element. If not found, match all input.
     351        .     ')'
     352        . ')/s';
     353
     354    $textarr = preg_split( $regex, $content, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );
     355
     356    foreach ( $textarr as &$element ) {
     357        if ( '<' !== $element[0] ) {
     358            continue;
     359        }
     360
     361        $noopen = false === strpos( $element, '[' );
     362        $noclose = false === strpos( $element, ']' );
     363        if ( $noopen || $noclose ) {
     364            // This element does not contain shortcodes.
     365            if ( $noopen xor $noclose ) {
     366                // Need to encode stray [ or ] chars.
     367                $element = strtr( $element, $trans );
     368            }
     369            continue;
     370        }
     371
     372        if ( $ignore_html || '<!--' === substr( $element, 0, 4 ) ) {
     373            // Encode all [ and ] chars.
     374            $element = strtr( $element, $trans );
     375            continue;
     376        }
     377
     378        $attributes = wp_kses_attr_parse( $element );
     379        if ( false === $attributes ) {
     380            // Looks like we found some crazy unfiltered HTML.  Skipping it for sanity.
     381            $element = strtr( $element, $trans );
     382            continue;
     383        }
     384       
     385        // Get element name
     386        $front = array_shift( $attributes );
     387        $back = array_pop( $attributes );
     388        $matches = array();
     389        preg_match('%[a-zA-Z0-9]+%', $front, $matches);
     390        $elname = $matches[0];
     391       
     392        // Look for shortcodes in each attribute separately.
     393        foreach ( $attributes as &$attr ) {
     394            $open = strpos( $attr, '[' );
     395            $close = strpos( $attr, ']' );
     396            if ( false === $open || false === $close ) {
     397                continue; // Go to next attribute.  Square braces will be escaped at end of loop.
     398            }
     399            $double = strpos( $attr, '"' );
     400            $single = strpos( $attr, "'" );
     401            if ( ( false === $single || $open < $single ) && ( false === $double || $open < $double ) ) {
     402                // $attr like '[shortcode]' or 'name = [shortcode]' implies unfiltered_html.
     403                // In this specific situation we assume KSES did not run because the input
     404                // was written by an administrator, so we should avoid changing the output
     405                // and we do not need to run KSES here.
     406                $attr = preg_replace_callback( "/$pattern/s", 'do_shortcode_tag', $attr );
     407            } else {
     408                // $attr like 'name = "[shortcode]"' or "name = '[shortcode]'"
     409                // We do not know if $content was unfiltered. Assume KSES ran before shortcodes.
     410                $count = 0;
     411                $new_attr = preg_replace_callback( "/$pattern/s", 'do_shortcode_tag', $attr, -1, $count );
     412                if ( $count > 0 ) {
     413                    // Sanitize the shortcode output using KSES.
     414                    $new_attr = wp_kses_one_attr( $new_attr, $elname );
     415                    if ( '' !== $new_attr ) {
     416                        // The shortcode is safe to use now.
     417                        $attr = $new_attr;
     418                    }
     419                }
     420            }
     421        }
     422        $element = $front . implode( '', $attributes ) . $back;
     423       
     424        // Now encode any remaining [ or ] chars.
     425        $element = strtr( $element, $trans );
     426    }
     427   
     428    $content = implode( '', $textarr );
     429   
     430    return $content;
     431}
     432
     433/**
     434 * Remove placeholders added by do_shortcodes_in_html_tags().
     435 *
     436 * @since 4.2.3
     437 *
     438 * @param string $content Content to search for placeholders.
     439 * @return string Content with placeholders removed.
     440 */
     441function unescape_invalid_shortcodes( $content ) {
     442        // Clean up entire string, avoids re-parsing HTML.
     443        $trans = array( '&#91;' => '[', '&#93;' => ']' );
     444        $content = strtr( $content, $trans );
     445       
     446        return $content;
     447}
     448
     449/**
    298450 * Retrieve all attributes from the shortcodes tag.
    299451 *
     
    394546        return $content;
    395547
     548    $content = do_shortcodes_in_html_tags( $content, true );
     549
    396550    $pattern = get_shortcode_regex();
    397 
    398     return preg_replace_callback( "/$pattern/s", 'strip_shortcode_tag', $content );
     551    $content = preg_replace_callback( "/$pattern/s", 'strip_shortcode_tag', $content );
     552
     553    // Always restore square braces so we don't break things like <!--[if IE ]>
     554    $content = unescape_invalid_shortcodes( $content );
     555   
     556    return $content;
    399557}
    400558
Note: See TracChangeset for help on using the changeset viewer.