Make WordPress Core


Ignore:
Timestamp:
07/23/2015 05:14:09 AM (10 years ago)
Author:
pento
Message:

Shortcodes: Improve the reliability of shortcodes inside HTML tags.

Merge of [33359] to the 3.7 branch.

Props miqrogroove.

See #15694.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/3.7/src/wp-includes/shortcodes.php

    r25881 r33389  
    177177 *
    178178 * @param string $content Content to search for shortcodes
     179 * @param bool $ignore_html When true, shortcodes inside HTML elements will be skipped.
    179180 * @return string Content with shortcodes filtered out.
    180181 */
    181 function do_shortcode($content) {
    182     global $shortcode_tags;
     182function do_shortcode( $content, $ignore_html = false ) {
     183    global $shortcode_tags;
     184
     185    if ( false === strpos( $content, '[' ) ) {
     186        return $content;
     187    }
    183188
    184189    if (empty($shortcode_tags) || !is_array($shortcode_tags))
    185190        return $content;
    186191
     192    $tagnames = array_keys($shortcode_tags);
     193    $tagregexp = join( '|', array_map('preg_quote', $tagnames) );
     194    $pattern = "/\\[($tagregexp)/s";
     195
     196    if ( 1 !== preg_match( $pattern, $content ) ) {
     197        // Avoids parsing HTML when there are no shortcodes or embeds anyway.
     198        return $content;
     199    }
     200
     201    $content = do_shortcodes_in_html_tags( $content, $ignore_html );
     202
    187203    $pattern = get_shortcode_regex();
    188     return preg_replace_callback( "/$pattern/s", 'do_shortcode_tag', $content );
     204    $content = preg_replace_callback( "/$pattern/s", 'do_shortcode_tag', $content );
     205   
     206    // Always restore square braces so we don't break things like <!--[if IE ]>
     207    $content = unescape_invalid_shortcodes( $content );
     208   
     209    return $content;
    189210}
    190211
     
    279300
    280301/**
     302 * Search only inside HTML elements for shortcodes and process them.
     303 *
     304 * Any [ or ] characters remaining inside elements will be HTML encoded
     305 * to prevent interference with shortcodes that are outside the elements.
     306 * Assumes $content processed by KSES already.  Users with unfiltered_html
     307 * capability may get unexpected output if angle braces are nested in tags.
     308 *
     309 * @since 4.2.3
     310 *
     311 * @param string $content Content to search for shortcodes
     312 * @param bool $ignore_html When true, all square braces inside elements will be encoded.
     313 * @return string Content with shortcodes filtered out.
     314 */
     315function do_shortcodes_in_html_tags( $content, $ignore_html ) {
     316    // Normalize entities in unfiltered HTML before adding placeholders.
     317    $trans = array( '&#91;' => '&#091;', '&#93;' => '&#093;' );
     318    $content = strtr( $content, $trans );
     319    $trans = array( '[' => '&#91;', ']' => '&#93;' );
     320   
     321    $pattern = get_shortcode_regex();
     322
     323    $comment_regex =
     324          '!'           // Start of comment, after the <.
     325        . '(?:'         // Unroll the loop: Consume everything until --> is found.
     326        .     '-(?!->)' // Dash not followed by end of comment.
     327        .     '[^\-]*+' // Consume non-dashes.
     328        . ')*+'         // Loop possessively.
     329        . '(?:-->)?';   // End of comment. If not found, match all input.
     330
     331    $regex =
     332          '/('                   // Capture the entire match.
     333        .     '<'                // Find start of element.
     334        .     '(?(?=!--)'        // Is this a comment?
     335        .         $comment_regex // Find end of comment.
     336        .     '|'
     337        .         '[^>]*>?'      // Find end of element. If not found, match all input.
     338        .     ')'
     339        . ')/s';
     340
     341    $textarr = preg_split( $regex, $content, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );
     342
     343    foreach ( $textarr as &$element ) {
     344        if ( '<' !== $element[0] ) {
     345            continue;
     346        }
     347
     348        $noopen = false === strpos( $element, '[' );
     349        $noclose = false === strpos( $element, ']' );
     350        if ( $noopen || $noclose ) {
     351            // This element does not contain shortcodes.
     352            if ( $noopen xor $noclose ) {
     353                // Need to encode stray [ or ] chars.
     354                $element = strtr( $element, $trans );
     355            }
     356            continue;
     357        }
     358
     359        if ( $ignore_html || '<!--' === substr( $element, 0, 4 ) ) {
     360            // Encode all [ and ] chars.
     361            $element = strtr( $element, $trans );
     362            continue;
     363        }
     364
     365        $attributes = wp_kses_attr_parse( $element );
     366        if ( false === $attributes ) {
     367            // Looks like we found some crazy unfiltered HTML.  Skipping it for sanity.
     368            $element = strtr( $element, $trans );
     369            continue;
     370        }
     371       
     372        // Get element name
     373        $front = array_shift( $attributes );
     374        $back = array_pop( $attributes );
     375        $matches = array();
     376        preg_match('%[a-zA-Z0-9]+%', $front, $matches);
     377        $elname = $matches[0];
     378       
     379        // Look for shortcodes in each attribute separately.
     380        foreach ( $attributes as &$attr ) {
     381            $open = strpos( $attr, '[' );
     382            $close = strpos( $attr, ']' );
     383            if ( false === $open || false === $close ) {
     384                continue; // Go to next attribute.  Square braces will be escaped at end of loop.
     385            }
     386            $double = strpos( $attr, '"' );
     387            $single = strpos( $attr, "'" );
     388            if ( ( false === $single || $open < $single ) && ( false === $double || $open < $double ) ) {
     389                // $attr like '[shortcode]' or 'name = [shortcode]' implies unfiltered_html.
     390                // In this specific situation we assume KSES did not run because the input
     391                // was written by an administrator, so we should avoid changing the output
     392                // and we do not need to run KSES here.
     393                $attr = preg_replace_callback( "/$pattern/s", 'do_shortcode_tag', $attr );
     394            } else {
     395                // $attr like 'name = "[shortcode]"' or "name = '[shortcode]'"
     396                // We do not know if $content was unfiltered. Assume KSES ran before shortcodes.
     397                $count = 0;
     398                $new_attr = preg_replace_callback( "/$pattern/s", 'do_shortcode_tag', $attr, -1, $count );
     399                if ( $count > 0 ) {
     400                    // Sanitize the shortcode output using KSES.
     401                    $new_attr = wp_kses_one_attr( $new_attr, $elname );
     402                    if ( '' !== $new_attr ) {
     403                        // The shortcode is safe to use now.
     404                        $attr = $new_attr;
     405                    }
     406                }
     407            }
     408        }
     409        $element = $front . implode( '', $attributes ) . $back;
     410       
     411        // Now encode any remaining [ or ] chars.
     412        $element = strtr( $element, $trans );
     413    }
     414   
     415    $content = implode( '', $textarr );
     416   
     417    return $content;
     418}
     419
     420/**
     421 * Remove placeholders added by do_shortcodes_in_html_tags().
     422 *
     423 * @since 4.2.3
     424 *
     425 * @param string $content Content to search for placeholders.
     426 * @return string Content with placeholders removed.
     427 */
     428function unescape_invalid_shortcodes( $content ) {
     429        // Clean up entire string, avoids re-parsing HTML.
     430        $trans = array( '&#91;' => '[', '&#93;' => ']' );
     431        $content = strtr( $content, $trans );
     432       
     433        return $content;
     434}
     435
     436/**
    281437 * Retrieve all attributes from the shortcodes tag.
    282438 *
     
    372528        return $content;
    373529
     530    $content = do_shortcodes_in_html_tags( $content, true );
     531
    374532    $pattern = get_shortcode_regex();
    375 
    376     return preg_replace_callback( "/$pattern/s", 'strip_shortcode_tag', $content );
     533    $content = preg_replace_callback( "/$pattern/s", 'strip_shortcode_tag', $content );
     534
     535    // Always restore square braces so we don't break things like <!--[if IE ]>
     536    $content = unescape_invalid_shortcodes( $content );
     537   
     538    return $content;
    377539}
    378540
Note: See TracChangeset for help on using the changeset viewer.