Make WordPress Core


Ignore:
Timestamp:
07/22/2015 05:43:35 AM (10 years ago)
Author:
pento
Message:

Shortcodes: Improve the reliability of shortcodes inside HTML tags.

Merge of [33359] to the 4.2 branch.

Props miqrogroove.

See #15694.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/4.2/src/wp-includes/shortcodes.php

    r32116 r33360  
    185185 *
    186186 * @param string $content Content to search for shortcodes.
     187 * @param bool $ignore_html When true, shortcodes inside HTML elements will be skipped.
    187188 * @return string Content with shortcodes filtered out.
    188189 */
    189 function do_shortcode($content) {
     190function do_shortcode( $content, $ignore_html = false ) {
    190191    global $shortcode_tags;
    191192
     
    197198        return $content;
    198199
     200    $tagnames = array_keys($shortcode_tags);
     201    $tagregexp = join( '|', array_map('preg_quote', $tagnames) );
     202    $pattern = "/\\[($tagregexp)/s";
     203
     204    if ( 1 !== preg_match( $pattern, $content ) ) {
     205        // Avoids parsing HTML when there are no shortcodes or embeds anyway.
     206        return $content;
     207    }
     208
     209    $content = do_shortcodes_in_html_tags( $content, $ignore_html );
     210
    199211    $pattern = get_shortcode_regex();
    200     return preg_replace_callback( "/$pattern/s", 'do_shortcode_tag', $content );
     212    $content = preg_replace_callback( "/$pattern/s", 'do_shortcode_tag', $content );
     213   
     214    // Always restore square braces so we don't break things like <!--[if IE ]>
     215    $content = unescape_invalid_shortcodes( $content );
     216   
     217    return $content;
    201218}
    202219
     
    292309
    /**
     * Search only inside HTML elements for shortcodes and process them.
     *
     * The content is split into HTML elements/comments and the text between
     * them; only tokens that start with `<` are examined. Shortcodes found in
     * an element's attributes are expanded one attribute at a time, and any
     * [ or ] characters remaining inside elements are HTML encoded to prevent
     * interference with shortcodes that are outside the elements. Those
     * placeholders are meant to be decoded again by unescape_invalid_shortcodes()
     * once the caller has finished its own shortcode pass.
     *
     * Assumes $content processed by KSES already.  Users with unfiltered_html
     * capability may get unexpected output if angle braces are nested in tags.
     *
     * @since 4.2.3
     *
     * @param string $content     Content to search for shortcodes.
     * @param bool   $ignore_html When true, all square braces inside elements will be encoded.
     * @return string Content with shortcodes inside HTML elements processed and the
     *                remaining square braces inside elements encoded.
     */
    function do_shortcodes_in_html_tags( $content, $ignore_html ) {
        // Normalize entities in unfiltered HTML before adding placeholders, so that
        // entities written by the author are not mistaken for our placeholders
        // when the placeholders are decoded later.
        $content = strtr( $content, array( '&#91;' => '&#091;', '&#93;' => '&#093;' ) );

        // Placeholder encoding applied to square braces that must not be parsed.
        $trans = array( '[' => '&#91;', ']' => '&#93;' );

        $pattern = get_shortcode_regex();
        $shortcode_regex = "/$pattern/s";

        $comment_regex =
              '!'           // Start of comment, after the <.
            . '(?:'         // Unroll the loop: Consume everything until --> is found.
            .     '-(?!->)' // Dash not followed by end of comment.
            .     '[^\-]*+' // Consume non-dashes.
            . ')*+'         // Loop possessively.
            . '(?:-->)?';   // End of comment. If not found, match all input.

        $regex =
              '/('                   // Capture the entire match.
            .     '<'                // Find start of element.
            .     '(?(?=!--)'        // Is this a comment?
            .         $comment_regex // Find end of comment.
            .     '|'
            .         '[^>]*>?'      // Find end of element. If not found, match all input.
            .     ')'
            . ')/s';

        $textarr = preg_split( $regex, $content, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );

        if ( false === $textarr ) {
            // PCRE failed, so elements cannot be told apart from text. Fail closed:
            // encode every square brace so that no shortcode is executed on content
            // we were unable to inspect. The text itself is preserved.
            return strtr( $content, $trans );
        }

        foreach ( $textarr as &$element ) {
            // PREG_SPLIT_NO_EMPTY guarantees at least one character per token.
            if ( '<' !== $element[0] ) {
                continue; // Plain text between elements is left for the caller.
            }

            $noopen = false === strpos( $element, '[' );
            $noclose = false === strpos( $element, ']' );
            if ( $noopen || $noclose ) {
                // This element does not contain shortcodes.
                if ( $noopen xor $noclose ) {
                    // Exactly one kind of brace is present: encode the stray [ or ] chars.
                    $element = strtr( $element, $trans );
                }
                continue;
            }

            if ( $ignore_html || '<!--' === substr( $element, 0, 4 ) ) {
                // Shortcodes are never run inside comments, nor inside any element
                // when the caller asked to ignore HTML. Encode all [ and ] chars.
                $element = strtr( $element, $trans );
                continue;
            }

            $attributes = wp_kses_attr_parse( $element );
            if ( false === $attributes ) {
                // Looks like we found some crazy unfiltered HTML.  Skipping it for sanity.
                $element = strtr( $element, $trans );
                continue;
            }

            // The first and last entries surround the attribute list; they are put
            // back unchanged when the element is reassembled below.
            $front = array_shift( $attributes );
            $back = array_pop( $attributes );

            // Get element name.
            $matches = array();
            preg_match( '%[a-zA-Z0-9]+%', $front, $matches );
            $elname = $matches[0];

            // Look for shortcodes in each attribute separately.
            foreach ( $attributes as &$attr ) {
                $open = strpos( $attr, '[' );
                $close = strpos( $attr, ']' );
                if ( false === $open || false === $close ) {
                    continue; // Go to next attribute.  Square braces will be escaped at end of loop.
                }
                $double = strpos( $attr, '"' );
                $single = strpos( $attr, "'" );
                if ( ( false === $single || $open < $single ) && ( false === $double || $open < $double ) ) {
                    // $attr like '[shortcode]' or 'name = [shortcode]' implies unfiltered_html.
                    // In this specific situation we assume KSES did not run because the input
                    // was written by an administrator, so we should avoid changing the output
                    // and we do not need to run KSES here.
                    $attr = preg_replace_callback( $shortcode_regex, 'do_shortcode_tag', $attr );
                } else {
                    // $attr like 'name = "[shortcode]"' or "name = '[shortcode]'"
                    // We do not know if $content was unfiltered. Assume KSES ran before shortcodes.
                    $count = 0;
                    $new_attr = preg_replace_callback( $shortcode_regex, 'do_shortcode_tag', $attr, -1, $count );
                    if ( $count > 0 ) {
                        // Sanitize the shortcode output using KSES.
                        $new_attr = wp_kses_one_attr( $new_attr, $elname );
                        if ( '' !== $new_attr ) {
                            // The shortcode is safe to use now.
                            $attr = $new_attr;
                        }
                        // Otherwise the original attribute is kept; its braces are
                        // encoded below, so the shortcode will not run later either.
                    }
                }
            }
            // Break the reference so later writes to $attr cannot touch $attributes.
            unset( $attr );

            $element = $front . implode( '', $attributes ) . $back;

            // Now encode any remaining [ or ] chars.
            $element = strtr( $element, $trans );
        }
        // Break the reference so later writes to $element cannot touch $textarr.
        unset( $element );

        return implode( '', $textarr );
    }
     428
     429/**
     430 * Remove placeholders added by do_shortcodes_in_html_tags().
     431 *
     432 * @since 4.2.3
     433 *
     434 * @param string $content Content to search for placeholders.
     435 * @return string Content with placeholders removed.
     436 */
     437function unescape_invalid_shortcodes( $content ) {
     438        // Clean up entire string, avoids re-parsing HTML.
     439        $trans = array( '&#91;' => '[', '&#93;' => ']' );
     440        $content = strtr( $content, $trans );
     441       
     442        return $content;
     443}
     444
     445/**
    294446 * Retrieve all attributes from the shortcodes tag.
    295447 *
     
    390542        return $content;
    391543
     544    $content = do_shortcodes_in_html_tags( $content, true );
     545
    392546    $pattern = get_shortcode_regex();
    393 
    394     return preg_replace_callback( "/$pattern/s", 'strip_shortcode_tag', $content );
     547    $content = preg_replace_callback( "/$pattern/s", 'strip_shortcode_tag', $content );
     548
     549    // Always restore square braces so we don't break things like <!--[if IE ]>
     550    $content = unescape_invalid_shortcodes( $content );
     551   
     552    return $content;
    395553}
    396554
Note: See TracChangeset for help on using the changeset viewer.