WordPress.org

Make WordPress Core


Ignore:
Timestamp:
07/23/2015 05:00:44 AM (6 years ago)
Author:
pento
Message:

Shortcodes: Improve the reliability of shortcodes inside HTML tags.

Merge of [33359] to the 3.9 branch.

Props miqrogroove.

See #15694.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/3.9/src/wp-includes/shortcodes.php

    r27394 r33386  
    186186 *
    187187 * @param string $content Content to search for shortcodes
     188 * @param bool $ignore_html When true, shortcodes inside HTML elements will be skipped.
    188189 * @return string Content with shortcodes filtered out.
    189190 */
    190 function do_shortcode($content) {
     191function do_shortcode( $content, $ignore_html = false ) {
    191192    global $shortcode_tags;
    192193
     
    198199        return $content;
    199200
     201    $tagnames = array_keys($shortcode_tags);
     202    $tagregexp = join( '|', array_map('preg_quote', $tagnames) );
     203    $pattern = "/\\[($tagregexp)/s";
     204
     205    if ( 1 !== preg_match( $pattern, $content ) ) {
     206        // Avoids parsing HTML when there are no shortcodes or embeds anyway.
     207        return $content;
     208    }
     209
     210    $content = do_shortcodes_in_html_tags( $content, $ignore_html );
     211
    200212    $pattern = get_shortcode_regex();
    201     return preg_replace_callback( "/$pattern/s", 'do_shortcode_tag', $content );
     213    $content = preg_replace_callback( "/$pattern/s", 'do_shortcode_tag', $content );
     214   
     215    // Always restore square braces so we don't break things like <!--[if IE ]>
     216    $content = unescape_invalid_shortcodes( $content );
     217   
     218    return $content;
    202219}
    203220
     
    293310
    294311/**
     312 * Search only inside HTML elements for shortcodes and process them.
     313 *
     314 * Any [ or ] characters remaining inside elements will be HTML encoded
     315 * to prevent interference with shortcodes that are outside the elements.
     316 * Assumes $content processed by KSES already.  Users with unfiltered_html
     317 * capability may get unexpected output if angle braces are nested in tags.
     318 *
     319 * @since 4.2.3
     320 *
     321 * @param string $content Content to search for shortcodes
     322 * @param bool $ignore_html When true, all square braces inside elements will be encoded.
     323 * @return string Content with shortcodes filtered out.
     324 */
     325function do_shortcodes_in_html_tags( $content, $ignore_html ) {
     326    // Normalize entities in unfiltered HTML before adding placeholders.
     327    $trans = array( '&#91;' => '&#091;', '&#93;' => '&#093;' );
     328    $content = strtr( $content, $trans );
     329    $trans = array( '[' => '&#91;', ']' => '&#93;' );
     330   
     331    $pattern = get_shortcode_regex();
     332
     333    $comment_regex =
     334          '!'           // Start of comment, after the <.
     335        . '(?:'         // Unroll the loop: Consume everything until --> is found.
     336        .     '-(?!->)' // Dash not followed by end of comment.
     337        .     '[^\-]*+' // Consume non-dashes.
     338        . ')*+'         // Loop possessively.
     339        . '(?:-->)?';   // End of comment. If not found, match all input.
     340
     341    $regex =
     342          '/('                   // Capture the entire match.
     343        .     '<'                // Find start of element.
     344        .     '(?(?=!--)'        // Is this a comment?
     345        .         $comment_regex // Find end of comment.
     346        .     '|'
     347        .         '[^>]*>?'      // Find end of element. If not found, match all input.
     348        .     ')'
     349        . ')/s';
     350
     351    $textarr = preg_split( $regex, $content, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );
     352
     353    foreach ( $textarr as &$element ) {
     354        if ( '<' !== $element[0] ) {
     355            continue;
     356        }
     357
     358        $noopen = false === strpos( $element, '[' );
     359        $noclose = false === strpos( $element, ']' );
     360        if ( $noopen || $noclose ) {
     361            // This element does not contain shortcodes.
     362            if ( $noopen xor $noclose ) {
     363                // Need to encode stray [ or ] chars.
     364                $element = strtr( $element, $trans );
     365            }
     366            continue;
     367        }
     368
     369        if ( $ignore_html || '<!--' === substr( $element, 0, 4 ) ) {
     370            // Encode all [ and ] chars.
     371            $element = strtr( $element, $trans );
     372            continue;
     373        }
     374
     375        $attributes = wp_kses_attr_parse( $element );
     376        if ( false === $attributes ) {
     377            // Looks like we found some crazy unfiltered HTML.  Skipping it for sanity.
     378            $element = strtr( $element, $trans );
     379            continue;
     380        }
     381       
     382        // Get element name
     383        $front = array_shift( $attributes );
     384        $back = array_pop( $attributes );
     385        $matches = array();
     386        preg_match('%[a-zA-Z0-9]+%', $front, $matches);
     387        $elname = $matches[0];
     388       
     389        // Look for shortcodes in each attribute separately.
     390        foreach ( $attributes as &$attr ) {
     391            $open = strpos( $attr, '[' );
     392            $close = strpos( $attr, ']' );
     393            if ( false === $open || false === $close ) {
     394                continue; // Go to next attribute.  Square braces will be escaped at end of loop.
     395            }
     396            $double = strpos( $attr, '"' );
     397            $single = strpos( $attr, "'" );
     398            if ( ( false === $single || $open < $single ) && ( false === $double || $open < $double ) ) {
     399                // $attr like '[shortcode]' or 'name = [shortcode]' implies unfiltered_html.
     400                // In this specific situation we assume KSES did not run because the input
     401                // was written by an administrator, so we should avoid changing the output
     402                // and we do not need to run KSES here.
     403                $attr = preg_replace_callback( "/$pattern/s", 'do_shortcode_tag', $attr );
     404            } else {
     405                // $attr like 'name = "[shortcode]"' or "name = '[shortcode]'"
     406                // We do not know if $content was unfiltered. Assume KSES ran before shortcodes.
     407                $count = 0;
     408                $new_attr = preg_replace_callback( "/$pattern/s", 'do_shortcode_tag', $attr, -1, $count );
     409                if ( $count > 0 ) {
     410                    // Sanitize the shortcode output using KSES.
     411                    $new_attr = wp_kses_one_attr( $new_attr, $elname );
     412                    if ( '' !== $new_attr ) {
     413                        // The shortcode is safe to use now.
     414                        $attr = $new_attr;
     415                    }
     416                }
     417            }
     418        }
     419        $element = $front . implode( '', $attributes ) . $back;
     420       
     421        // Now encode any remaining [ or ] chars.
     422        $element = strtr( $element, $trans );
     423    }
     424   
     425    $content = implode( '', $textarr );
     426   
     427    return $content;
     428}
     429
     430/**
     431 * Remove placeholders added by do_shortcodes_in_html_tags().
     432 *
     433 * @since 4.2.3
     434 *
     435 * @param string $content Content to search for placeholders.
     436 * @return string Content with placeholders removed.
     437 */
     438function unescape_invalid_shortcodes( $content ) {
     439        // Clean up entire string, avoids re-parsing HTML.
     440        $trans = array( '&#91;' => '[', '&#93;' => ']' );
     441        $content = strtr( $content, $trans );
     442       
     443        return $content;
     444}
     445
     446/**
    295447 * Retrieve all attributes from the shortcodes tag.
    296448 *
     
    391543        return $content;
    392544
     545    $content = do_shortcodes_in_html_tags( $content, true );
     546
    393547    $pattern = get_shortcode_regex();
    394 
    395     return preg_replace_callback( "/$pattern/s", 'strip_shortcode_tag', $content );
     548    $content = preg_replace_callback( "/$pattern/s", 'strip_shortcode_tag', $content );
     549
     550    // Always restore square braces so we don't break things like <!--[if IE ]>
     551    $content = unescape_invalid_shortcodes( $content );
     552   
     553    return $content;
    396554}
    397555
Note: See TracChangeset for help on using the changeset viewer.