Make WordPress Core

Changeset 33469


Ignore:
Timestamp:
07/28/2015 11:02:04 PM (10 years ago)
Author:
wonderboymusic
Message:

Protect newlines inside of CDATA. Newlines inside HTML elements were previously replaced with spaces, which was breaking things, notably inline JS that used comments for HTML standards compat.

  • Tokenize newlines in WP_Embed::autoembed() before running ->autoembed_callback()
  • Tokenize newlines with placeholders in wpautop()
  • Introduce wp_html_split() to DRY the RegEx from wp_replace_in_html_tags() and do_shortcodes_in_html_tags()

Adds unit tests.

Props miqrogroove, kitchin, azaozz.
Fixes #33106.

Location:
trunk
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/wp-includes/class-wp-embed.php

    r33359 r33469  
    130130     */
    131131    public function shortcode( $attr, $url = '' ) {
     132        // This filter can be used to output custom HTML instead of allowing oEmbed to run.
     133        $custom = apply_filters( 'wp_embed_shortcode_custom', false, $attr, $url );
     134        if ( false !== $custom ) {
     135            return $custom;
     136        }
     137       
    132138        $post = get_post();
    133139
     
    319325     */
    320326    public function autoembed( $content ) {
    321         // Strip newlines from all elements.
    322         $content = wp_replace_in_html_tags( $content, array( "\n" => " " ) );
     327        // Replace line breaks from all HTML elements with placeholders.
     328        $content = wp_replace_in_html_tags( $content, array( "\n" => '<!-- wp-line-break -->' ) );
    323329
    324330        // Find URLs that are on their own line.
    325         return preg_replace_callback( '|^(\s*)(https?://[^\s"]+)(\s*)$|im', array( $this, 'autoembed_callback' ), $content );
     331        $content = preg_replace_callback( '|^(\s*)(https?://[^\s"]+)(\s*)$|im', array( $this, 'autoembed_callback' ), $content );
     332
     333        // Put the line breaks back.
     334        return str_replace( '<!-- wp-line-break -->', "\n", $content );
    326335    }
    327336
  • trunk/src/wp-includes/formatting.php

    r33440 r33469  
    505505    $pee = str_replace(array("\r\n", "\r"), "\n", $pee);
    506506
    507     // Strip newlines from all elements.
    508     $pee = wp_replace_in_html_tags( $pee, array( "\n" => " " ) );
     507    // Find newlines in all elements and add placeholders.
     508    $pee = wp_replace_in_html_tags( $pee, array( "\n" => " <!-- wpnl --> " ) );
    509509
    510510    // Collapse line breaks before and after <option> elements so they don't get autop'd.
     
    593593        $pee = str_replace(array_keys($pre_tags), array_values($pre_tags), $pee);
    594594
     595    // Restore newlines in all elements.
     596    $pee = str_replace( " <!-- wpnl --> ", "\n", $pee );
     597
    595598    return $pee;
     599}
     600
     601/**
     602 * Separate HTML elements and comments from the text.
     603 *
     604 * @since 4.2.4
     605 *
     606 * @param string $input The text which has to be formatted.
     607 * @return array The formatted text.
     608 */
     609function wp_html_split( $input ) {
     610    static $regex;
     611
     612    if ( ! isset( $regex ) ) {
     613        $comments =
     614              '!'           // Start of comment, after the <.
     615            . '(?:'         // Unroll the loop: Consume everything until --> is found.
     616            .     '-(?!->)' // Dash not followed by end of comment.
     617            .     '[^\-]*+' // Consume non-dashes.
     618            . ')*+'         // Loop possessively.
     619            . '(?:-->)?';   // End of comment. If not found, match all input.
     620
     621        $cdata =
     622              '!\[CDATA\['  // Start of comment, after the <.
     623            . '[^\]]*+'     // Consume non-].
     624            . '(?:'         // Unroll the loop: Consume everything until ]]> is found.
     625            .     '](?!]>)' // One ] not followed by end of comment.
     626            .     '[^\]]*+' // Consume non-].
     627            . ')*+'         // Loop possessively.
     628            . '(?:]]>)?';   // End of comment. If not found, match all input.
     629
     630        $regex =
     631              '/('              // Capture the entire match.
     632            .     '<'           // Find start of element.
     633            .     '(?(?=!--)'   // Is this a comment?
     634            .         $comments // Find end of comment.
     635            .     '|'
     636            .         '(?(?=!\[CDATA\[)' // Is this a comment?
     637            .             $cdata // Find end of comment.
     638            .         '|'
     639            .             '[^>]*>?' // Find end of element. If not found, match all input.
     640            .         ')'
     641            .     ')'
     642            . ')/s';
     643    }
     644
     645    return preg_split( $regex, $input, -1, PREG_SPLIT_DELIM_CAPTURE );
    596646}
    597647
     
    607657function wp_replace_in_html_tags( $haystack, $replace_pairs ) {
    608658    // Find all elements.
    609     $comments =
    610           '!'           // Start of comment, after the <.
    611         . '(?:'         // Unroll the loop: Consume everything until --> is found.
    612         .     '-(?!->)' // Dash not followed by end of comment.
    613         .     '[^\-]*+' // Consume non-dashes.
    614         . ')*+'         // Loop possessively.
    615         . '(?:-->)?';   // End of comment. If not found, match all input.
    616 
    617     $regex =
    618           '/('              // Capture the entire match.
    619         .     '<'           // Find start of element.
    620         .     '(?(?=!--)'   // Is this a comment?
    621         .         $comments // Find end of comment.
    622         .     '|'
    623         .         '[^>]*>?' // Find end of element. If not found, match all input.
    624         .     ')'
    625         . ')/s';
    626 
    627     $textarr = preg_split( $regex, $haystack, -1, PREG_SPLIT_DELIM_CAPTURE );
     659    $textarr = wp_html_split( $haystack );
    628660    $changed = false;
    629661
  • trunk/src/wp-includes/shortcodes.php

    r33359 r33469  
    334334   
    335335    $pattern = get_shortcode_regex();
    336 
    337     $comment_regex =
    338           '!'           // Start of comment, after the <.
    339         . '(?:'         // Unroll the loop: Consume everything until --> is found.
    340         .     '-(?!->)' // Dash not followed by end of comment.
    341         .     '[^\-]*+' // Consume non-dashes.
    342         . ')*+'         // Loop possessively.
    343         . '(?:-->)?';   // End of comment. If not found, match all input.
    344 
    345     $regex =
    346           '/('                   // Capture the entire match.
    347         .     '<'                // Find start of element.
    348         .     '(?(?=!--)'        // Is this a comment?
    349         .         $comment_regex // Find end of comment.
    350         .     '|'
    351         .         '[^>]*>?'      // Find end of element. If not found, match all input.
    352         .     ')'
    353         . ')/s';
    354 
    355     $textarr = preg_split( $regex, $content, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );
     336    $textarr = wp_html_split( $content );
    356337
    357338    foreach ( $textarr as &$element ) {
    358         if ( '<' !== $element[0] ) {
     339        if ( '' == $element || '<' !== $element[0] ) {
    359340            continue;
    360341        }
     
    371352        }
    372353
    373         if ( $ignore_html || '<!--' === substr( $element, 0, 4 ) ) {
     354        if ( $ignore_html || '<!--' === substr( $element, 0, 4 ) || '<![CDATA[' === substr( $element, 0, 9 ) ) {
    374355            // Encode all [ and ] chars.
    375356            $element = strtr( $element, $trans );
  • trunk/tests/phpunit/tests/formatting/Autop.php

    r31191 r33469  
    400400        $this->assertEquals( $expected, trim( wpautop( $content ) ) );
    401401    }
     402
     403    /**
     404     * Do not allow newlines within HTML elements to become mangled.
     405     *
     406     * @ticket 33106
     407     * @dataProvider data_element_sanity
     408     */
     409    function test_element_sanity( $input, $output ) {
     410        return $this->assertEquals( $output, wpautop( $input ) );
     411    }
     412
     413    function data_element_sanity() {
     414        return array(
     415            array(
     416                "Hello <a\nhref='world'>",
     417                "<p>Hello <a\nhref='world'></p>\n",
     418            ),
     419            array(
     420                "Hello <!-- a\nhref='world' -->",
     421                "<p>Hello <!-- a\nhref='world' --></p>\n",
     422            ),
     423/* Block elements inside comments will fail this test in all versions, it's not a regression.
     424            array(
     425                "Hello <!-- <hr> a\nhref='world' -->",
     426                "<p>Hello <!-- <hr> a\nhref='world' --></p>\n",
     427            ),
     428            array(
     429                "Hello <![CDATA[ <hr> a\nhttps://youtu.be/jgz0uSaOZbE\n ]]>",
     430                "<p>Hello <![CDATA[ <hr> a\nhttps://youtu.be/jgz0uSaOZbE\n ]]></p>\n",
     431            ),
     432*/
     433            array(
     434                "Hello <![CDATA[ a\nhttps://youtu.be/jgz0uSaOZbE\n ]]>",
     435                "<p>Hello <![CDATA[ a\nhttps://youtu.be/jgz0uSaOZbE\n ]]></p>\n",
     436            ),
     437            array(
     438                "Hello <![CDATA[ <!-- a\nhttps://youtu.be/jgz0uSaOZbE\n a\n9 ]]> -->",
     439                "<p>Hello <![CDATA[ <!-- a\nhttps://youtu.be/jgz0uSaOZbE\n a\n9 ]]> --></p>\n",
     440            ),
     441            array(
     442                "Hello <![CDATA[ <!-- a\nhttps://youtu.be/jgz0uSaOZbE\n a\n9 --> a\n9 ]]>",
     443                "<p>Hello <![CDATA[ <!-- a\nhttps://youtu.be/jgz0uSaOZbE\n a\n9 --> a\n9 ]]></p>\n",
     444            ),
     445        );
     446    }
     447   
    402448}
  • trunk/tests/phpunit/tests/media.php

    r32930 r33469  
    586586    }
    587587
     588    /**
     589     * @ticket 33016
     590     */
     591    function test_multiline_cdata() {
     592        global $wp_embed;
     593
     594        $content = <<<EOF
     595<script>// <![CDATA[
     596_my_function('data');
     597// ]]>
     598</script>
     599EOF;
     600
     601        $result = $wp_embed->autoembed( $content );
     602        $this->assertEquals( $content, $result );
     603    }
     604
     605    /**
     606     * @ticket 33016
     607     */
     608    function test_multiline_comment() {
     609        global $wp_embed;
     610
     611        $content = <<<EOF
     612<script><!--
     613my_function();
     614// --> </script>
     615EOF;
     616
     617        $result = $wp_embed->autoembed( $content );
     618        $this->assertEquals( $content, $result );
     619    }
     620
     621
     622    /**
     623     * @ticket 33016
     624     */
     625    function test_multiline_comment_with_embeds() {
     626        $content = <<<EOF
     627Start.
     628[embed]http://www.youtube.com/embed/TEST01YRHA0[/embed]
     629<script><!--
     630my_function();
     631// --> </script>
     632http://www.youtube.com/embed/TEST02YRHA0
     633[embed]http://www.example.com/embed/TEST03YRHA0[/embed]
     634http://www.example.com/embed/TEST04YRHA0
     635Stop.
     636EOF;
     637
     638        $expected = <<<EOF
     639<p>Start.<br />
     640https://youtube.com/watch?v=TEST01YRHA0<br />
     641<script><!--
     642my_function();
     643// --> </script><br />
     644https://youtube.com/watch?v=TEST02YRHA0<br />
     645<a href="http://www.example.com/embed/TEST03YRHA0">http://www.example.com/embed/TEST03YRHA0</a><br />
     646http://www.example.com/embed/TEST04YRHA0<br />
     647Stop.</p>
     648
     649EOF;
     650
     651        $result = apply_filters( 'the_content', $content );
     652        $this->assertEquals( $expected, $result );
     653    }
     654
     655    /**
     656     * @ticket 33016
     657     */
     658    function filter_wp_embed_shortcode_custom( $custom, $attr, $url ) {
     659        if ( 'https://www.example.com/?video=1' == $url ) {
     660            $custom = "<iframe src='$url'></iframe>";
     661        }
     662        return $custom;
     663    }
     664
     665    /**
     666     * @ticket 33016
     667     */
     668    function test_oembed_explicit_media_link() {
     669        global $wp_embed;
     670        add_filter( 'wp_embed_shortcode_custom', array( $this, 'filter_wp_embed_shortcode_custom' ), 10, 3 );
     671
     672        $content = <<<EOF
     673https://www.example.com/?video=1
     674EOF;
     675
     676        $expected = <<<EOF
     677<iframe src='https://www.example.com/?video=1'></iframe>
     678EOF;
     679
     680        $result = $wp_embed->autoembed( $content );
     681        $this->assertEquals( $expected, $result );
     682
     683        $content = <<<EOF
     684<a href="https://www.example.com/?video=1">https://www.example.com/?video=1</a>
     685<script>// <![CDATA[
     686_my_function('data');
     687myvar = 'Hello world
     688https://www.example.com/?video=1
     689don't break this';
     690// ]]>
     691</script>
     692EOF;
     693
     694        $result = $wp_embed->autoembed( $content );
     695        $this->assertEquals( $content, $result );
     696
     697        remove_filter( 'wp_embed_shortcode_custom', array( $this, 'filter_wp_embed_shortcode_custom' ), 10 );
     698    }
    588699}
Note: See TracChangeset for help on using the changeset viewer.