Make WordPress Core


Ignore:
Timestamp:
07/28/2015 11:02:04 PM (10 years ago)
Author:
wonderboymusic
Message:

Protect newlines inside of CDATA. Stripping newlines from all elements was breaking things, notably inline JS that used comments for HTML standards compatibility.

  • Tokenize newlines in WP_Embed::autoembed() before running ->autoembed_callback()
  • Tokenize newlines with placeholders in wpautop()
  • Introduce wp_html_split() to DRY the RegEx from wp_replace_in_html_tags() and do_shortcodes_in_html_tags()

Adds unit tests.

Props miqrogroove, kitchin, azaozz.
Fixes #33106.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/wp-includes/formatting.php

    r33440 r33469  
    505505    $pee = str_replace(array("\r\n", "\r"), "\n", $pee);
    506506
    507     // Strip newlines from all elements.
    508     $pee = wp_replace_in_html_tags( $pee, array( "\n" => " " ) );
     507    // Find newlines in all elements and add placeholders.
     508    $pee = wp_replace_in_html_tags( $pee, array( "\n" => " <!-- wpnl --> " ) );
    509509
    510510    // Collapse line breaks before and after <option> elements so they don't get autop'd.
     
    593593        $pee = str_replace(array_keys($pre_tags), array_values($pre_tags), $pee);
    594594
     595    // Restore newlines in all elements.
     596    $pee = str_replace( " <!-- wpnl --> ", "\n", $pee );
     597
    595598    return $pee;
     599}
     600
     601/**
     602 * Separate HTML elements, comments and CDATA sections from the text.
     603 *
     604 * @since 4.2.4
     605 *
     606 * @param string $input The text which has to be split.
     607 * @return array The pieces of $input, with each matched element, comment or CDATA section kept as its own item.
     608 */
     609function wp_html_split( $input ) {
     610    static $regex; // Built on the first call only, then reused.
     611
     612    if ( ! isset( $regex ) ) {
     613        $comments =
     614              '!'           // Start of comment, after the <.
     615            . '(?:'         // Unroll the loop: Consume everything until --> is found.
     616            .     '-(?!->)' // Dash not followed by end of comment.
     617            .     '[^\-]*+' // Consume non-dashes.
     618            . ')*+'         // Loop possessively.
     619            . '(?:-->)?';   // End of comment. If not found, match all input.
     620
     621        $cdata =
     622              '!\[CDATA\['  // Start of CDATA section, after the <.
     623            . '[^\]]*+'     // Consume non-].
     624            . '(?:'         // Unroll the loop: Consume everything until ]]> is found.
     625            .     '](?!]>)' // One ] not followed by end of CDATA section.
     626            .     '[^\]]*+' // Consume non-].
     627            . ')*+'         // Loop possessively.
     628            . '(?:]]>)?';   // End of CDATA section. If not found, match all input.
     629
     630        $regex =
     631              '/('              // Capture the entire match.
     632            .     '<'           // Find start of element.
     633            .     '(?(?=!--)'   // Is this a comment?
     634            .         $comments // Find end of comment.
     635            .     '|'
     636            .         '(?(?=!\[CDATA\[)' // Is this a CDATA section?
     637            .             $cdata // Find end of CDATA section.
     638            .         '|'
     639            .             '[^>]*>?' // Find end of element. If not found, match all input.
     640            .         ')'
     641            .     ')'
     642            . ')/s';
     643    }
     644
     645    return preg_split( $regex, $input, -1, PREG_SPLIT_DELIM_CAPTURE ); // The capture flag keeps the matched delimiters in the result.
    596646}
    597647
     
    607657function wp_replace_in_html_tags( $haystack, $replace_pairs ) {
    608658    // Find all elements.
    609     $comments =
    610           '!'           // Start of comment, after the <.
    611         . '(?:'         // Unroll the loop: Consume everything until --> is found.
    612         .     '-(?!->)' // Dash not followed by end of comment.
    613         .     '[^\-]*+' // Consume non-dashes.
    614         . ')*+'         // Loop possessively.
    615         . '(?:-->)?';   // End of comment. If not found, match all input.
    616 
    617     $regex =
    618           '/('              // Capture the entire match.
    619         .     '<'           // Find start of element.
    620         .     '(?(?=!--)'   // Is this a comment?
    621         .         $comments // Find end of comment.
    622         .     '|'
    623         .         '[^>]*>?' // Find end of element. If not found, match all input.
    624         .     ')'
    625         . ')/s';
    626 
    627     $textarr = preg_split( $regex, $haystack, -1, PREG_SPLIT_DELIM_CAPTURE );
     659    $textarr = wp_html_split( $haystack );
    628660    $changed = false;
    629661
Note: See TracChangeset for help on using the changeset viewer.