Make WordPress Core


Ignore:
Timestamp:
09/29/2014 04:06:54 AM (10 years ago)
Author:
wonderboymusic
Message:

The joys of wptexturize():

  • Revert parts of [28773] and [28727] and [29748].
  • Do not crash PHP. Make the shortcode quantifier possessive to avoid backtracks.
  • Reduce backtracking in long HTML comments by 100x.
  • Do not ignore unclosed HTML comments.
  • Do not break unregistered shortcodes, e.g. [hello attr="value"].
  • Do not break HTML in shortcode attributes, e.g. [hello attr="<"].
  • Do not match shortcodes that have whitespace before the name, e.g. [ hello ].
  • Add unit tests to show #12690 was not fully resolved.
  • Tested PHP 5.2.4, 5.2.13, 5.4.32, and 5.5.8.

Adds/modifies unit tests.

Props miqrogroove.
See #29557.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/wp-includes/formatting.php

    r29748 r29781  
    2929 */
    3030function wptexturize($text, $reset = false) {
    31     global $wp_cockneyreplace, $shortcode_tags;
     31    global $wp_cockneyreplace;
    3232    static $static_characters, $static_replacements, $dynamic_characters, $dynamic_replacements,
    3333        $default_no_texturize_tags, $default_no_texturize_shortcodes, $run_texturize = true;
     
    206206    // Look for shortcodes and HTML elements.
    207207
    208     $tagnames = array_keys( $shortcode_tags );
    209     $tagregexp = join( '|', array_map( 'preg_quote', $tagnames ) );
    210     $tagregexp = "(?:$tagregexp)(?![\\w-])"; // Excerpt of get_shortcode_regex().
     208    $comment_regex =
     209            '!'             // Start of comment, after the <.
     210        .   '(?:'           // Unroll the loop: Consume everything until --> is found.
     211        .       '-(?!->)'   // Dash not followed by end of comment.
     212        .       '[^\-]*+'   // Consume non-dashes.
     213        .   ')*+'           // Loop possessively.
     214        .   '(?:-->)?';     // End of comment. If not found, match all input.
    211215   
    212     $regex =  '/('          // Capture the entire match.
    213         .   '<'     // Find start of element.
    214         .   '(?(?=!--)' // Is this a comment?
    215         .       '.+?--\s*>' // Find end of comment
     216    $shortcode_regex =
     217            '\['            // Find start of shortcode.
     218        .   '[\/\[]?'       // Shortcodes may begin with [/ or [[
     219        .   '[^\s\/\[\]]'   // No whitespace before name.
     220        .   '[^\[\]]*+'     // Shortcodes do not contain other shortcodes. Possessive critical.
     221        .   '\]'            // Find end of shortcode.
     222        .   '\]?';          // Shortcodes may end with ]]
     223   
     224    $regex =
     225            '/('                    // Capture the entire match.
     226        .       '<'                 // Find start of element.
     227        .       '(?(?=!--)'         // Is this a comment?
     228        .           $comment_regex  // Find end of comment.
     229        .       '|'
     230        .           '[^>]+>'        // Find end of element.
     231        .       ')'
    216232        .   '|'
    217         .       '[^>]+>'    // Find end of element
    218         .   ')'
    219         . '|'
    220         .   '\['        // Find start of shortcode.
    221         .   '\[?'       // Shortcodes may begin with [[
    222         .   '\/?'       // Closing slash may precede name.
    223         .   $tagregexp  // Only match registered shortcodes, because performance.
    224         .   '[^\[\]]*'  // Shortcodes do not contain other shortcodes.
    225         .   '\]'        // Find end of shortcode.
    226         .   '\]?'       // Shortcodes may end with ]]
    227         . ')/s';
     233        .       $shortcode_regex    // Find shortcodes.
     234        .   ')/s';
    228235
    229236    $textarr = preg_split( $regex, $text, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );
     
    232239        // Only call _wptexturize_pushpop_element if $curl is a delimiter.
    233240        $first = $curl[0];
    234         if ( '<' === $first && '>' === substr( $curl, -1 ) ) {
    235             // This is an HTML delimiter.
    236 
    237             if ( '<!--' !== substr( $curl, 0, 4 ) ) {
    238                 _wptexturize_pushpop_element( $curl, $no_texturize_tags_stack, $no_texturize_tags );
    239             }
     241        if ( '<' === $first && '<!--' === substr( $curl, 0, 4 ) ) {
     242            // This is an HTML comment delimiter.
     243
     244            continue;
     245
     246        } elseif ( '<' === $first && '>' === substr( $curl, -1 ) ) {
     247            // This is an HTML element delimiter.
     248
     249            _wptexturize_pushpop_element( $curl, $no_texturize_tags_stack, $no_texturize_tags );
    240250
    241251        } elseif ( '' === trim( $curl ) ) {
     
    244254            continue;
    245255
    246         } elseif ( '[' === $first && 1 === preg_match( '/^\[\[?\/?' . $tagregexp . '[^\[\]]*\]\]?$/', $curl ) ) {
     256        } elseif ( '[' === $first && 1 === preg_match( '/^' . $shortcode_regex . '$/', $curl ) ) {
    247257            // This is a shortcode delimiter.
    248258
Note: See TracChangeset for help on using the changeset viewer.