Make WordPress Core

Changeset 28831


Ignore:
Timestamp:
06/25/2014 05:48:20 PM (10 years ago)
Author:
wonderboymusic
Message:

Optimize the wptexturize() loop:

  • Take the ampersand pattern out of the loop for speed.
  • Fix old bugs in the ampersand pattern.
  • Refactor _wptexturize_pushpop_element() without PCRE for speed.
  • Update unit tests.

Props miqrogroove.
Fixes #28623.

Location:
trunk
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/wp-includes/formatting.php

    r28817 r28831  
    179179     * @param array $default_no_texturize_tags An array of HTML element names.
    180180     */
    181     $no_texturize_tags = '(' . implode( '|', apply_filters( 'no_texturize_tags', $default_no_texturize_tags ) ) . ')';
     181    $no_texturize_tags = apply_filters( 'no_texturize_tags', $default_no_texturize_tags );
    182182    /**
    183183     * Filter the list of shortcodes not to texturize.
     
    187187     * @param array $default_no_texturize_shortcodes An array of shortcode names.
    188188     */
    189     $no_texturize_shortcodes = '(' . implode( '|', apply_filters( 'no_texturize_shortcodes', $default_no_texturize_shortcodes ) ) . ')';
     189    $no_texturize_shortcodes = apply_filters( 'no_texturize_shortcodes', $default_no_texturize_shortcodes );
    190190
    191191    $no_texturize_tags_stack = array();
     
    207207        .       '[^\[\]<>]' // Shortcodes do not contain other shortcodes.
    208208        .   '|'
    209         .       '<.+?>' // HTML elements permitted. Prevents matching ] before >.
     209        .       '<.+?>'     // HTML elements permitted. Prevents matching ] before >.
    210210        .   ')+'
    211211        .   '\]'        // Find end of shortcode.
     
    222222
    223223            if ( '<!--' !== substr( $curl, 0, 4 ) ) {
    224                 _wptexturize_pushpop_element( $curl, $no_texturize_tags_stack, $no_texturize_tags, '<', '>' );
     224                _wptexturize_pushpop_element( $curl, $no_texturize_tags_stack, $no_texturize_tags );
    225225            }
    226226
     
    228228            // This is a shortcode delimiter.
    229229
    230             _wptexturize_pushpop_element( $curl, $no_texturize_shortcodes_stack, $no_texturize_shortcodes, '[', ']' );
     230            _wptexturize_pushpop_element( $curl, $no_texturize_shortcodes_stack, $no_texturize_shortcodes );
    231231
    232232        } elseif ( '[' === $first && 1 === preg_match( '/^\[\[?(?:[^\[\]<>]|<.+?>)+\]\]?$/', $curl ) ) {
     
    236236            // Do not push to the shortcodes stack.
    237237
    238         } elseif ( empty($no_texturize_shortcodes_stack) && empty($no_texturize_tags_stack) ) {
     238        } elseif ( empty( $no_texturize_shortcodes_stack ) && empty( $no_texturize_tags_stack ) ) {
    239239            // This is neither a delimiter, nor is this content inside of no_texturize pairs.  Do texturize.
    240240
    241             $curl = str_replace($static_characters, $static_replacements, $curl);
    242             $curl = preg_replace($dynamic_characters, $dynamic_replacements, $curl);
     241            $curl = str_replace( $static_characters, $static_replacements, $curl );
     242            $curl = preg_replace( $dynamic_characters, $dynamic_replacements, $curl );
    243243
    244244            // 9x9 (times), but never 0x9999
     
    248248            }
    249249        }
    250 
    251         // Replace each & with &#038; unless it already looks like an entity.
    252         $curl = preg_replace('/&([^#])(?![a-zA-Z1-4]{1,8};)/', '&#038;$1', $curl);
    253     }
    254     return implode( '', $textarr );
     250    }
     251    $text = implode( '', $textarr );
     252
     253    // Replace each & with &#038; unless it already looks like an entity.
     254    $text = preg_replace('/&(?!#(?:\d+|x[a-f0-9]+);|[a-z1-4]{1,8};)/i', '&#038;', $text);
     255
     256    return $text;
    255257}
    256258
    257259/**
    258260 * Search for disabled element tags. Push element to stack on tag open and pop
    259  * on tag close. Assumes first character of $text is tag opening.
     261 * on tag close.
     262 *
     263 * Assumes first char of $text is tag opening and last char is tag closing.
     264 * Assumes second char of $text is optionally '/' to indicate closing as in </html>.
    260265 *
    261266 * @since 2.9.0
    262267 * @access private
    263268 *
    264  * @param string $text Text to check. First character is assumed to be $opening
    265  * @param array $stack Array used as stack of opened tag elements
    266  * @param string $disabled_elements Tags to match against formatted as regexp sub-expression
    267  * @param string $opening Tag opening character, assumed to be 1 character long
    268  * @param string $closing Tag closing character
    269  */
    270 function _wptexturize_pushpop_element($text, &$stack, $disabled_elements, $opening = '<', $closing = '>') {
    271     // Check if it is a closing tag -- otherwise assume opening tag
    272     if (strncmp($opening . '/', $text, 2)) {
    273         // Opening? Check $text+1 against disabled elements
    274         if (preg_match('/^' . $disabled_elements . '\b/', substr($text, 1), $matches)) {
     269 * @param string $text Text to check. Must be a tag like <html> or [shortcode].
     270 * @param array $stack List of open tag elements.
     271 * @param array $disabled_elements The tag names to match against. Spaces are not allowed in tag names.
     272 */
     273function _wptexturize_pushpop_element($text, &$stack, $disabled_elements) {
     274    // Is it an opening tag or closing tag?
     275    if ( '/' !== $text[1] ) {
     276        $opening_tag = true;
     277        $name_offset = 1;
     278    } elseif ( 0 == count( $stack ) ) {
     279        // Stack is empty. Just stop.
     280        return;
     281    } else {
     282        $opening_tag = false;
     283        $name_offset = 2;
     284    }
     285
     286    // Parse out the tag name.
     287    $space = strpos( $text, ' ' );
     288    if ( FALSE === $space ) {
     289        $space = -1;
     290    } else {
     291        $space -= $name_offset;
     292    }
     293    $tag = substr( $text, $name_offset, $space );
     294
     295    // Handle disabled tags.
     296    if ( in_array( $tag, $disabled_elements ) ) {
     297        if ( $opening_tag ) {
    275298            /*
    276299             * This disables texturize until we find a closing tag of our type
     
    281304             */
    282305
    283             array_push($stack, $matches[1]);
    284         }
    285     } elseif ( 0 == count( $stack ) ) {
    286         // Stack is empty. Just stop.
    287     } else {
    288         // Closing? Check $text+2 against disabled elements
    289         $c = preg_quote($closing, '/');
    290         if (preg_match('/^' . $disabled_elements . $c . '/', substr($text, 2), $matches)) {
    291             $last = array_pop($stack);
    292 
    293             // Make sure it matches the opening tag
    294             if ( $last != $matches[1] ) {
    295                 array_push( $stack, $last );
    296             }
     306            array_push( $stack, $tag );
     307        } elseif ( end( $stack ) == $tag ) {
     308            array_pop( $stack );
    297309        }
    298310    }
  • trunk/tests/phpunit/tests/formatting/WPTexturize.php

    r28773 r28831  
    831831            ),
    832832            array(
     833                "word &#xabc; word",
     834                "word &#xabc; word",
     835            ),
     836            array(
     837                "word &#X394; word",
     838                "word &#X394; word",
     839            ),
     840            array(
    833841                "word &# word",
    834                 "word &# word", // invalid output?
     842                "word &#038;# word",
    835843            ),
    836844            array(
     
    840848            array(
    841849                "word &&amp; word",
    842                 "word &&amp; word",
     850                "word &#038;&amp; word",
    843851            ),
    844852            array(
    845853                "word &!amp; word",
    846                 "word &!amp; word",
     854                "word &#038;!amp; word",
     855            ),
     856            array(
     857                "word &#",
     858                "word &#038;#",
     859            ),
     860            array(
     861                "word &",
     862                "word &#038;",
    847863            ),
    848864        );
     
    12851301                '<ul><li>Hello.</li><!--<li>Goodbye.</li>--></ul>',
    12861302                '<ul><li>Hello.</li><!--<li>Goodbye.</li>--></ul>',
     1303            ),
     1304            array(
     1305                'word <img src="http://example.com/wp-content/uploads/2014/06/image-300x216.gif" /> word', // Ensure we are not corrupting image URLs.
     1306                'word <img src="http://example.com/wp-content/uploads/2014/06/image-300x216.gif" /> word',
    12871307            ),
    12881308        );
Note: See TracChangeset for help on using the changeset viewer.