WordPress.org

Make WordPress Core


Ignore:
Timestamp:
03/29/2014 07:15:33 AM (7 years ago)
Author:
nacin
Message:

Texturize: Massive performance improvements (~600% faster); better handling of nbsp, double, and weird spaces; 136 new unit tests.

big props miqrogroove.
fixes #22692.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/wp-includes/formatting.php

    r27761 r27839  
    7474        $static_replacements = array_merge( array( $em_dash, ' ' . $em_dash . ' ', $en_dash, ' ' . $en_dash . ' ', 'xn--', '&#8230;', $opening_quote, $closing_quote, ' &#8482;' ), $cockneyreplace );
    7575
     76        /*
     77         * Regex for common whitespace characters.
     78         *
     79         * By default, spaces include new lines, tabs, nbsp entities, and the UTF-8 nbsp.
     80         * This is designed to replace the PCRE \s sequence.  In #WP22692, that sequence
     81         * was found to be unreliable due to random inclusion of the A0 byte.
     82         */
     83        $spaces = '[\r\n\t ]|\xC2\xA0|&nbsp;';
     84
     85
     86        // Pattern-based replacements of characters.
    7687        $dynamic = array();
    77         if ( "'" != $apos ) {
    78             $dynamic[ '/\'(\d\d(?:&#8217;|\')?s)/' ] = $apos . '$1'; // '99's
    79             $dynamic[ '/\'(\d)/'                   ] = $apos . '$1'; // '99
    80         }
     88
     89        // '99 '99s '99's (apostrophe)
     90        if ( "'" != $apos )
     91            $dynamic[ '/\'(?=\d)/' ] = $apos;
     92
     93        // Single quote at start, or preceded by (, {, <, [, ", or spaces.
    8194        if ( "'" != $opening_single_quote )
    82             $dynamic[ '/(\s|\A|[([{<]|")\'/'       ] = '$1' . $opening_single_quote; // opening single quote, even after (, {, <, [
     95            $dynamic[ '/(?<=\A|[([{<"]|' . $spaces . ')\'/' ] = $opening_single_quote;
     96
     97        // 9" (double prime)
    8398        if ( '"' != $double_prime )
    84             $dynamic[ '/(\d)"/'                    ] = '$1' . $double_prime; // 9" (double prime)
     99            $dynamic[ '/(?<=\d)"/' ] = $double_prime;
     100
     101        // 9' (prime)
    85102        if ( "'" != $prime )
    86             $dynamic[ '/(\d)\'/'                   ] = '$1' . $prime; // 9' (prime)
     103            $dynamic[ '/(?<=\d)\'/' ] = $prime;
     104
     105        // Apostrophe in a word.  No spaces or double primes.
    87106        if ( "'" != $apos )
    88             $dynamic[ '/(\S)\'([^\'\s])/'          ] = '$1' . $apos . '$2'; // apostrophe in a word
     107            $dynamic[ '/(?<!' . $spaces . ')\'(?!\'|' . $spaces . ')/' ] = $apos;
     108
     109        // Double quote at start, or preceded by (, {, <, [, or spaces, and not followed by spaces.
    89110        if ( '"' != $opening_quote )
    90             $dynamic[ '/(\s|\A|[([{<])"(?!\s)/'    ] = '$1' . $opening_quote . '$2'; // opening double quote, even after (, {, <, [
     111            $dynamic[ '/(?<=\A|[([{<]|' . $spaces . ')"(?!' . $spaces . ')/' ] = $opening_quote;
     112
     113        // Any remaining double quotes.
    91114        if ( '"' != $closing_quote )
    92             $dynamic[ '/"(\s|\S|\Z)/'              ] = $closing_quote . '$1'; // closing double quote
     115            $dynamic[ '/"/' ] = $closing_quote;
     116
     117        // Single quotes followed by spaces or a period.
    93118        if ( "'" != $closing_single_quote )
    94             $dynamic[ '/\'([\s.]|\Z)/'             ] = $closing_single_quote . '$1'; // closing single quote
    95 
    96         $dynamic[ '/\b(\d+)x(\d+)\b/'              ] = '$1&#215;$2'; // 9x9 (times)
     119            $dynamic[ '/\'(?=\Z|\.|' . $spaces . ')/' ] = $closing_single_quote;
    97120
    98121        $dynamic_characters = array_keys( $dynamic );
     
    135158            _wptexturize_pushpop_element($curl, $no_texturize_shortcodes_stack, $no_texturize_shortcodes, '[', ']');
    136159        } elseif ( empty($no_texturize_shortcodes_stack) && empty($no_texturize_tags_stack) ) {
     160
    137161            // This is not a tag, nor is the texturization disabled static strings
    138162            $curl = str_replace($static_characters, $static_replacements, $curl);
     163
    139164            // regular expressions
    140165            $curl = preg_replace($dynamic_characters, $dynamic_replacements, $curl);
     166
     167            // 9x9 (times)
     168            if ( 1 === preg_match( '/(?<=\d)x\d/', $text ) ) {
     169                // Searching for a digit is 10 times more expensive than for the x, so we avoid doing this one!
     170                $curl = preg_replace( '/\b(\d+)x(\d+)\b/', '$1&#215;$2', $curl );
     171            }
    141172        }
     173
     174        // Replace each & with &#038; unless it already looks like an entity.
    142175        $curl = preg_replace('/&([^#])(?![a-zA-Z1-4]{1,8};)/', '&#038;$1', $curl);
    143176    }
Note: See TracChangeset for help on using the changeset viewer.