Make WordPress Core


Ignore:
Timestamp:
07/01/2024 11:34:19 PM (5 days ago)
Author:
dmsnell
Message:

HTML API: Optimize low-level parsing details in Tag Processor.

Introduces a number of micro-level optimizations in the Tag Processor to
improve token-scanning performance. Should contain no functional changes.

Based on benchmarking against a list of the 100 most-visited websites,
these changes improve the Tag Processor's tag-scanning performance by
between 3.5% and 7.5% on average.

Developed in https://github.com/WordPress/wordpress-develop/pull/6890
Discussed in https://core.trac.wordpress.org/ticket/61545

Follow-up to [55203].

See #61545.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/wp-includes/html-api/class-wp-html-decoder.php

    r58281 r58613  
    142142        while ( $at < $end ) {
    143143            $next_character_reference_at = strpos( $text, '&', $at );
    144             if ( false === $next_character_reference_at || $next_character_reference_at >= $end ) {
     144            if ( false === $next_character_reference_at ) {
    145145                break;
    146146            }
     
    437437
    438438        if ( $code_point <= 0x7FF ) {
    439             $byte1 = ( $code_point >> 6 ) | 0xC0;
    440             $byte2 = $code_point & 0x3F | 0x80;
    441 
    442             return pack( 'CC', $byte1, $byte2 );
     439            $byte1 = chr( ( $code_point >> 6 ) | 0xC0 );
     440            $byte2 = chr( $code_point & 0x3F | 0x80 );
     441
     442            return "{$byte1}{$byte2}";
    443443        }
    444444
    445445        if ( $code_point <= 0xFFFF ) {
    446             $byte1 = ( $code_point >> 12 ) | 0xE0;
    447             $byte2 = ( $code_point >> 6 ) & 0x3F | 0x80;
    448             $byte3 = $code_point & 0x3F | 0x80;
    449 
    450             return pack( 'CCC', $byte1, $byte2, $byte3 );
     446            $byte1 = chr( ( $code_point >> 12 ) | 0xE0 );
     447            $byte2 = chr( ( $code_point >> 6 ) & 0x3F | 0x80 );
     448            $byte3 = chr( $code_point & 0x3F | 0x80 );
     449
     450            return "{$byte1}{$byte2}{$byte3}";
    451451        }
    452452
    453453        // Any values above U+10FFFF are eliminated above in the pre-check.
    454         $byte1 = ( $code_point >> 18 ) | 0xF0;
    455         $byte2 = ( $code_point >> 12 ) & 0x3F | 0x80;
    456         $byte3 = ( $code_point >> 6 ) & 0x3F | 0x80;
    457         $byte4 = $code_point & 0x3F | 0x80;
    458 
    459         return pack( 'CCCC', $byte1, $byte2, $byte3, $byte4 );
     454        $byte1 = chr( ( $code_point >> 18 ) | 0xF0 );
     455        $byte2 = chr( ( $code_point >> 12 ) & 0x3F | 0x80 );
     456        $byte3 = chr( ( $code_point >> 6 ) & 0x3F | 0x80 );
     457        $byte4 = chr( $code_point & 0x3F | 0x80 );
     458
     459        return "{$byte1}{$byte2}{$byte3}{$byte4}";
    460460    }
    461461}
Note: See TracChangeset for help on using the changeset viewer.