Make WordPress Core

Changeset 57179


Ignore:
Timestamp:
12/10/2023 01:17:29 PM (16 months ago)
Author:
zieladam
Message:

HTML API: Track spans of text with (offset, length) instead of (start, end).

Updates the internal representation of the text span coordinates. The mixture of (offset, length) and (start, end) coordinates had become confusing, so this commit replaces it with an (offset, length) pair throughout. There should be no functional or behavioral changes in this patch. For the internal helper classes this patch introduces breaking changes, but those classes are marked private and should not be used outside of the HTML API itself.

Props dmsnell.
Fixes #59993.

Location:
trunk/src/wp-includes/html-api
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/wp-includes/html-api/class-wp-html-attribute-token.php

    r55734 r57179  
    1616 * @access private
    1717 * @since 6.2.0
     18 * @since 6.5.0 Replaced `end` with `length` to more closely match `substr()`.
    1819 *
    1920 * @see WP_HTML_Tag_Processor
     
    2425     *
    2526     * @since 6.2.0
     27     *
    2628     * @var string
    2729     */
     
    3234     *
    3335     * @since 6.2.0
     36     *
    3437     * @var int
    3538     */
     
    4043     *
    4144     * @since 6.2.0
     45     *
    4246     * @var int
    4347     */
     
    4852     *
    4953     * @since 6.2.0
     54     *
    5055     * @var int
    5156     */
     
    5358
    5459    /**
    55      * The string offset after the attribute value or its name.
     60     * Byte length of text spanning the attribute inside a tag.
    5661     *
    57      * @since 6.2.0
     62     * This span starts at the first character of the attribute name
     63     * and it ends after one of three cases:
     64     *
     65     *  - at the end of the attribute name for boolean attributes.
     66     *  - at the end of the value for unquoted attributes.
     67     *  - at the final single or double quote for quoted attributes.
     68     *
     69     * Example:
     70     *
     71     *     <div class="post">
     72     *          ------------ length is 12, including quotes
     73     *
     74     *     <input type="checked" checked id="selector">
     75     *                           ------- length is 7
     76     *
     77     *     <a rel=noopener>
     78     *        ------------ length is 12
     79     *
     80     * @since 6.5.0 Replaced `end` with `length` to more closely match `substr()`.
     81     *
    5882     * @var int
    5983     */
    60     public $end;
     84    public $length;
    6185
    6286    /**
     
    6488     *
    6589     * @since 6.2.0
     90     *
    6691     * @var bool
    6792     */
     
    7297     *
    7398     * @since 6.2.0
     99     * @since 6.5.0 Replaced `end` with `length` to more closely match `substr()`.
    74100     *
    75101     * @param string $name         Attribute name.
     
    77103     * @param int    $value_length Number of bytes attribute value spans.
    78104     * @param int    $start        The string offset where the attribute name starts.
    79      * @param int    $end          The string offset after the attribute value or its name.
     105     * @param int    $length       Byte length of the entire attribute name or name and value pair expression.
    80106     * @param bool   $is_true      Whether the attribute is a boolean attribute with true value.
    81107     */
    82     public function __construct( $name, $value_start, $value_length, $start, $end, $is_true ) {
     108    public function __construct( $name, $value_start, $value_length, $start, $length, $is_true ) {
    83109        $this->name            = $name;
    84110        $this->value_starts_at = $value_start;
    85111        $this->value_length    = $value_length;
    86112        $this->start           = $start;
    87         $this->end             = $end;
     113        $this->length          = $length;
    88114        $this->is_true         = $is_true;
    89115    }
  • trunk/src/wp-includes/html-api/class-wp-html-span.php

    r55734 r57179  
    1919 * @access private
    2020 * @since 6.2.0
     21 * @since 6.5.0 Replaced `end` with `length` to more closely align with `substr()`.
    2122 *
    2223 * @see WP_HTML_Tag_Processor
     
    2728     *
    2829     * @since 6.2.0
     30     *
    2931     * @var int
    3032     */
     
    3234
    3335    /**
    34      * Byte offset into document where span ends.
     36     * Byte length of this span.
    3537     *
    36      * @since 6.2.0
     38     * @since 6.5.0
     39     *
    3740     * @var int
    3841     */
    39     public $end;
     42    public $length;
    4043
    4144    /**
     
    4447     * @since 6.2.0
    4548     *
    46      * @param int $start Byte offset into document where replacement span begins.
    47      * @param int $end   Byte offset into document where replacement span ends.
     49     * @param int $start  Byte offset into document where replacement span begins.
     50     * @param int $length Byte length of span.
    4851     */
    49     public function __construct( $start, $end ) {
    50         $this->start = $start;
    51         $this->end   = $end;
     52    public function __construct( $start, $length ) {
     53        $this->start  = $start;
     54        $this->length = $length;
    5255    }
    5356}
  • trunk/src/wp-includes/html-api/class-wp-html-tag-processor.php

    r57116 r57179  
    330330
    331331    /**
     332     * Byte offset in input document where current token starts.
     333     *
     334     * Example:
     335     *
     336     *     <div id="test">...
     337     *     01234
     338     *     - token starts at 0
     339     *
     340     * @since 6.5.0
     341     *
     342     * @var int|null
     343     */
     344    private $token_starts_at;
     345
     346    /**
     347     * Byte length of current token.
     348     *
     349     * Example:
     350     *
     351     *     <div id="test">...
     352     *     012345678901234
     353     *     - token length is 14 - 0 = 14
     354     *
     355     *     a <!-- comment --> is a token.
     356     *     0123456789 123456789 123456789
     357     *     - token length is 17 - 2 = 15
     358     *
     359     * @since 6.5.0
     360     *
     361     * @var int|null
     362     */
     363    private $token_length;
     364
     365    /**
    332366     * Byte offset in input document where current tag name starts.
    333367     *
     
    339373     *
    340374     * @since 6.2.0
     375     *
    341376     * @var int|null
    342377     */
     
    353388     *
    354389     * @since 6.2.0
     390     *
    355391     * @var int|null
    356392     */
    357393    private $tag_name_length;
    358 
    359     /**
    360      * Byte offset in input document where current tag token ends.
    361      *
    362      * Example:
    363      *
    364      *     <div id="test">...
    365      *     0         1   |
    366      *     01234567890123456
    367      *      --- tag name ends at 14
    368      *
    369      * @since 6.2.0
    370      * @var int|null
    371      */
    372     private $tag_ends_at;
    373394
    374395    /**
     
    389410     *     //                 ^ parsing will continue from this point.
    390411     *     $this->attributes = array(
    391      *         'id' => new WP_HTML_Attribute_Match( 'id', null, 6, 17 )
     412     *         'id' => new WP_HTML_Attribute_Token( 'id', 9, 6, 5, 11, false )
    392413     *     );
    393414     *
     
    395416     *     // `class` attribute we will continue and add to this array.
    396417     *     $this->attributes = array(
    397      *         'id'    => new WP_HTML_Attribute_Match( 'id', null, 6, 17 ),
    398      *         'class' => new WP_HTML_Attribute_Match( 'class', 'outline', 18, 32 )
     418     *         'id'    => new WP_HTML_Attribute_Token( 'id', 9, 6, 5, 11, false ),
     419     *         'class' => new WP_HTML_Attribute_Token( 'class', 23, 7, 17, 13, false )
    399420     *     );
    400421     *
     
    485506     *     // Replace an attribute stored with a new value, indices
    486507     *     // sourced from the lazily-parsed HTML recognizer.
    487      *     $start = $attributes['src']->start;
    488      *     $end   = $attributes['src']->end;
    489      *     $modifications[] = new WP_HTML_Text_Replacement( $start, $end, $new_value );
     508     *     $start  = $attributes['src']->start;
     509     *     $length = $attributes['src']->length;
     510     *     $modifications[] = new WP_HTML_Text_Replacement( $start, $length, $new_value );
    490511     *
    491512     *     // Correspondingly, something like this will appear in this array.
     
    567588                return false;
    568589            }
    569             $this->tag_ends_at          = $tag_ends_at;
     590            $this->token_length         = $tag_ends_at - $this->token_starts_at;
    570591            $this->bytes_already_parsed = $tag_ends_at;
    571592
     
    809830        }
    810831
    811         $this->bookmarks[ $name ] = new WP_HTML_Span(
    812             $this->tag_name_starts_at - ( $this->is_closing_tag ? 2 : 1 ),
    813             $this->tag_ends_at
    814         );
     832        $this->bookmarks[ $name ] = new WP_HTML_Span( $this->token_starts_at, $this->token_length );
    815833
    816834        return true;
     
    876894            $at = strpos( $this->html, '</', $at );
    877895
    878             // If there is no possible tag closer then fail.
     896            // Fail if there is no possible tag closer.
    879897            if ( false === $at || ( $at + $tag_length ) >= $doc_length ) {
    880898                $this->bytes_already_parsed = $doc_length;
     
    10941112            }
    10951113
     1114            $this->token_starts_at = $at;
     1115
    10961116            if ( '/' === $this->html[ $at + 1 ] ) {
    10971117                $this->is_closing_tag = true;
     
    13821402                $value_length,
    13831403                $attribute_start,
    1384                 $attribute_end,
     1404                $attribute_end - $attribute_start,
    13851405                ! $has_value
    13861406            );
     
    13971417         * normative case of parsing tags with no duplicate attributes.
    13981418         */
    1399         $duplicate_span = new WP_HTML_Span( $attribute_start, $attribute_end );
     1419        $duplicate_span = new WP_HTML_Span( $attribute_start, $attribute_end - $attribute_start );
    14001420        if ( null === $this->duplicate_attributes ) {
    14011421            $this->duplicate_attributes = array( $comparable_name => array( $duplicate_span ) );
     
    14251445    private function after_tag() {
    14261446        $this->get_updated_html();
     1447        $this->token_starts_at      = null;
     1448        $this->token_length         = null;
    14271449        $this->tag_name_starts_at   = null;
    14281450        $this->tag_name_length      = null;
    1429         $this->tag_ends_at          = null;
    14301451        $this->is_closing_tag       = null;
    14311452        $this->attributes           = array();
     
    16071628        $output_buffer        = '';
    16081629        foreach ( $this->lexical_updates as $diff ) {
    1609             $shift = strlen( $diff->text ) - ( $diff->end - $diff->start );
     1630            $shift = strlen( $diff->text ) - $diff->length;
    16101631
    16111632            // Adjust the cursor position by however much an update affects it.
     
    16211642            $output_buffer       .= substr( $this->html, $bytes_already_copied, $diff->start - $bytes_already_copied );
    16221643            $output_buffer       .= $diff->text;
    1623             $bytes_already_copied = $diff->end;
     1644            $bytes_already_copied = $diff->start + $diff->length;
    16241645        }
    16251646
     
    16311652         */
    16321653        foreach ( $this->bookmarks as $bookmark_name => $bookmark ) {
     1654            $bookmark_end   = $bookmark->start + $bookmark->length;
     1655
    16331656            /*
    16341657             * Each lexical update which appears before the bookmark's endpoints
     
    16411664
    16421665            foreach ( $this->lexical_updates as $diff ) {
    1643                 if ( $bookmark->start < $diff->start && $bookmark->end < $diff->start ) {
     1666                $diff_end = $diff->start + $diff->length;
     1667
     1668                if ( $bookmark->start < $diff->start && $bookmark_end < $diff->start ) {
    16441669                    break;
    16451670                }
    16461671
    1647                 if ( $bookmark->start >= $diff->start && $bookmark->end < $diff->end ) {
     1672                if ( $bookmark->start >= $diff->start && $bookmark_end < $diff_end ) {
    16481673                    $this->release_bookmark( $bookmark_name );
    16491674                    continue 2;
    16501675                }
    16511676
    1652                 $delta = strlen( $diff->text ) - ( $diff->end - $diff->start );
     1677                $delta = strlen( $diff->text ) - $diff->length;
    16531678
    16541679                if ( $bookmark->start >= $diff->start ) {
     
    16561681                }
    16571682
    1658                 if ( $bookmark->end >= $diff->end ) {
     1683                if ( $bookmark_end >= $diff_end ) {
    16591684                    $tail_delta += $delta;
    16601685                }
    16611686            }
    16621687
    1663             $bookmark->start += $head_delta;
    1664             $bookmark->end   += $tail_delta;
     1688            $bookmark->start  += $head_delta;
     1689            $bookmark->length += $tail_delta - $head_delta;
    16651690        }
    16661691
     
    17441769         * start at the same location and contain the same text.
    17451770         */
    1746         return $a->end - $b->end;
     1771        return $a->length - $b->length;
    17471772    }
    17481773
     
    19721997        }
    19731998
    1974         return '/' === $this->html[ $this->tag_ends_at - 1 ];
     1999        /*
     2000         * The self-closing flag is the solidus at the _end_ of the tag, not the beginning.
     2001         *
     2002         * Example:
     2003         *
     2004         *     <figure />
     2005         *             ^ this appears one character before the end of the closing ">".
     2006         */
     2007        return '/' === $this->html[ $this->token_starts_at + $this->token_length - 1 ];
    19752008    }
    19762009
     
    21022135            $this->lexical_updates[ $comparable_name ] = new WP_HTML_Text_Replacement(
    21032136                $existing_attribute->start,
    2104                 $existing_attribute->end,
     2137                $existing_attribute->length,
    21052138                $updated_attribute
    21062139            );
     
    21202153            $this->lexical_updates[ $comparable_name ] = new WP_HTML_Text_Replacement(
    21212154                $this->tag_name_starts_at + $this->tag_name_length,
    2122                 $this->tag_name_starts_at + $this->tag_name_length,
     2155                0,
    21232156                ' ' . $updated_attribute
    21242157            );
     
    21952228        $this->lexical_updates[ $name ] = new WP_HTML_Text_Replacement(
    21962229            $this->attributes[ $name ]->start,
    2197             $this->attributes[ $name ]->end,
     2230            $this->attributes[ $name ]->length,
    21982231            ''
    21992232        );
     
    22042237                $this->lexical_updates[] = new WP_HTML_Text_Replacement(
    22052238                    $attribute_token->start,
    2206                     $attribute_token->end,
     2239                    $attribute_token->length,
    22072240                    ''
    22082241                );
     
    22902323         * be necessary for reparsing the current tag after updating the HTML.
    22912324         */
    2292         $before_current_tag = $this->tag_name_starts_at - 1;
     2325        $before_current_tag = $this->token_starts_at;
    22932326
    22942327        /*
     
    23262359
    23272360        $tag_ends_at                = strpos( $this->html, '>', $this->bytes_already_parsed );
    2328         $this->tag_ends_at          = $tag_ends_at;
     2361        $this->token_length         = $tag_ends_at - $this->token_starts_at;
    23292362        $this->bytes_already_parsed = $tag_ends_at;
    23302363
  • trunk/src/wp-includes/html-api/class-wp-html-text-replacement.php

    r55734 r57179  
    1616 * @access private
    1717 * @since 6.2.0
     18 * @since 6.5.0 Replaced `end` with `length` to more closely match `substr()`.
    1819 *
    1920 * @see WP_HTML_Tag_Processor
     
    2425     *
    2526     * @since 6.2.0
     27     *
    2628     * @var int
    2729     */
     
    2931
    3032    /**
    31      * Byte offset into document where replacement span ends.
     33     * Byte length of span being replaced.
    3234     *
    33      * @since 6.2.0
     35     * @since 6.5.0
     36     *
    3437     * @var int
    3538     */
    36     public $end;
     39    public $length;
    3740
    3841    /**
     
    4043     *
    4144     * @since 6.2.0
     45     *
    4246     * @var string
    4347     */
     
    4953     * @since 6.2.0
    5054     *
    51      * @param int    $start Byte offset into document where replacement span begins.
    52      * @param int    $end   Byte offset into document where replacement span ends.
    53      * @param string $text  Span of text to insert in document to replace existing content from start to end.
     55     * @param int    $start  Byte offset into document where replacement span begins.
     56     * @param int    $length Byte length of span in document being replaced.
     57     * @param string $text   Span of text to insert in document to replace existing content from start to end.
    5458     */
    55     public function __construct( $start, $end, $text ) {
    56         $this->start = $start;
    57         $this->end   = $end;
    58         $this->text  = $text;
     59    public function __construct( $start, $length, $text ) {
     60        $this->start  = $start;
     61        $this->length = $length;
     62        $this->text   = $text;
    5963    }
    6064}
Note: See TracChangeset for help on using the changeset viewer.