Make WordPress Core

Changeset 56685


Ignore:
Timestamp:
09/25/2023 07:13:27 PM (14 months ago)
Author:
Bernhard Reiter
Message:

HTML API: Remove all duplicate copies of an attribute when removing.

When encountering an HTML tag with duplicate copies of an attribute the tag processor ignores the duplicate values, according to the specification. However, when removing an attribute it must remove all copies of that attribute lest one of the duplicates becomes the primary and it appears as if no attributes were removed.

In this patch we're adding tests that will be used to ensure that all attribute copies are removed from a tag when one is request to be removed.

Before

<?php
$p = new WP_HTML_Tag_Processor( '<br id=one id="two" id='three' id>' );
$p->next_tag();
$p->remove_attribute( 'id' );
$p->get_updated_html();
// <br id="two" id='three' id>

After

<?php
$p = new WP_HTML_Tag_Processor( '<br id=one id="two" id='three' id>' );
$p->next_tag();
$p->remove_attribute( 'id' );
$p->get_updated_html();
// <br>

Previously we have been overlooking duplicate attributes since they don't have an impact on what parses into the DOM. However, as one unit test affirmed (asserting the presence of the bug in the tag processor) when removing an attribute where duplicates exist this meant we ended up changing the value of an attribute instead of removing it.

In this patch we're tracking the text spans of the parsed duplicate attributes so that if we attempt to remove them then we'll have the appropriate information necessary to do so. When an attribute isn't removed we'll simply forget about the tracked duplicates. This involves some overhead for normal operation when in fact there are duplicate attributes on a tag, but that overhead is minimal in the form of integer pairs of indices for each duplicated attribute.

Props dmsnell, zieladam.
Merges [56684] to the 6.3 branch.
Fixes #58119.

Location:
branches/6.3
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • branches/6.3/src/wp-includes/html-api/class-wp-html-tag-processor.php

    r56564 r56685  
    406406     */
    407407    private $attributes = array();
     408
     409    /**
     410     * Tracks spans of duplicate attributes on a given tag, used for removing
     411     * all copies of an attribute when calling `remove_attribute()`.
     412     *
     413     * @since 6.3.2
     414     *
     415     * @var (WP_HTML_Span[])[]|null
     416     */
     417    private $duplicate_attributes = null;
    408418
    409419    /**
     
    12871297                ! $has_value
    12881298            );
     1299
     1300            return true;
     1301        }
     1302
     1303        /*
     1304         * Track the duplicate attributes so if we remove it, all disappear together.
     1305         *
     1306         * While `$this->duplicated_attributes` could always be stored as an `array()`,
     1307         * which would simplify the logic here, storing a `null` and only allocating
     1308         * an array when encountering duplicates avoids needless allocations in the
     1309         * normative case of parsing tags with no duplicate attributes.
     1310         */
     1311        $duplicate_span = new WP_HTML_Span( $attribute_start, $attribute_end );
     1312        if ( null === $this->duplicate_attributes ) {
     1313            $this->duplicate_attributes = array( $comparable_name => array( $duplicate_span ) );
     1314        } elseif ( ! array_key_exists( $comparable_name, $this->duplicate_attributes ) ) {
     1315            $this->duplicate_attributes[ $comparable_name ] = array( $duplicate_span );
     1316        } else {
     1317            $this->duplicate_attributes[ $comparable_name ][] = $duplicate_span;
    12891318        }
    12901319
     
    13081337    private function after_tag() {
    13091338        $this->get_updated_html();
    1310         $this->tag_name_starts_at = null;
    1311         $this->tag_name_length    = null;
    1312         $this->tag_ends_at        = null;
    1313         $this->is_closing_tag     = null;
    1314         $this->attributes         = array();
     1339        $this->tag_name_starts_at   = null;
     1340        $this->tag_name_length      = null;
     1341        $this->tag_ends_at          = null;
     1342        $this->is_closing_tag       = null;
     1343        $this->attributes           = array();
     1344        $this->duplicate_attributes = null;
    13151345    }
    13161346
     
    20812111        );
    20822112
     2113        // Removes any duplicated attributes if they were also present.
     2114        if ( null !== $this->duplicate_attributes && array_key_exists( $name, $this->duplicate_attributes ) ) {
     2115            foreach ( $this->duplicate_attributes[ $name ] as $attribute_token ) {
     2116                $this->lexical_updates[] = new WP_HTML_Text_Replacement(
     2117                    $attribute_token->start,
     2118                    $attribute_token->end,
     2119                    ''
     2120                );
     2121            }
     2122        }
     2123
    20832124        return true;
    20842125    }
  • branches/6.3/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php

    r56564 r56685  
    10501050     * all its instances and output just `<div />`.
    10511051     *
    1052      * Today, however, WP_HTML_Tag_Processor only removes the first such attribute. It seems like a corner case
    1053      * and introducing additional complexity to correctly handle this scenario doesn't seem to be worth it.
    1054      * Let's revisit if and when this becomes a problem.
    1055      *
    1056      * This test is in place to confirm this behavior, which while incorrect, is well-defined.
    1057      * A later fix introduced to the Tag Processor should update this test to reflect the
    1058      * wanted and correct behavior.
    1059      *
    1060      * @ticket 56299
     1052     * @since 6.3.2 Removes all duplicated attributes as expected.
     1053     *
     1054     * @ticket 58119
    10611055     *
    10621056     * @covers WP_HTML_Tag_Processor::remove_attribute
     
    10671061        $p->remove_attribute( 'id' );
    10681062
    1069         $this->assertSame(
    1070             '<div  id="ignored-id"><span id="second">Text</span></div>',
     1063        $this->assertStringNotContainsString(
     1064            'update-me',
    10711065            $p->get_updated_html(),
    10721066            'First attribute (when duplicates exist) was not removed'
     
    10881082            $p->get_updated_html(),
    10891083            'Attribute was not removed'
     1084        );
     1085    }
     1086
     1087    /**
     1088     * @ticket 58119
     1089     *
     1090     * @since 6.3.2 Removes all duplicated attributes as expected.
     1091     *
     1092     * @covers WP_HTML_Tag_Processor::remove_attribute
     1093     *
     1094     * @dataProvider data_html_with_duplicated_attributes
     1095     */
     1096    public function test_remove_attribute_with_duplicated_attributes_removes_all_of_them( $html_with_duplicate_attributes, $attribute_to_remove ) {
     1097        $p = new WP_HTML_Tag_Processor( $html_with_duplicate_attributes );
     1098        $p->next_tag();
     1099
     1100        $p->remove_attribute( $attribute_to_remove );
     1101        $this->assertNull( $p->get_attribute( $attribute_to_remove ), 'Failed to remove all copies of an attribute when duplicated in modified source.' );
     1102
     1103        // Recreate a tag processor with the updated HTML after removing the attribute.
     1104        $p = new WP_HTML_Tag_Processor( $p->get_updated_html() );
     1105        $p->next_tag();
     1106        $this->assertNull( $p->get_attribute( $attribute_to_remove ), 'Failed to remove all copies of duplicated attributes when getting updated HTML.' );
     1107    }
     1108
     1109    /**
     1110     * @ticket 58119
     1111     *
     1112     * @since 6.3.2 Removes all duplicated attributes as expected.
     1113     *
     1114     * @covers WP_HTML_Tag_Processor::remove_attribute
     1115     */
     1116    public function test_previous_duplicated_attributes_are_not_removed_on_successive_tag_removal() {
     1117        $p = new WP_HTML_Tag_Processor( '<span id=one id=two id=three><span id=four>' );
     1118        $p->next_tag();
     1119        $p->next_tag();
     1120        $p->remove_attribute( 'id' );
     1121
     1122        $this->assertSame( '<span id=one id=two id=three><span >', $p->get_updated_html() );
     1123    }
     1124
     1125    /**
     1126     * Data provider.
     1127     *
     1128     * @ticket 58119
     1129     *
     1130     * @return array[].
     1131     */
     1132    public function data_html_with_duplicated_attributes() {
     1133        return array(
     1134            'Double attributes'               => array( '<div id=one id=two>', 'id' ),
     1135            'Triple attributes'               => array( '<div id=one id=two id=three>', 'id' ),
     1136            'Duplicates around another'       => array( '<img src="test.png" alt="kites flying in the wind" src="kites.jpg">', 'src' ),
     1137            'Case-variants of attribute'      => array( '<button disabled inert DISABLED dISaBled INERT DisABleD>', 'disabled' ),
     1138            'Case-variants of attribute name' => array( '<button disabled inert DISABLED dISaBled INERT DisABleD>', 'DISABLED' ),
    10901139        );
    10911140    }
Note: See TracChangeset for help on using the changeset viewer.