Make WordPress Core


Ignore:
Timestamp:
09/04/2024 04:32:37 AM (5 months ago)
Author:
dmsnell
Message:

HTML API: Respect document compat mode when handling CSS class names.

The HTML API has been behaving as if CSS class name selectors matched class names in an ASCII case-insensitive manner. This is only true if the document in question is set to quirks mode. Unfortunately most documents processed will be set to no-quirks mode, meaning that some CSS behaviors have been matching incorrectly when provided with case variants of class names.

In this patch, the CSS methods have been audited and updated to adhere to the rules governing ASCII case sensitivity when matching classes. This includes add_class(), remove_class(), has_class(), and class_list(). Now, it is assumed that a document is in no-quirks mode unless a full HTML parser infers quirks mode, and these methods will treat class names in a byte-for-byte manner. Otherwise, when a document is in quirks mode, the methods will compare the provided class names against existing class names for the tag in an ASCII case-insensitive way, while class_list() will return an ASCII lower-cased version of the existing class names.

The lower-casing in class_list() is performed for consistency, since it's possible that multiple case variants of the same comparable class name exist on a tag in the input HTML.

Developed in https://github.com/WordPress/wordpress-develop/pull/7169
Discussed in https://core.trac.wordpress.org/ticket/61531

Props dmsnell, jonsurrell.
See #61531.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/tests/phpunit/tests/html-api/wpHtmlProcessor.php

    r58892 r58985  
    520520        $this->assertTrue( $processor->next_tag( 'script' ) );
    521521    }
     522
     523    /**
     524     * Ensures that the tag processor is case sensitive when removing CSS classes in no-quirks mode.
     525     *
     526     * @ticket 61531
     527     *
     528     * @covers ::remove_class
     529     */
     530    public function test_remove_class_no_quirks_mode() {
     531        $processor = WP_HTML_Processor::create_full_parser( '<!DOCTYPE html><span class="UPPER">' );
     532        $processor->next_tag( 'SPAN' );
     533        $processor->remove_class( 'upper' );
     534        $this->assertSame( '<!DOCTYPE html><span class="UPPER">', $processor->get_updated_html() );
     535
     536        $processor->remove_class( 'UPPER' );
     537        $this->assertSame( '<!DOCTYPE html><span >', $processor->get_updated_html() );
     538    }
     539
     540    /**
     541     * Ensures that the tag processor is case sensitive when adding CSS classes in no-quirks mode.
     542     *
     543     * @ticket 61531
     544     *
     545     * @covers ::add_class
     546     */
     547    public function test_add_class_no_quirks_mode() {
     548        $processor = WP_HTML_Processor::create_full_parser( '<!DOCTYPE html><span class="UPPER">' );
     549        $processor->next_tag( 'SPAN' );
     550        $processor->add_class( 'UPPER' );
     551        $this->assertSame( '<!DOCTYPE html><span class="UPPER">', $processor->get_updated_html() );
     552
     553        $processor->add_class( 'upper' );
     554        $this->assertSame( '<!DOCTYPE html><span class="UPPER upper">', $processor->get_updated_html() );
     555    }
     556
     557    /**
     558     * Ensures that the tag processor is case sensitive when checking for CSS classes in no-quirks mode.
     559     *
     560     * @ticket 61531
     561     *
     562     * @covers ::has_class
     563     */
     564    public function test_has_class_no_quirks_mode() {
     565        $processor = WP_HTML_Processor::create_full_parser( '<!DOCTYPE html><span class="UPPER">' );
     566        $processor->next_tag( 'SPAN' );
     567        $this->assertFalse( $processor->has_class( 'upper' ) );
     568        $this->assertTrue( $processor->has_class( 'UPPER' ) );
     569    }
     570
     571    /**
     572     * Ensures that the tag processor lists unique CSS class names in no-quirks mode.
     573     *
     574     * @ticket 61531
     575     *
     576     * @covers ::class_list
     577     */
     578    public function test_class_list_no_quirks_mode() {
     579        $processor = WP_HTML_Processor::create_full_parser(
     580            /*
     581             * U+00C9 is LATIN CAPITAL LETTER E WITH ACUTE
     582             * U+0045 is LATIN CAPITAL LETTER E
     583             * U+0301 is COMBINING ACUTE ACCENT
     584             *
     585             * This tests not only that the class matching deduplicates the É, but also
     586             * that it treats the same character in different normalization forms as
     587             * distinct, since matching occurs on a byte-for-byte basis.
     588             */
     589            "<!DOCTYPE html><span class='A A a B b \u{C9} \u{45}\u{0301} \u{C9} é'>"
     590        );
     591        $processor->next_tag( 'SPAN' );
     592        $class_list = iterator_to_array( $processor->class_list() );
     593        $this->assertSame(
     594            array( 'A', 'a', 'B', 'b', 'É', "E\u{0301}", 'é' ),
     595            $class_list
     596        );
     597    }
     598
     599    /**
     600     * Ensures that the tag processor is case insensitive when removing CSS classes in quirks mode.
     601     *
     602     * @ticket 61531
     603     *
     604     * @covers ::remove_class
     605     */
     606    public function test_remove_class_quirks_mode() {
     607        $processor = WP_HTML_Processor::create_full_parser( '<span class="uPPER">' );
     608        $processor->next_tag( 'SPAN' );
     609        $processor->remove_class( 'upPer' );
     610        $this->assertSame( '<span >', $processor->get_updated_html() );
     611    }
     612
     613    /**
     614     * Ensures that the tag processor is case insensitive when adding CSS classes in quirks mode.
     615     *
     616     * @ticket 61531
     617     *
     618     * @covers ::add_class
     619     */
     620    public function test_add_class_quirks_mode() {
     621        $processor = WP_HTML_Processor::create_full_parser( '<span class="UPPER">' );
     622        $processor->next_tag( 'SPAN' );
     623        $processor->add_class( 'upper' );
     624
     625        $this->assertSame( '<span class="UPPER">', $processor->get_updated_html() );
     626
     627        $processor->add_class( 'ANOTHER-UPPER' );
     628        $this->assertSame( '<span class="UPPER ANOTHER-UPPER">', $processor->get_updated_html() );
     629    }
     630
     631    /**
     632     * Ensures that the tag processor is case insensitive when checking for CSS classes in quirks mode.
     633     *
     634     * @ticket 61531
     635     *
     636     * @covers ::has_class
     637     */
     638    public function test_has_class_quirks_mode() {
     639        $processor = WP_HTML_Processor::create_full_parser( '<span class="UPPER">' );
     640        $processor->next_tag( 'SPAN' );
     641        $this->assertTrue( $processor->has_class( 'upper' ) );
     642        $this->assertTrue( $processor->has_class( 'UPPER' ) );
     643    }
     644
     645    /**
     646     * Ensures that the tag processor lists unique CSS class names in quirks mode.
     647     *
     648     * @ticket 61531
     649     *
     650     * @covers ::class_list
     651     */
     652    public function test_class_list_quirks_mode() {
     653        $processor = WP_HTML_Processor::create_full_parser(
     654            /*
     655             * U+00C9 is LATIN CAPITAL LETTER E WITH ACUTE
     656             * U+0045 is LATIN CAPITAL LETTER E
     657             * U+0065 is LATIN SMALL LETTER E
     658             * U+0301 is COMBINING ACUTE ACCENT
     659             *
     660             * This tests that class names are deduplicated after ASCII lower-casing (so the
     661             * repeated É collapses and E+U+0301 folds into e+U+0301), while non-ASCII bytes are
     662             * still compared byte-for-byte, keeping É, é, and the decomposed form distinct.
     663             */
     664            "<span class='A A a B b \u{C9} \u{45}\u{301} \u{C9} é \u{65}\u{301}'>"
     665        );
     666        $processor->next_tag( 'SPAN' );
     667        $class_list = iterator_to_array( $processor->class_list() );
     668        $this->assertSame(
     669            array( 'a', 'b', 'É', "e\u{301}", 'é' ),
     670            $class_list
     671        );
     672    }
    522673}
Note: See TracChangeset for help on using the changeset viewer.