Make WordPress Core

Changeset 56703


Ignore:
Timestamp:
09/26/2023 09:15:21 AM (9 months ago)
Author:
Bernhard Reiter
Message:

HTML API: Add class name utilities has_class() and class_list().

This patch adds two new public methods to the HTML Tag Processor:

  • has_class() indicates if a matched tag contains a given CSS class name.
  • class_list() returns a generator to iterate over all the class names in a matched tag.

Included in this patch is a refactoring of the internal logic when matching
a tag to reuse the new has_class() function. Previously it was relying on
optimized code in the matches() function which performed byte-for-byte
class name comparison. With the change in this patch it will perform class
name matching on the decoded value, which might differ if a class attribute
contains character references.

These methods may be useful for running more complicated queries based
on the presence or absence of CSS class names. The use of these methods
avoid the need to manually split, lowercase, and de-duplicate the (already
decoded) class attribute value reported by $processor->get_attribute( 'class' ).

Props dmsnell.
Fixes #59209.

Location:
trunk
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/wp-includes/html-api/class-wp-html-tag-processor.php

    r56684 r56703  
    624624
    625625        return true;
     626    }
     627
     628
     629    /**
     630     * Generator for a foreach loop to step through each class name for the matched tag.
     631     *
     632     * This generator function is designed to be used inside a "foreach" loop.
     633     *
     634     * Example:
     635     *
     636     *     $p = new WP_HTML_Tag_Processor( "<div class='free &lt;egg&gt;\tlang-en'>" );
     637     *     $p->next_tag();
     638     *     foreach ( $p->class_list() as $class_name ) {
     639     *         echo "{$class_name} ";
     640     *     }
     641     *     // Outputs: "free <egg> lang-en "
     642     *
     643     * @since 6.4.0
     644     */
     645    public function class_list() {
     646        /** @var string $class contains the string value of the class attribute, with character references decoded. */
     647        $class = $this->get_attribute( 'class' );
     648
     649        if ( ! is_string( $class ) ) {
     650            return;
     651        }
     652
     653        $seen = array();
     654
     655        $at = 0;
     656        while ( $at < strlen( $class ) ) {
     657            // Skip past any initial boundary characters.
     658            $at += strspn( $class, " \t\f\r\n", $at );
     659            if ( $at >= strlen( $class ) ) {
     660                return;
     661            }
     662
     663            // Find the byte length until the next boundary.
     664            $length = strcspn( $class, " \t\f\r\n", $at );
     665            if ( 0 === $length ) {
     666                return;
     667            }
     668
     669            /*
     670             * CSS class names are case-insensitive in the ASCII range.
     671             *
     672             * @see https://www.w3.org/TR/CSS2/syndata.html#x1
     673             */
     674            $name = strtolower( substr( $class, $at, $length ) );
     675            $at  += $length;
     676
     677            /*
     678             * It's expected that the number of class names for a given tag is relatively small.
     679             * Given this, it is probably faster overall to scan an array for a value rather
     680             * than to use the class name as a key and check if it's a key of $seen.
     681             */
     682            if ( in_array( $name, $seen, true ) ) {
     683                continue;
     684            }
     685
     686            $seen[] = $name;
     687            yield $name;
     688        }
     689    }
     690
     691
     692    /**
     693     * Returns whether the matched tag contains the given ASCII case-insensitive class name.
     694     *
     695     * @since 6.4.0
     696     *
     697     * @param string $wanted_class Look for this CSS class name, ASCII case-insensitive.
     698     * @return bool|null Whether the matched tag contains the given class name, or null if not matched.
     699     */
     700    public function has_class( $wanted_class ) {
     701        if ( ! $this->tag_name_starts_at ) {
     702            return null;
     703        }
     704
     705        $wanted_class = strtolower( $wanted_class );
     706
     707        foreach ( $this->class_list() as $class_name ) {
     708            if ( $class_name === $wanted_class ) {
     709                return true;
     710            }
     711        }
     712
     713        return false;
    626714    }
    627715
     
    23482436        }
    23492437
    2350         $needs_class_name = null !== $this->sought_class_name;
    2351 
    2352         if ( $needs_class_name && ! isset( $this->attributes['class'] ) ) {
     2438        if ( null !== $this->sought_class_name && ! $this->has_class( $this->sought_class_name ) ) {
    23532439            return false;
    23542440        }
    23552441
    2356         /*
    2357          * Match byte-for-byte (case-sensitive and encoding-form-sensitive) on the class name.
    2358          *
    2359          * This will overlook certain classes that exist in other lexical variations
    2360          * than was supplied to the search query, but requires more complicated searching.
    2361          */
    2362         if ( $needs_class_name ) {
    2363             $class_start = $this->attributes['class']->value_starts_at;
    2364             $class_end   = $class_start + $this->attributes['class']->value_length;
    2365             $class_at    = $class_start;
    2366 
    2367             /*
    2368              * Ensure that boundaries surround the class name to avoid matching on
    2369              * substrings of a longer name. For example, the sequence "not-odd"
    2370              * should not match for the class "odd" even though "odd" is found
    2371              * within the class attribute text.
    2372              *
    2373              * See https://html.spec.whatwg.org/#attributes-3
    2374              * See https://html.spec.whatwg.org/#space-separated-tokens
    2375              */
    2376             while (
    2377                 // phpcs:ignore WordPress.CodeAnalysis.AssignmentInCondition.FoundInWhileCondition
    2378                 false !== ( $class_at = strpos( $this->html, $this->sought_class_name, $class_at ) ) &&
    2379                 $class_at < $class_end
    2380             ) {
    2381                 /*
    2382                  * Verify this class starts at a boundary.
    2383                  */
    2384                 if ( $class_at > $class_start ) {
    2385                     $character = $this->html[ $class_at - 1 ];
    2386 
    2387                     if ( ' ' !== $character && "\t" !== $character && "\f" !== $character && "\r" !== $character && "\n" !== $character ) {
    2388                         $class_at += strlen( $this->sought_class_name );
    2389                         continue;
    2390                     }
    2391                 }
    2392 
    2393                 /*
    2394                  * Verify this class ends at a boundary as well.
    2395                  */
    2396                 if ( $class_at + strlen( $this->sought_class_name ) < $class_end ) {
    2397                     $character = $this->html[ $class_at + strlen( $this->sought_class_name ) ];
    2398 
    2399                     if ( ' ' !== $character && "\t" !== $character && "\f" !== $character && "\r" !== $character && "\n" !== $character ) {
    2400                         $class_at += strlen( $this->sought_class_name );
    2401                         continue;
    2402                     }
    2403                 }
    2404 
    2405                 return true;
    2406             }
    2407 
    2408             return false;
    2409         }
    2410 
    24112442        return true;
    24122443    }
  • trunk/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php

    r56684 r56703  
    500500
    501501    /**
     502     * @ticket 59209
     503     *
     504     * @covers WP_HTML_Tag_Processor::next_tag
     505     */
     506    public function test_next_tag_matches_decoded_class_names() {
     507        $p = new WP_HTML_Tag_Processor( '<div class="&lt;egg&gt;">' );
     508
     509        $this->assertTrue( $p->next_tag( array( 'class_name' => '<egg>' ) ), 'Failed to find tag with HTML-encoded class name.' );
     510    }
     511
     512    /**
    502513     * @ticket 56299
    503514     * @ticket 57852
     
    19551966            'STYLE'            => array( '<style>* { margin: 0 }</style><div target>' ),
    19561967            'STYLE hiding DIV' => array( '<style>li::before { content: "<div non-target>" }</style><div target>' ),
     1968        );
     1969    }
     1970
     1971    /**
     1972     * @ticket 59209
     1973     *
     1974     * @covers WP_HTML_Tag_Processor::class_list
     1975     */
     1976    public function test_class_list_empty_when_missing_class() {
     1977        $p = new WP_HTML_Tag_Processor( '<div>' );
     1978        $p->next_tag();
     1979
     1980        $found_classes = false;
     1981        foreach ( $p->class_list() as $class ) {
     1982            $found_classes = true;
     1983        }
     1984
     1985        $this->assertFalse( $found_classes, 'Found classes when none exist.' );
     1986    }
     1987
     1988    /**
     1989     * @ticket 59209
     1990     *
     1991     * @covers WP_HTML_Tag_Processor::class_list
     1992     */
     1993    public function test_class_list_empty_when_class_is_boolean() {
     1994        $p = new WP_HTML_Tag_Processor( '<div class>' );
     1995        $p->next_tag();
     1996
     1997        $found_classes = false;
     1998        foreach ( $p->class_list() as $class ) {
     1999            $found_classes = true;
     2000        }
     2001
     2002        $this->assertFalse( $found_classes, 'Found classes when none exist.' );
     2003    }
     2004
     2005    /**
     2006     * @ticket 59209
     2007     *
     2008     * @covers WP_HTML_Tag_Processor::class_list
     2009     */
     2010    public function test_class_list_empty_when_class_is_empty() {
     2011        $p = new WP_HTML_Tag_Processor( '<div class="">' );
     2012        $p->next_tag();
     2013
     2014        $found_classes = false;
     2015        foreach ( $p->class_list() as $class ) {
     2016            $found_classes = true;
     2017        }
     2018
     2019        $this->assertFalse( $found_classes, 'Found classes when none exist.' );
     2020    }
     2021
     2022    /**
     2023     * @ticket 59209
     2024     *
     2025     * @covers WP_HTML_Tag_Processor::class_list
     2026     */
     2027    public function test_class_list_visits_each_class_in_order() {
     2028        $p = new WP_HTML_Tag_Processor( '<div class="one two three">' );
     2029        $p->next_tag();
     2030
     2031        $found_classes = array();
     2032        foreach ( $p->class_list() as $class ) {
     2033            $found_classes[] = $class;
     2034        }
     2035
     2036        $this->assertSame( array( 'one', 'two', 'three' ), $found_classes, 'Failed to visit the class names in their original order.' );
     2037    }
     2038
     2039    /**
     2040     * @ticket 59209
     2041     *
     2042     * @covers WP_HTML_Tag_Processor::class_list
     2043     */
     2044    public function test_class_list_decodes_class_names() {
     2045        $p = new WP_HTML_Tag_Processor( '<div class="&notin;-class &lt;egg&gt; &#xff03;">' );
     2046        $p->next_tag();
     2047
     2048        $found_classes = array();
     2049        foreach ( $p->class_list() as $class ) {
     2050            $found_classes[] = $class;
     2051        }
     2052
     2053        $this->assertSame( array( '∉-class', '<egg>', "\u{ff03}" ), $found_classes, 'Failed to report class names in their decoded form.' );
     2054    }
     2055
     2056    /**
     2057     * @ticket 59209
     2058     *
     2059     * @covers WP_HTML_Tag_Processor::class_list
     2060     */
     2061    public function test_class_list_visits_unique_class_names_only_once() {
     2062        $p = new WP_HTML_Tag_Processor( '<div class="one one &#x6f;ne">' );
     2063        $p->next_tag();
     2064
     2065        $found_classes = array();
     2066        foreach ( $p->class_list() as $class ) {
     2067            $found_classes[] = $class;
     2068        }
     2069
     2070        $this->assertSame( array( 'one' ), $found_classes, 'Visited multiple copies of the same class name when it should have skipped the duplicates.' );
     2071    }
     2072
     2073    /**
     2074     * @ticket 59209
     2075     *
     2076     * @covers WP_HTML_Tag_Processor::has_class
     2077     *
     2078     * @dataProvider data_html_with_variations_of_class_values_and_sought_class_names
     2079     *
     2080     * @param string $html         Contains a tag optionally containing a `class` attribute.
     2081     * @param string $sought_class Name of class to find in the input tag's `class`.
     2082     * @param bool   $has_class    Whether the sought class exists in the given HTML.
     2083     */
     2084    public function test_has_class_handles_expected_class_name_variations( $html, $sought_class, $has_class ) {
     2085        $p = new WP_HTML_Tag_Processor( $html );
     2086        $p->next_tag();
     2087
     2088        if ( $has_class ) {
     2089            $this->assertTrue( $p->has_class( $sought_class ), "Failed to find expected class {$sought_class}." );
     2090        } else {
     2091            $this->assertFalse( $p->has_class( $sought_class ), "Found class {$sought_class} when it doesn't exist." );
     2092        }
     2093    }
     2094
     2095    /**
     2096     * Data provider.
     2097     *
     2098     * @return array[]
     2099     */
     2100    public function data_html_with_variations_of_class_values_and_sought_class_names() {
     2101        return array(
     2102            'Tag without any classes'      => array( '<div>', 'foo', false ),
     2103            'Tag with boolean class'       => array( '<img class>', 'foo', false ),
     2104            'Tag with empty class'         => array( '<p class="">', 'foo', false ),
     2105            'Tag with exact match'         => array( '<button class="foo">', 'foo', true ),
     2106            'Tag with duplicate matches'   => array( '<span class="foo bar foo">', 'foo', true ),
     2107            'Tag with non-initial match'   => array( '<section class="bar foo">', 'foo', true ),
     2108            'Tag with encoded match'       => array( '<main class="&hellip;">', '…', true ),
     2109            'Class with tab separator'     => array( "<div class='one\ttwo'>", 'two', true ),
     2110            'Class with newline separator' => array( "<div class='one\ntwo\n'>", 'two', true ),
     2111            'False duplicate attribute'    => array( '<img class=dog class=cat>', 'cat', false ),
    19572112        );
    19582113    }
Note: See TracChangeset for help on using the changeset viewer.