Make WordPress Core


Ignore:
Timestamp:
09/26/2023 09:15:21 AM (3 years ago)
Author:
Bernhard Reiter
Message:

HTML API: Add class name utilities has_class() and class_list().

This patch adds two new public methods to the HTML Tag Processor:

  • has_class() indicates if a matched tag contains a given CSS class name.
  • class_list() returns a generator to iterate over all the class names in a matched tag.

Included in this patch is a refactoring of the internal logic when matching
a tag to reuse the new has_class() function. Previously it was relying on
optimized code in the matches() function which performed byte-for-byte
class name comparison. With the change in this patch it will perform class
name matching on the decoded value, which might differ if a class attribute
contains character references.

These methods may be useful for running more complicated queries based
on the presence or absence of CSS class names. The use of these methods
avoids the need to manually decode the class attribute as reported by
$processor->get_attribute( 'class' ).

Props dmsnell.
Fixes #59209.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/wp-includes/html-api/class-wp-html-tag-processor.php

    r56684 r56703  
    624624
    625625        return true;
     626    }
     627
     628
     629    /**
     630     * Generator for a foreach loop to step through each class name for the matched tag.
     631     *
     632     * This generator function is designed to be used inside a "foreach" loop.
     633     *
     634     * Example:
     635     *
     636     *     $p = new WP_HTML_Tag_Processor( "<div class='free &lt;egg&gt;\tlang-en'>" );
     637     *     $p->next_tag();
     638     *     foreach ( $p->class_list() as $class_name ) {
     639     *         echo "{$class_name} ";
     640     *     }
     641     *     // Outputs: "free <egg> lang-en "
     642     *
     643     * @since 6.4.0
     644     */
     645    public function class_list() {
     646        /** @var string $class contains the string value of the class attribute, with character references decoded. */
     647        $class = $this->get_attribute( 'class' );
     648
     649        if ( ! is_string( $class ) ) {
     650            return;
     651        }
     652
     653        $seen = array();
     654
     655        $at = 0;
     656        while ( $at < strlen( $class ) ) {
     657            // Skip past any initial boundary characters.
     658            $at += strspn( $class, " \t\f\r\n", $at );
     659            if ( $at >= strlen( $class ) ) {
     660                return;
     661            }
     662
     663            // Find the byte length until the next boundary.
     664            $length = strcspn( $class, " \t\f\r\n", $at );
     665            if ( 0 === $length ) {
     666                return;
     667            }
     668
     669            /*
     670             * CSS class names are case-insensitive in the ASCII range.
     671             *
     672             * @see https://www.w3.org/TR/CSS2/syndata.html#x1
     673             */
     674            $name = strtolower( substr( $class, $at, $length ) );
     675            $at  += $length;
     676
     677            /*
     678             * It's expected that the number of class names for a given tag is relatively small.
     679             * Given this, it is probably faster overall to scan an array for a value rather
     680             * than to use the class name as a key and check if it's a key of $seen.
     681             */
     682            if ( in_array( $name, $seen, true ) ) {
     683                continue;
     684            }
     685
     686            $seen[] = $name;
     687            yield $name;
     688        }
     689    }
     690
     691
     692    /**
     693     * Returns whether a matched tag contains the given ASCII case-insensitive class name.
     694     *
     695     * @since 6.4.0
     696     *
     697     * @param string $wanted_class Look for this CSS class name, ASCII case-insensitive.
     698     * @return bool|null Whether the matched tag contains the given class name, or null if not matched.
     699     */
     700    public function has_class( $wanted_class ) {
     701        if ( ! $this->tag_name_starts_at ) {
     702            return null;
     703        }
     704
     705        $wanted_class = strtolower( $wanted_class );
     706
     707        foreach ( $this->class_list() as $class_name ) {
     708            if ( $class_name === $wanted_class ) {
     709                return true;
     710            }
     711        }
     712
     713        return false;
    626714    }
    627715
     
    23482436        }
    23492437
    2350         $needs_class_name = null !== $this->sought_class_name;
    2351 
    2352         if ( $needs_class_name && ! isset( $this->attributes['class'] ) ) {
     2438        if ( null !== $this->sought_class_name && ! $this->has_class( $this->sought_class_name ) ) {
    23532439            return false;
    23542440        }
    23552441
    2356         /*
    2357          * Match byte-for-byte (case-sensitive and encoding-form-sensitive) on the class name.
    2358          *
    2359          * This will overlook certain classes that exist in other lexical variations
    2360          * than was supplied to the search query, but requires more complicated searching.
    2361          */
    2362         if ( $needs_class_name ) {
    2363             $class_start = $this->attributes['class']->value_starts_at;
    2364             $class_end   = $class_start + $this->attributes['class']->value_length;
    2365             $class_at    = $class_start;
    2366 
    2367             /*
    2368              * Ensure that boundaries surround the class name to avoid matching on
    2369              * substrings of a longer name. For example, the sequence "not-odd"
    2370              * should not match for the class "odd" even though "odd" is found
    2371              * within the class attribute text.
    2372              *
    2373              * See https://html.spec.whatwg.org/#attributes-3
    2374              * See https://html.spec.whatwg.org/#space-separated-tokens
    2375              */
    2376             while (
    2377                 // phpcs:ignore WordPress.CodeAnalysis.AssignmentInCondition.FoundInWhileCondition
    2378                 false !== ( $class_at = strpos( $this->html, $this->sought_class_name, $class_at ) ) &&
    2379                 $class_at < $class_end
    2380             ) {
    2381                 /*
    2382                  * Verify this class starts at a boundary.
    2383                  */
    2384                 if ( $class_at > $class_start ) {
    2385                     $character = $this->html[ $class_at - 1 ];
    2386 
    2387                     if ( ' ' !== $character && "\t" !== $character && "\f" !== $character && "\r" !== $character && "\n" !== $character ) {
    2388                         $class_at += strlen( $this->sought_class_name );
    2389                         continue;
    2390                     }
    2391                 }
    2392 
    2393                 /*
    2394                  * Verify this class ends at a boundary as well.
    2395                  */
    2396                 if ( $class_at + strlen( $this->sought_class_name ) < $class_end ) {
    2397                     $character = $this->html[ $class_at + strlen( $this->sought_class_name ) ];
    2398 
    2399                     if ( ' ' !== $character && "\t" !== $character && "\f" !== $character && "\r" !== $character && "\n" !== $character ) {
    2400                         $class_at += strlen( $this->sought_class_name );
    2401                         continue;
    2402                     }
    2403                 }
    2404 
    2405                 return true;
    2406             }
    2407 
    2408             return false;
    2409         }
    2410 
    24112442        return true;
    24122443    }
Note: See TracChangeset for help on using the changeset viewer.