Make WordPress Core


Ignore:
Timestamp:
09/04/2024 04:32:37 AM (17 months ago)
Author:
dmsnell
Message:

HTML API: Respect document compat mode when handling CSS class names.

The HTML API has been behaving as if CSS class name selectors matched class names in an ASCII case-insensitive manner. This is only true if the document in question is set to quirks mode. Unfortunately most documents processed will be set to no-quirks mode, meaning that some CSS behaviors have been matching incorrectly when provided with case variants of class names.

In this patch, the CSS methods have been audited and updated to adhere to the rules governing ASCII case sensitivity when matching classes. This includes add_class(), remove_class(), has_class(), and class_list(). Now, it is assumed that a document is in no-quirks mode unless a full HTML parser infers quirks mode, and these methods will treat class names in a byte-for-byte manner. Otherwise, when a document is in quirks mode, the methods will compare the provided class names against existing class names for the tag in an ASCII case-insensitive way, while class_list() will return a lower-cased version of the existing class names.

The lower-casing in class_list() is performed for consistency, since it's possible that multiple case variants of the same comparable class name exist on a tag in the input HTML.

Developed in https://github.com/WordPress/wordpress-develop/pull/7169
Discussed in https://core.trac.wordpress.org/ticket/61531

Props dmsnell, jonsurrell.
See #61531.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/wp-includes/html-api/class-wp-html-tag-processor.php

    r58977 r58985  
    511511     */
    512512    protected $parser_state = self::STATE_READY;
     513
     514    /**
     515     * Indicates if the document is in quirks mode or no-quirks mode.
     516     *
     517     *  Impact on HTML parsing:
     518     *
     519     *   - In `NO_QUIRKS_MODE` (also known as "standard mode"):
     520     *       - CSS class and ID selectors match byte-for-byte (case-sensitively).
     521     *       - A TABLE start tag `<table>` implicitly closes any open `P` element.
     522     *
     523     *   - In `QUIRKS_MODE`:
     524     *       - CSS class and ID selectors match in an ASCII case-insensitive manner.
     525     *       - A TABLE start tag `<table>` opens a `TABLE` element as a child of a `P`
     526     *         element if one is open.
     527     *
     528     * Quirks and no-quirks mode are thus mostly about styling, but have an impact when
     529     * tables are found inside paragraph elements.
     530     *
     531     * @see self::QUIRKS_MODE
     532     * @see self::NO_QUIRKS_MODE
     533     *
     534     * @since 6.7.0
     535     *
     536     * @var string
     537     */
     538    protected $compat_mode = self::NO_QUIRKS_MODE;
    513539
    514540    /**
     
    11561182        $seen = array();
    11571183
     1184        $is_quirks = self::QUIRKS_MODE === $this->compat_mode;
     1185
    11581186        $at = 0;
    11591187        while ( $at < strlen( $class ) ) {
     
    11701198            }
    11711199
    1172             /*
    1173              * CSS class names are case-insensitive in the ASCII range.
    1174              *
    1175              * @see https://www.w3.org/TR/CSS2/syndata.html#x1
    1176              */
    1177             $name = str_replace( "\x00", "\u{FFFD}", strtolower( substr( $class, $at, $length ) ) );
    1178             $at  += $length;
     1200            $name = str_replace( "\x00", "\u{FFFD}", substr( $class, $at, $length ) );
     1201            if ( $is_quirks ) {
     1202                $name = strtolower( $name );
     1203            }
     1204            $at += $length;
    11791205
    11801206            /*
     
    12061232        }
    12071233
    1208         $wanted_class = strtolower( $wanted_class );
    1209 
     1234        $case_insensitive = self::QUIRKS_MODE === $this->compat_mode;
     1235
     1236        $wanted_length = strlen( $wanted_class );
    12101237        foreach ( $this->class_list() as $class_name ) {
    1211             if ( $class_name === $wanted_class ) {
     1238            if (
     1239                strlen( $class_name ) === $wanted_length &&
     1240                0 === substr_compare( $class_name, $wanted_class, 0, strlen( $wanted_class ), $case_insensitive )
     1241            ) {
    12121242                return true;
    12131243            }
     
    22972327        $modified = false;
    22982328
     2329        $seen      = array();
     2330        $to_remove = array();
     2331        $is_quirks = self::QUIRKS_MODE === $this->compat_mode;
     2332        if ( $is_quirks ) {
     2333            foreach ( $this->classname_updates as $updated_name => $action ) {
     2334                if ( self::REMOVE_CLASS === $action ) {
     2335                    $to_remove[] = strtolower( $updated_name );
     2336                }
     2337            }
     2338        } else {
     2339            foreach ( $this->classname_updates as $updated_name => $action ) {
     2340                if ( self::REMOVE_CLASS === $action ) {
     2341                    $to_remove[] = $updated_name;
     2342                }
     2343            }
     2344        }
     2345
    22992346        // Remove unwanted classes by only copying the new ones.
    23002347        $existing_class_length = strlen( $existing_class );
     
    23122359            }
    23132360
    2314             $name = substr( $existing_class, $at, $name_length );
    2315             $at  += $name_length;
    2316 
    2317             // If this class is marked for removal, start processing the next one.
    2318             $remove_class = (
    2319                 isset( $this->classname_updates[ $name ] ) &&
    2320                 self::REMOVE_CLASS === $this->classname_updates[ $name ]
    2321             );
    2322 
    2323             // If a class has already been seen then skip it; it should not be added twice.
    2324             if ( ! $remove_class ) {
    2325                 $this->classname_updates[ $name ] = self::SKIP_CLASS;
    2326             }
    2327 
    2328             if ( $remove_class ) {
     2361            $name                  = substr( $existing_class, $at, $name_length );
     2362            $comparable_class_name = $is_quirks ? strtolower( $name ) : $name;
     2363            $at                   += $name_length;
     2364
     2365            // If this class is marked for removal, remove it and move on to the next one.
     2366            if ( in_array( $comparable_class_name, $to_remove, true ) ) {
    23292367                $modified = true;
    23302368                continue;
    23312369            }
     2370
     2371            // If a class has already been seen then skip it; it should not be added twice.
     2372            if ( in_array( $comparable_class_name, $seen, true ) ) {
     2373                continue;
     2374            }
     2375
     2376            $seen[] = $comparable_class_name;
    23322377
    23332378            /*
     
    23512396        // Add new classes by appending those which haven't already been seen.
    23522397        foreach ( $this->classname_updates as $name => $operation ) {
    2353             if ( self::ADD_CLASS === $operation ) {
     2398            $comparable_name = $is_quirks ? strtolower( $name ) : $name;
     2399            if ( self::ADD_CLASS === $operation && ! in_array( $comparable_name, $seen, true ) ) {
    23542400                $modified = true;
    23552401
     
    39333979        }
    39343980
     3981        if ( self::QUIRKS_MODE !== $this->compat_mode ) {
     3982            $this->classname_updates[ $class_name ] = self::ADD_CLASS;
     3983            return true;
     3984        }
     3985
     3986        /*
     3987         * Because class names are matched ASCII-case-insensitively in quirks mode,
     3988         * this needs to see if a case variant of the given class name is already
     3989         * enqueued and update that existing entry, if so. This picks the casing of
     3990         * the first-provided class name for all lexical variations.
     3991         */
     3992        $class_name_length = strlen( $class_name );
     3993        foreach ( $this->classname_updates as $updated_name => $action ) {
     3994            if (
     3995                strlen( $updated_name ) === $class_name_length &&
     3996                0 === substr_compare( $updated_name, $class_name, 0, $class_name_length, true )
     3997            ) {
     3998                $this->classname_updates[ $updated_name ] = self::ADD_CLASS;
     3999                return true;
     4000            }
     4001        }
     4002
    39354003        $this->classname_updates[ $class_name ] = self::ADD_CLASS;
    3936 
    39374004        return true;
    39384005    }
     
    39544021        }
    39554022
    3956         if ( null !== $this->tag_name_starts_at ) {
     4023        if ( self::QUIRKS_MODE !== $this->compat_mode ) {
    39574024            $this->classname_updates[ $class_name ] = self::REMOVE_CLASS;
    3958         }
    3959 
     4025            return true;
     4026        }
     4027
     4028        /*
     4029         * Because class names are matched ASCII-case-insensitively in quirks mode,
     4030         * this needs to see if a case variant of the given class name is already
     4031         * enqueued and update that existing entry, if so. This picks the casing of
     4032         * the first-provided class name for all lexical variations.
     4033         */
     4034        $class_name_length = strlen( $class_name );
     4035        foreach ( $this->classname_updates as $updated_name => $action ) {
     4036            if (
     4037                strlen( $updated_name ) === $class_name_length &&
     4038                0 === substr_compare( $updated_name, $class_name, 0, $class_name_length, true )
     4039            ) {
     4040                $this->classname_updates[ $updated_name ] = self::REMOVE_CLASS;
     4041                return true;
     4042            }
     4043        }
     4044
     4045        $this->classname_updates[ $class_name ] = self::REMOVE_CLASS;
    39604046        return true;
    39614047    }
     
    43524438
    43534439    /**
     4440     * No-quirks mode document compatibility mode.
     4441     *
     4442     * > In no-quirks mode, the behavior is (hopefully) the desired behavior
     4443     * > described by the modern HTML and CSS specifications.
     4444     *
     4445     * @see self::$compat_mode
     4446     * @see https://developer.mozilla.org/en-US/docs/Web/HTML/Quirks_Mode_and_Standards_Mode
     4447     *
     4448     * @since 6.7.0
     4449     *
     4450     * @var string
     4451     */
     4452    const NO_QUIRKS_MODE = 'no-quirks-mode';
     4453
     4454    /**
     4455     * Quirks mode document compatibility mode.
     4456     *
     4457     * > In quirks mode, layout emulates behavior in Navigator 4 and Internet
     4458     * > Explorer 5. This is essential in order to support websites that were
     4459     * > built before the widespread adoption of web standards.
     4460     *
     4461     * @see self::$compat_mode
     4462     * @see https://developer.mozilla.org/en-US/docs/Web/HTML/Quirks_Mode_and_Standards_Mode
     4463     *
     4464     * @since 6.7.0
     4465     *
     4466     * @var string
     4467     */
     4468    const QUIRKS_MODE = 'quirks-mode';
     4469
     4470    /**
    43544471     * Indicates that a span of text may contain any combination of significant
    43554472     * kinds of characters: NULL bytes, whitespace, and others.
Note: See TracChangeset for help on using the changeset viewer.