Make WordPress Core

Changeset 56702


Ignore:
Timestamp:
09/26/2023 08:18:25 AM (7 months ago)
Author:
Bernhard Reiter
Message:

HTML API: Add matches_breadcrumbs() method for better querying.

Inside a next_tag() loop it can be challenging to use breadcrumbs because they are only exposed inside the call to next_tag() via the $query arg.

This patch exposes a new method, matches_breadcrumbs(), which can be called from within a next_tag() loop to perform more complicated queries against the currently-matched tag.

This method also supports a wildcard * crumb, which matches any single HTML tag at that position in the breadcrumb path, e.g. an unspecified ancestor of the currently-matched tag.

Props dmsnell, westonruter, mukesh27.
Fixes #59400.

Location:
trunk
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/wp-includes/html-api/class-wp-html-processor.php

    r56565 r56702  
    358358     *     @type string|null $class_name   Tag must contain this whole class name to match.
    359359     *     @type string[]    $breadcrumbs  DOM sub-path at which element is found, e.g. `array( 'FIGURE', 'IMG' )`.
     360     *                                     May also contain the wildcard `*` which matches a single element, e.g. `array( 'SECTION', '*' )`.
    360361     * }
    361362     * @return bool Whether a tag was matched.
     
    407408        $match_offset = isset( $query['match_offset'] ) ? (int) $query['match_offset'] : 1;
    408409
    409         $crumb  = end( $breadcrumbs );
    410         $target = strtoupper( $crumb );
    411410        while ( $match_offset > 0 && $this->step() ) {
    412             if ( $target !== $this->get_tag() ) {
    413                 continue;
     411            if ( $this->matches_breadcrumbs( $breadcrumbs ) && 0 === --$match_offset ) {
     412                return true;
    414413            }
    415 
    416             // Look up the stack to see if the breadcrumbs match.
    417             foreach ( $this->state->stack_of_open_elements->walk_up() as $node ) {
    418                 if ( strtoupper( $crumb ) !== $node->node_name ) {
    419                     break;
    420                 }
    421 
    422                 $crumb = prev( $breadcrumbs );
    423                 if ( false === $crumb && 0 === --$match_offset && ! $this->is_tag_closer() ) {
    424                     return true;
    425                 }
     414        }
     415
     416        return false;
     417    }
     418
     419    /**
     420     * Indicates if the currently-matched tag matches the given breadcrumbs.
     421     *
     422     * A "*" is a single-tag wildcard: it matches any one tag, but it never matches zero tags.
     423     *
     424     * At some point this function _may_ support a `**` syntax for matching any number
     425     * of unspecified tags in the breadcrumb stack. This has been intentionally left
     426     * out, however, to keep this function simple and to avoid introducing backtracking,
     427     * which could open up surprising performance breakdowns.
     428     *
     429     * Example:
     430     *
     431     *     $processor = WP_HTML_Processor::createFragment( '<div><span><figure><img></figure></span></div>' );
     432     *     $processor->next_tag( 'img' );
     433     *     true  === $processor->matches_breadcrumbs( array( 'figure', 'img' ) );
     434     *     true  === $processor->matches_breadcrumbs( array( 'span', 'figure', 'img' ) );
     435     *     false === $processor->matches_breadcrumbs( array( 'span', 'img' ) );
     436     *     true  === $processor->matches_breadcrumbs( array( 'span', '*', 'img' ) );
     437     *
     438     * @since 6.4.0
     439     *
     440     * @param string[] $breadcrumbs DOM sub-path at which element is found, e.g. `array( 'FIGURE', 'IMG' )`.
     441     *                              May also contain the wildcard `*` which matches a single element, e.g. `array( 'SECTION', '*' )`.
     442     * @return bool Whether the currently-matched tag is found at the given nested structure.
     443     */
     444    public function matches_breadcrumbs( $breadcrumbs ) {
     445        if ( ! $this->get_tag() ) {
     446            return false;
     447        }
     448
     449        // Everything matches when there are zero constraints.
     450        if ( 0 === count( $breadcrumbs ) ) {
     451            return true;
     452        }
     453
     454        // Start at the last crumb.
     455        $crumb = end( $breadcrumbs );
     456
     457        if ( '*' !== $crumb && $this->get_tag() !== strtoupper( $crumb ) ) {
     458            return false;
     459        }
     460
     461        foreach ( $this->state->stack_of_open_elements->walk_up() as $node ) {
     462            $crumb = strtoupper( current( $breadcrumbs ) );
     463
     464            if ( '*' !== $crumb && $node->node_name !== $crumb ) {
     465                return false;
    426466            }
    427467
    428             $crumb = end( $breadcrumbs );
     468            if ( false === prev( $breadcrumbs ) ) {
     469                return true;
     470            }
    429471        }
    430472
  • trunk/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php

    r56380 r56702  
    354354
    355355    /**
     356     * @ticket 59400
     357     *
     358     * @dataProvider data_html_with_breadcrumbs_of_various_specificity
     359     *
     360     * @param string   $html_with_target_node HTML with a node containing a "target" attribute.
     361     * @param string[] $breadcrumbs           Breadcrumbs to test at the target node.
     362     * @param bool     $should_match          Whether the target node should match the breadcrumbs.
     363     */
     364    public function test_reports_if_tag_matches_breadcrumbs_of_various_specificity( $html_with_target_node, $breadcrumbs, $should_match ) {
     365        $processor = WP_HTML_Processor::createFragment( $html_with_target_node );
     366        while ( $processor->next_tag() && null === $processor->get_attribute( 'target' ) ) {
     367            continue;
     368        }
     369
     370        $matches = $processor->matches_breadcrumbs( $breadcrumbs );
     371        $path    = implode( ', ', $breadcrumbs );
     372        if ( $should_match ) {
     373            $this->assertTrue( $matches, "HTML tag {$processor->get_tag()} should have matched breadcrumbs but didn't: {$path}." );
     374        } else {
     375            $this->assertFalse( $matches, "HTML tag {$processor->get_tag()} should not have matched breadcrumbs but did: {$path}." );
     376        }
     377    }
     378
     379    /**
     380     * Data provider.
     381     *
     382     * @return array[]
     383     */
     384    public function data_html_with_breadcrumbs_of_various_specificity() {
     385        return array(
     386            // Test with void elements.
     387            'Inner IMG'                      => array( '<div><span><figure><img target></figure></span></div>', array( 'span', 'figure', 'img' ), true ),
     388            'Inner IMG wildcard'             => array( '<div><span><figure><img target></figure></span></div>', array( 'span', '*', 'img' ), true ),
     389            'Inner IMG no wildcard'          => array( '<div><span><figure><img target></figure></span></div>', array( 'span', 'img' ), false ),
     390            'Full specification'             => array( '<div><span><figure><img target></figure></span></div>', array( 'html', 'body', 'div', 'span', 'figure', 'img' ), true ),
     391            'Invalid Full specification'     => array( '<div><span><figure><img target></figure></span></div>', array( 'html', 'div', 'span', 'figure', 'img' ), false ),
     392
     393            // Test also with non-void elements that open and close.
     394            'Inner P'                        => array( '<div><span><figure><p target></figure></span></div>', array( 'span', 'figure', 'p' ), true ),
     395            'Inner P wildcard'               => array( '<div><span><figure><p target></figure></span></div>', array( 'span', '*', 'p' ), true ),
     396            'Inner P no wildcard'            => array( '<div><span><figure><p target></figure></span></div>', array( 'span', 'p' ), false ),
     397            'Full specification (P)'         => array( '<div><span><figure><p target></figure></span></div>', array( 'html', 'body', 'div', 'span', 'figure', 'p' ), true ),
     398            'Invalid Full specification (P)' => array( '<div><span><figure><p target></figure></span></div>', array( 'html', 'div', 'span', 'figure', 'p' ), false ),
     399
     400            // Ensure that matches aren't on tag closers.
     401            'Inner P'                        => array( '<div><span><figure></p target></figure></span></div>', array( 'span', 'figure', 'p' ), false ),
     402            'Inner P wildcard'               => array( '<div><span><figure></p target></figure></span></div>', array( 'span', '*', 'p' ), false ),
     403            'Inner P no wildcard'            => array( '<div><span><figure></p target></figure></span></div>', array( 'span', 'p' ), false ),
     404            'Full specification (P)'         => array( '<div><span><figure></p target></figure></span></div>', array( 'html', 'body', 'div', 'span', 'figure', 'p' ), false ),
     405            'Invalid Full specification (P)' => array( '<div><span><figure></p target></figure></span></div>', array( 'html', 'div', 'span', 'figure', 'p' ), false ),
     406
     407            // Test wildcard behaviors.
     408            'Single wildcard element'        => array( '<figure><code><div><p><span><img target></span></p></div></code></figure>', array( '*' ), true ),
     409            'Child of wildcard element'      => array( '<figure><code><div><p><span><img target></span></p></div></code></figure>', array( 'SPAN', '*' ), true ),
     410        );
     411    }
     412
     413    /**
    356414     * Ensures that the ability to set attributes isn't broken by the HTML Processor.
    357415     *
Note: See TracChangeset for help on using the changeset viewer.