Make WordPress Core

Changeset 58191


Ignore:
Timestamp:
05/23/2024 11:35:52 PM (7 weeks ago)
Author:
dmsnell
Message:

HTML API: Add method to report depth of currently-matched node.

The HTML Processor maintains a stack of open elements, where every element,
every #text node, every HTML comment, and every other node is pushed and popped
while traversing the document. The "depth" of a node is how deep that stack is
at the point where the node appears. Unfortunately this information isn't
exposed to calling code, which has led different projects to attempt to
calculate this value externally. This isn't always trivial, but the HTML
Processor could make it so by exposing the internal knowledge in a new method.

In this patch the get_current_depth() method returns just that. Since the
processor always exists within a context, the depth includes nesting from the
always-present html element and also the body, since currently the HTML
Processor only supports parsing in the IN BODY context.

This means that the depth reported for the DIV in <div> is 3, not 1, because
its breadcrumbs path is HTML > BODY > DIV.

Developed in https://github.com/WordPress/wordpress-develop/pull/6589
Discussed in https://core.trac.wordpress.org/ticket/61255

Fixes #61255.
Props dmsnell, jonsurrell.

Location:
trunk
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/wp-includes/html-api/class-wp-html-processor.php

    r58190 r58191  
    622622
    623623        return $breadcrumbs;
     624    }
     625
     626    /**
     627     * Returns the nesting depth of the current location in the document, as counted by the stack of open elements.
     628     *
     629     * Example:
     630     *
     631     *     $processor = WP_HTML_Processor::create_fragment( '<div><p></p></div>' );
     632     *     // The processor starts in the BODY context, meaning it has depth from the start: HTML > BODY.
     633     *     2 === $processor->get_current_depth();
     634     *
     635     *     // Opening the DIV element increases the depth.
     636     *     $processor->next_token();
     637     *     3 === $processor->get_current_depth();
     638     *
     639     *     // Opening the P element increases the depth.
     640     *     $processor->next_token();
     641     *     4 === $processor->get_current_depth();
     642     *
     643     *     // The P element is closed during `next_token()` so the depth is decreased to reflect that.
     644     *     $processor->next_token();
     645     *     3 === $processor->get_current_depth();
     646     *
     647     * @since 6.6.0
     648     *
     649     * @return int Nesting depth of the current location in the document.
     650     */
     651    public function get_current_depth() {
     652        return $this->state->stack_of_open_elements->count();
    624653    }
    625654
  • trunk/tests/phpunit/tests/html-api/wpHtmlProcessor.php

    r58048 r58191  
    335335        );
    336336    }
     337
     338    /**
     339     * Ensures that the HTML Processor reports the expected depth at an element matched by `next_tag()`.
     340     *
     341     * @ticket 61255
     342     *
     343     * @dataProvider data_html_with_target_element_and_depth_in_body
     344     *
     345     * @param string $html_with_target_element HTML containing an element with the `target` class.
     346     * @param int    $depth_at_element         Expected depth reported while matched on the target element.
     347     */
     348    public function test_reports_proper_element_depth_in_body( $html_with_target_element, $depth_at_element ) {
     349        $processor = WP_HTML_Processor::create_fragment( $html_with_target_element );
     350
     351        $this->assertTrue(
     352            $processor->next_tag( array( 'class_name' => 'target' ) ),
     353            'Failed to find target element: check test data provider.'
     354        );
     355
     356        $this->assertSame(
     357            $depth_at_element,
     358            $processor->get_current_depth(),
     359            'HTML Processor reported the wrong depth at the matched element.'
     360        );
     361    }
     362
     363    /**
     364     * Data provider for test_reports_proper_element_depth_in_body().
     365     *
     366     * @return array[] Named datasets, each an HTML string followed by the expected depth at its `target` element.
     367     */
     368    public static function data_html_with_target_element_and_depth_in_body() {
     369        return array(
     370            'Single element'                    => array( '<div class="target">', 3 ),
     371            'Basic layout and formatting stack' => array( '<div><span><p><b><em class="target">', 7 ),
     372            'Adjacent elements'                 => array( '<div><span></span><span class="target"></div>', 4 ),
     373        );
     374    }
     375
     376    /**
     377     * Ensures that the HTML Processor reports the expected depth at the token following a matched element.
     378     *
     379     * @ticket 61255
     380     *
     381     * @dataProvider data_html_with_target_element_and_depth_of_next_node_in_body
     382     *
     383     * @param string $html_with_target_element HTML containing an element with the `target` class.
     384     * @param int    $depth_after_element      Expected depth reported after advancing one token past the target element.
     385     */
     386    public function test_reports_proper_non_element_depth_in_body( $html_with_target_element, $depth_after_element ) {
     387        $processor = WP_HTML_Processor::create_fragment( $html_with_target_element );
     388
     389        $this->assertTrue(
     390            $processor->next_tag( array( 'class_name' => 'target' ) ),
     391            'Failed to find target element: check test data provider.'
     392        );
     393
     394        $this->assertTrue(
     395            $processor->next_token(),
     396            'Failed to find next node after target element: check tests data provider.'
     397        );
     398
     399        $this->assertSame(
     400            $depth_after_element,
     401            $processor->get_current_depth(),
     402            'HTML Processor reported the wrong depth after the matched element.'
     403        );
     404    }
     405
     406    /**
     407     * Data provider for test_reports_proper_non_element_depth_in_body().
     408     *
     409     * @return array[] Named datasets, each an HTML string followed by the expected depth at the token after its `target` element.
     410     */
     411    public static function data_html_with_target_element_and_depth_of_next_node_in_body() {
     412        return array(
     413            'Element then text'                 => array( '<div class="target">One Deeper', 4 ),
     414            'Basic layout and formatting stack' => array( '<div><span><p><b><em class="target">Formatted', 8 ),
     415            'Basic layout with text'            => array( '<div>a<span>b<p>c<b>e<em class="target">e', 8 ),
     416            'Adjacent elements'                 => array( '<div><span></span><span class="target">Here</div>', 5 ),
     417            'Adjacent text'                     => array( '<p>Before<img class="target">After</p>', 4 ),
     418            'HTML comment'                      => array( '<img class="target"><!-- this is inside the BODY -->', 3 ),
     419            'HTML comment in DIV'               => array( '<div class="target"><!-- this is inside the BODY -->', 4 ),
     420            'Funky comment'                     => array( '<div><p>What <br class="target"><//wp:post-author></p></div>', 5 ),
     421        );
     422    }
    337423}
Note: See TracChangeset for help on using the changeset viewer.