Make WordPress Core

Changeset 56565


Ignore:
Timestamp:
09/13/2023 01:00:59 PM (9 months ago)
Author:
Bernhard Reiter
Message:

HTML API: Update documentation and rename internal variable on HTML Processor

This patch updates documentation and an internal variable name within the
HTML Processor class so that they are more helpful and complete to a reader.

There should be no functional or visual changes in this patch.

Props dmsnell, mukesh27.
Fixes #59267.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/wp-includes/html-api/class-wp-html-processor.php

    r56558 r56565  
    2828 *  - Inserting and removing nodes.
    2929 *  - Reading and changing inner content.
     30 *  - Navigating up or around HTML structure.
    3031 *
    3132 * ## Usage
     
    6768 * matches all IMG elements directly inside a P element. To ensure that no
    6869 * partial matches erroneously match, it's possible to specify in a query
    69  * the full breadcrumb match.
     70 * the full breadcrumb match all the way down from the root HTML element.
    7071 *
    7172 * Example:
     
    7778 *     $html = '<figure><img><figcaption>A <em>lovely</em> day outside</figcaption></figure>';
    7879 *     //                                  ---- Matches here.
    79  *     $processor->next_tag( array( 'breadcrumbs' => array( 'FIGURE', 'IMG', 'FIGCAPTION', 'EM' ) ) );
     80 *     $processor->next_tag( array( 'breadcrumbs' => array( 'FIGURE', 'FIGCAPTION', 'EM' ) ) );
    8081 *
    8182 *     $html = '<div><img></div><img>';
     
    101102 *  - Links: A.
    102103 *  - The formatting elements: B, BIG, CODE, EM, FONT, I, SMALL, STRIKE, STRONG, TT, U.
    103  *  - Containers: DIV, FIGCAPTION, FIGURE.
     104 *  - Containers: DIV, FIGCAPTION, FIGURE, SPAN.
     105 *  - Form elements: BUTTON.
    104106 *  - Paragraph: P.
    105107 *  - Void elements: IMG.
     
    117119 *  - Fully-balanced and non-overlapping tags.
    118120 *  - HTML with unexpected tag closers.
     121 *  - Some unbalanced or overlapping tags.
    119122 *  - P tags after unclosed P tags.
     123 *  - BUTTON tags after unclosed BUTTON tags.
    120124 *  - A tags after unclosed A tags that don't involve any active formatting elements.
    121125 *
     
    127131class WP_HTML_Processor extends WP_HTML_Tag_Processor {
    128132    /**
    129      * HTML processing requires more bookmarks than basic tag processing.
     133     * The maximum number of bookmarks allowed to exist at any given time.
     134     *
     135     * HTML processing requires more bookmarks than basic tag processing,
     136     * so this class constant from the Tag Processor is overwritten.
    130137     *
    131138     * @since 6.4.0
     
    137144    /**
    138145     * Static query for instructing the Tag Processor to visit every token.
     146     *
     147     * @access private
    139148     *
    140149     * @since 6.4.0
     
    159168     * Used to create unique bookmark names.
    160169     *
    161      * @since 6.4.0
     170     * This class sets a bookmark for every tag in the HTML document that it encounters.
     171     * The bookmark name is auto-generated and increments, starting with `1`. These are
     172     * internal bookmarks and are automatically released when the referring WP_HTML_Token
     173     * goes out of scope and is garbage-collected.
     174     *
     175     * @since 6.4.0
     176     *
     177     * @see WP_HTML_Processor::$release_internal_bookmark_on_destruct
    162178     *
    163179     * @var int
     
    187203     * @var Closure
    188204     */
    189     private $release_internal_bookmark = null;
     205    private $release_internal_bookmark_on_destruct = null;
    190206
    191207    /*
     
    259275     * Constructor.
    260276     *
     277     * Do not use this method. Use the static creator methods instead.
     278     *
    261279     * @access private
    262280     *
    263281     * @since 6.4.0
     282     *
     283     * @see WP_HTML_Processor::createFragment()
    264284     *
    265285     * @param string      $html                                  HTML to process.
     
    288308         * exposing it to any public API.
    289309         */
    290         $this->release_internal_bookmark = function ( $name ) {
     310        $this->release_internal_bookmark_on_destruct = function ( $name ) {
    291311            parent::release_bookmark( $name );
    292312        };
     
    299319     * return `false` in all those cases. To determine why something
    300320     * failed it's possible to request the last error. This can be
    301      * helpful to know if it's possible to fix something or to give up.
     321     * helpful to distinguish whether a given tag couldn't
     322     * be found or if content in the document caused the processor
     323     * to give up and abort processing.
    302324     *
    303325     * Example
    304326     *
    305      *     $p = WP_HTML_Processor::createFragment( '<template><strong><button><em><p><em>' );
    306      *     false === $p->next_tag();
    307      *     WP_HTML_Processor::ERROR_UNSUPPORTED === $p->get_last_error();
     327     *     $processor = WP_HTML_Processor::createFragment( '<template><strong><button><em><p><em>' );
     328     *     false === $processor->next_tag();
     329     *     WP_HTML_Processor::ERROR_UNSUPPORTED === $processor->get_last_error();
    308330     *
    309331     * @since 6.4.0
     
    430452
    431453        if ( self::PROCESS_NEXT_NODE === $node_to_process ) {
     454            /*
     455             * Void elements still hop onto the stack of open elements even though
     456             * there's no corresponding closing tag. This is important for managing
     457             * stack-based operations such as "navigate to parent node" or checking
     458             * on an element's breadcrumbs.
     459             *
     460             * When moving on to the next node, therefore, if the bottom-most element
     461             * on the stack is a void element, it must be closed.
     462             *
     463             * @TODO: Once self-closing foreign elements and BGSOUND are supported,
     464             *        they must also be implicitly closed here. BGSOUND is
     465             *        special since it's only self-closing if the self-closing flag
     466             *        is provided in the opening tag, otherwise it expects a tag closer.
     467             */
    432468            $top_node = $this->state->stack_of_open_elements->current_node();
    433469            if ( $top_node && self::is_void( $top_node->node_name ) ) {
     
    447483            $this->get_tag(),
    448484            $this->is_tag_closer(),
    449             $this->release_internal_bookmark
     485            $this->release_internal_bookmark_on_destruct
    450486        );
    451487
     
    457493                default:
    458494                    $this->last_error = self::ERROR_UNSUPPORTED;
    459                     throw new WP_HTML_Unsupported_Exception( 'Cannot parse outside of the IN BODY insertion mode.' );
     495                    throw new WP_HTML_Unsupported_Exception( "No support for parsing in the '{$this->state->insertion_mode}' state." );
    460496            }
    461497        } catch ( WP_HTML_Unsupported_Exception $e ) {
     
    471507     * Computes the HTML breadcrumbs for the currently-matched node, if matched.
    472508     *
    473      * Breadcrumbs start at the outer-most parent and descend toward the matched element.
     509     * Breadcrumbs start at the outermost parent and descend toward the matched element.
     510     * They always include the entire path from the root HTML node to the matched element.
     511     *
     512     * @TODO: It could be more efficient to expose a generator-based version of this function
     513     *        to avoid creating the array copy on tag iteration. If this is done, it would likely
     514     *        be more useful to walk up the stack when yielding instead of starting at the top.
    474515     *
    475516     * Example
    476517     *
    477      *     $p = WP_HTML_Processor::createFragment( '<p><strong><em><img></em></strong></p>' );
    478      *     $p->next_tag( 'IMG' );
    479      *     $p->get_breadcrumbs() === array( 'HTML', 'BODY', 'P', 'STRONG', 'EM', 'IMG' );
    480      *
    481      * @since 6.4.0
    482      *
    483      * @return string[]|null Array of tag-names representing path to matched node, if matched, otherwise null.
     518     *     $processor = WP_HTML_Processor::createFragment( '<p><strong><em><img></em></strong></p>' );
     519     *     $processor->next_tag( 'IMG' );
     520     *     $processor->get_breadcrumbs() === array( 'HTML', 'BODY', 'P', 'STRONG', 'EM', 'IMG' );
     521     *
     522     * @since 6.4.0
     523     *
     524     * @return string[]|null Array of tag names representing path to matched node, if matched, otherwise NULL.
    484525     */
    485526    public function get_breadcrumbs() {
     
    500541     *
    501542     * This internal function performs the 'in body' insertion mode
    502      * logic for the generalized `self::step()` function.
    503      *
    504      * @access private
     543     * logic for the generalized WP_HTML_Processor::step() function.
    505544     *
    506545     * @since 6.4.0
     
    509548     *
    510549     * @see https://html.spec.whatwg.org/#parsing-main-inbody
    511      * @see self::step
     550     * @see WP_HTML_Processor::step
    512551     *
    513552     * @return bool Whether an element was found.
     
    699738
    700739    /**
    701      * Creates a new bookmark for the currently-matched tag and returns generated name.
     740     * Creates a new bookmark for the currently-matched tag and returns the generated name.
    702741     *
    703742     * @since 6.4.0
     
    727766     * Returns the uppercase name of the matched tag.
    728767     *
     768     * The semantic rules for HTML specify that certain tags be reprocessed
     769     * with a different tag name. Because of this, the tag name presented
     770     * by the HTML Processor may differ from the one reported by the HTML
     771     * Tag Processor, which doesn't apply these semantic rules.
     772     *
    729773     * Example:
    730774     *
    731      *     $p = new WP_HTML_Tag_Processor( '<div class="test">Test</div>' );
    732      *     $p->next_tag() === true;
    733      *     $p->get_tag() === 'DIV';
    734      *
    735      *     $p->next_tag() === false;
    736      *     $p->get_tag() === null;
     775     *     $processor = new WP_HTML_Tag_Processor( '<div class="test">Test</div>' );
     776     *     $processor->next_tag() === true;
     777     *     $processor->get_tag() === 'DIV';
     778     *
     779     *     $processor->next_tag() === false;
     780     *     $processor->get_tag() === null;
    737781     *
    738782     * @since 6.4.0
     
    776820
    777821    /**
    778      * Moves the internal cursor in the Tag Processor to a given bookmark's location.
     822     * Moves the internal cursor in the HTML Processor to a given bookmark's location.
    779823     *
    780824     * In order to prevent accidental infinite loops, there's a
    781825     * maximum limit on the number of times seek() can be called.
     826     *
     827     * @throws Exception When unable to allocate a bookmark for the next token in the input HTML document.
    782828     *
    783829     * @since 6.4.0
     
    13491395     * This unlock code is used to ensure that anyone calling the constructor is
    13501396     * doing so with a full understanding that it's intended to be a private API.
     1397     *
     1398     * @access private
    13511399     */
    13521400    const CONSTRUCTOR_UNLOCK_CODE = 'Use WP_HTML_Processor::createFragment instead of calling the class constructor directly.';
Note: See TracChangeset for help on using the changeset viewer.