Make WordPress Core

Changeset 57316


Ignore:
Timestamp:
01/19/2024 09:40:01 PM (10 months ago)
Author:
dmsnell
Message:

HTML API: Add support for BR, EMBED, & other tags.

Adds support for the following HTML elements to the HTML Processor:

  • AREA, BR, EMBED, KEYGEN, WBR
  • Only the opening BR tag is supported, as the invalid closer </br> involves more complicated rules, to be implemented later.

Previously, these elements were not supported and the HTML Processor
would bail when encountering them. With this patch it will proceed to
parse an HTML document when encountering those tags as long as other
normal conditions don't cause it to bail (such as complicated format
reconstruction rules).

Props jonsurrell, dmsnell
Fixes #60283

Location:
trunk
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/wp-includes/html-api/class-wp-html-processor.php

    r57314 r57316  
    103103 *  - Custom elements: All custom elements are supported. :)
    104104 *  - Form elements: BUTTON, DATALIST, FIELDSET, LABEL, LEGEND, METER, PROGRESS, SEARCH.
    105  *  - Formatting elements: B, BIG, CODE, EM, FONT, I, SMALL, STRIKE, STRONG, TT, U.
     105 *  - Formatting elements: B, BIG, CODE, EM, FONT, I, SMALL, STRIKE, STRONG, TT, U, WBR.
    106106 *  - Heading elements: H1, H2, H3, H4, H5, H6, HGROUP.
    107107 *  - Links: A.
    108108 *  - Lists: DD, DL, DT, LI, OL, UL.
    109  *  - Media elements: AUDIO, CANVAS, FIGCAPTION, FIGURE, IMG, MAP, PICTURE, VIDEO.
    110  *  - Paragraph: P.
    111  *  - Phrasing elements: ABBR, BDI, BDO, CITE, DATA, DEL, DFN, INS, MARK, OUTPUT, Q, SAMP, SUB, SUP, TIME, VAR.
     109 *  - Media elements: AUDIO, CANVAS, EMBED, FIGCAPTION, FIGURE, IMG, MAP, PICTURE, VIDEO.
     110 *  - Paragraph: BR, P.
     111 *  - Phrasing elements: AREA, ABBR, BDI, BDO, CITE, DATA, DEL, DFN, INS, MARK, OUTPUT, Q, SAMP, SUB, SUP, TIME, VAR.
    112112 *  - Sectioning elements: ARTICLE, ASIDE, HR, NAV, SECTION.
    113113 *  - Templating elements: SLOT.
    114114 *  - Text decoration: RUBY.
    115  *  - Deprecated elements: ACRONYM, BLINK, CENTER, DIR, ISINDEX, MULTICOL, NEXTID, SPACER.
     115 *  - Deprecated elements: ACRONYM, BLINK, CENTER, DIR, ISINDEX, KEYGEN, MULTICOL, NEXTID, SPACER.
    116116 *
    117117 * ### Supported markup
     
    936936
    937937            /*
     938             * > An end tag whose tag name is "br"
     939             * >   Parse error. Drop the attributes from the token, and act as described in the next
     940             * >   entry; i.e. act as if this was a "br" start tag token with no attributes, rather
     941             * >   than the end tag token that it actually is.
     942             */
     943            case '-BR':
     944                $this->last_error = self::ERROR_UNSUPPORTED;
     945                throw new WP_HTML_Unsupported_Exception( 'Closing BR tags require unimplemented special handling.' );
     946
     947            /*
    938948             * > A start tag whose tag name is one of: "area", "br", "embed", "img", "keygen", "wbr"
    939949             */
     950            case '+AREA':
     951            case '+BR':
     952            case '+EMBED':
    940953            case '+IMG':
     954            case '+KEYGEN':
     955            case '+WBR':
    941956                $this->reconstruct_active_formatting_elements();
    942957                $this->insert_html_element( $this->state->current_token );
     958                $this->state->frameset_ok = false;
    943959                return true;
    944960
     
    978994            case 'BGSOUND':
    979995            case 'BODY':
    980             case 'BR':
    981996            case 'CAPTION':
    982997            case 'COL':
     
    984999            case 'DD':
    9851000            case 'DT':
    986             case 'EMBED':
    9871001            case 'FORM':
    9881002            case 'FRAME':
     
    9921006            case 'IFRAME':
    9931007            case 'INPUT':
    994             case 'KEYGEN':
    9951008            case 'LI':
    9961009            case 'LINK':
     
    10321045            case 'TRACK':
    10331046            case 'UL':
    1034             case 'WBR':
    10351047            case 'XMP':
    10361048                $this->last_error = self::ERROR_UNSUPPORTED;
     
    16931705            'INPUT' === $tag_name ||
    16941706            'LINK' === $tag_name ||
     1707            'KEYGEN' === $tag_name || // Obsolete but still treated as void.
    16951708            'META' === $tag_name ||
    16961709            'SOURCE' === $tag_name ||
  • trunk/tests/phpunit/tests/html-api/wpHtmlProcessor.php

    r57314 r57316  
    134134
    135135    /**
     136     * Ensure non-nesting tags do not nest.
     137     *
     138     * @ticket 60283
     139     *
     140     * @covers WP_HTML_Processor::step_in_body
     141     * @covers WP_HTML_Processor::is_void
     142     *
     143     * @dataProvider data_void_tags
     144     *
     145     * @param string $tag_name Name of void tag under test.
     146     */
     147    public function test_cannot_nest_void_tags( $tag_name ) {
     148        $processor = WP_HTML_Processor::create_fragment( "<{$tag_name}><div>" );
     149
     150        /*
     151         * This input is parsed into the same structure as the following HTML,
     152         * assuming that `IMG` was provided as the tag name:
     153         *
     154         *     <html>
     155         *         <body>
     156         *             <img>
     157         *             <div></div>
     158         *         </body>
     159         *     </html>
     160         */
     161
     162        $found_tag = $processor->next_tag();
     163
     164        if ( WP_HTML_Processor::ERROR_UNSUPPORTED === $processor->get_last_error() ) {
     165            $this->markTestSkipped( "Tag {$tag_name} is not supported." );
     166        }
     167
     168        $this->assertTrue(
     169            $found_tag,
     170            "Could not find first {$tag_name}."
     171        );
     172
     173        $this->assertSame(
     174            array( 'HTML', 'BODY', $tag_name ),
     175            $processor->get_breadcrumbs(),
     176            'Found incorrect nesting of first element.'
     177        );
     178
     179        $this->assertTrue(
     180            $processor->next_tag(),
     181            'Should have found the DIV as the second tag.'
     182        );
     183
     184        $this->assertSame(
     185            array( 'HTML', 'BODY', 'DIV' ),
     186            $processor->get_breadcrumbs(),
     187            "DIV should have been a sibling of the {$tag_name}."
     188        );
     189    }
     190
     191    /**
     192     * Data provider.
     193     *
     194     * @return array[]
     195     */
     196    public function data_void_tags() {
     197        return array(
     198            'AREA'   => array( 'AREA' ),
     199            'BASE'   => array( 'BASE' ),
     200            'BR'     => array( 'BR' ),
     201            'COL'    => array( 'COL' ),
     202            'EMBED'  => array( 'EMBED' ),
     203            'HR'     => array( 'HR' ),
     204            'IMG'    => array( 'IMG' ),
     205            'INPUT'  => array( 'INPUT' ),
     206            'KEYGEN' => array( 'KEYGEN' ),
     207            'LINK'   => array( 'LINK' ),
     208            'META'   => array( 'META' ),
     209            'SOURCE' => array( 'SOURCE' ),
     210            'TRACK'  => array( 'TRACK' ),
     211            'WBR'    => array( 'WBR' ),
     212        );
     213    }
     214
     215    /**
    136216     * Ensures that special handling of unsupported tags is cleaned up
    137217     * as handling is implemented. Otherwise there's risk of leaving special
     
    160240        return array(
    161241            'APPLET'    => array( 'APPLET' ),
    162             'AREA'      => array( 'AREA' ),
    163242            'BASE'      => array( 'BASE' ),
    164243            'BASEFONT'  => array( 'BASEFONT' ),
    165244            'BGSOUND'   => array( 'BGSOUND' ),
    166245            'BODY'      => array( 'BODY' ),
    167             'BR'        => array( 'BR' ),
    168246            'CAPTION'   => array( 'CAPTION' ),
    169247            'COL'       => array( 'COL' ),
    170248            'COLGROUP'  => array( 'COLGROUP' ),
    171             'EMBED'     => array( 'EMBED' ),
    172249            'FORM'      => array( 'FORM' ),
    173250            'FRAME'     => array( 'FRAME' ),
     
    177254            'IFRAME'    => array( 'IFRAME' ),
    178255            'INPUT'     => array( 'INPUT' ),
    179             'KEYGEN'    => array( 'KEYGEN' ),
    180256            'LINK'      => array( 'LINK' ),
    181257            'LISTING'   => array( 'LISTING' ),
     
    214290            'TR'        => array( 'TR' ),
    215291            'TRACK'     => array( 'TRACK' ),
    216             'WBR'       => array( 'WBR' ),
    217292            'XMP'       => array( 'XMP' ),
    218293        );
  • trunk/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php

    r57314 r57316  
    163163        $unsupported_elements = array(
    164164            'APPLET', // Deprecated.
    165             'AREA',
    166165            'BASE',
    167166            'BGSOUND', // Deprecated; self-closing if self-closing flag provided, otherwise normal.
    168167            'BODY',
    169             'BR',
    170168            'CAPTION',
    171169            'COL',
    172170            'COLGROUP',
    173             'EMBED',
    174171            'FORM',
    175172            'FRAME',
     
    179176            'IFRAME',
    180177            'INPUT',
    181             'KEYGEN', // Deprecated; void.
    182178            'LINK',
    183179            'LISTING', // Deprecated, use PRE instead.
     
    214210            'TR',
    215211            'TRACK',
    216             'WBR',
    217212            'XMP', // Deprecated, use PRE instead.
    218213        );
  • trunk/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php

    r57314 r57316  
    393393        $this->assertSame( array( 'HTML', 'BODY', 'DIV', 'DIV' ), $p->get_breadcrumbs(), 'Failed to produce expected DOM nesting: SPAN should be closed and DIV should be its sibling.' );
    394394    }
     395
     396    /**
     397     * Ensures that support isn't accidentally partially added for the closing BR tag `</br>`.
     398     *
     399     * This tag closer has special rules and support shouldn't be added without implementing full support.
     400     *
     401     * > An end tag whose tag name is "br"
     402     * >   Parse error. Drop the attributes from the token, and act as described in the next entry;
     403     * >   i.e. act as if this was a "br" start tag token with no attributes, rather than the end
     404     * >   tag token that it actually is.
     405     *
     406     * When this handling is implemented, this test should be removed. It's not incorporated
     407     * into the existing unsupported tag behavior test because the opening tag is supported;
     408     * only the closing tag isn't.
     409     *
     410     * @covers WP_HTML_Processor::step_in_body
     411     *
     412     * @ticket 60283
     413     */
     414    public function test_br_end_tag_unsupported() {
     415        $p = WP_HTML_Processor::create_fragment( '</br>' );
     416
     417        $this->assertFalse( $p->next_tag(), 'Found a BR tag that should not be handled.' );
     418        $this->assertSame( WP_HTML_Processor::ERROR_UNSUPPORTED, $p->get_last_error() );
     419    }
    395420}
Note: See TracChangeset for help on using the changeset viewer.