Make WordPress Core


Ignore:
Timestamp:
01/08/2024 02:03:40 PM (5 months ago)
Author:
Bernhard Reiter
Message:

HTML API: Add explicit handling or failure for all tags.

The HTML API HTML processor does not yet support all tags. Many tags (e.g. list elements) have some complicated rules in the "in body" insertion mode.

The need to implement these special rules is blocking the implementation of a catch-all rule for "any other tag", because we must prevent tags that have special rules from being handled by the catch-all.

> Any other start tag
>
> Reconstruct the active formatting elements, if any.
>
> Insert an HTML element for the token.


This change ensures the HTML Processor fails when handling special tags. This is the same as existing behavior, but will allow us to implement the catch-all "any other tag" handling without unintentionally handling special elements.

Additionally, we add tests that assert the special elements are unhandled. As these tags are implemented, this should help to ensure they're removed from the unsupported tag list.

Props jonsurrell, dmsnell.
Fixes #60092.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/tests/phpunit/tests/html-api/wpHtmlProcessor.php

    r57186 r57248  
    6262
    6363    /**
    64      * Ensures that if the HTML Processor encounters inputs that it can't properly handle,
    65      * that it stops processing the rest of the document. This prevents data corruption.
    66      *
    67      * @ticket 59167
    68      *
    69      * @covers WP_HTML_Processor::next_tag
    70      */
    71     public function test_stops_processing_after_unsupported_elements() {
    72         $p = WP_HTML_Processor::create_fragment( '<p><x-not-supported></p><p></p>' );
    73         $p->next_tag( 'P' );
    74         $this->assertFalse( $p->next_tag(), 'Stepped into a tag after encountering X-NOT-SUPPORTED element when it should have aborted.' );
    75         $this->assertNull( $p->get_tag(), "Should have aborted processing, but still reported tag {$p->get_tag()} after properly failing to step into tag." );
    76         $this->assertFalse( $p->next_tag( 'P' ), 'Stepped into normal P element after X-NOT-SUPPORTED element when it should have aborted.' );
    77     }
    78 
    79     /**
    8064     * Ensures that the HTML Processor maintains its internal state through seek calls.
    8165     *
     
    148132        $this->assertFalse( $p->next_tag( 'EM' ), 'Should have aborted before finding second EM as it required reconstructing the first EM.' );
    149133    }
     134
     135    /**
     136     * Ensures that special handling of unsupported tags is cleaned up
     137     * as handling is implemented. Otherwise there's risk of leaving special
     138     * handling (that is never reached) when tag handling is implemented.
     139     *
     140     * @ticket 60092
     141     *
     142     * @dataProvider data_unsupported_special_in_body_tags
     143     *
     144     * @covers WP_HTML_Processor::step_in_body
     145     *
     146     * @param string $tag_name Name of the tag to test.
     147     */
     148    public function test_step_in_body_fails_on_unsupported_tags( $tag_name ) {
     149        $fragment = WP_HTML_Processor::create_fragment( '<' . $tag_name . '></' . $tag_name . '>' );
     150        $this->assertFalse( $fragment->next_tag(), 'Should fail to find tag: ' . $tag_name . '.' );
     151        $this->assertEquals( $fragment->get_last_error(), WP_HTML_Processor::ERROR_UNSUPPORTED, 'Should have unsupported last error.' );
     152    }
     153
     154    /**
     155     * Data provider: tag names that step_in_body is expected to reject as unsupported.
     156     *
     157     * @return array[] Data sets keyed by tag name, each holding that tag name as the only argument.
     158     */
     159    public function data_unsupported_special_in_body_tags() {
     160        return array(
     161            'APPLET'    => array( 'APPLET' ),
     162            'AREA'      => array( 'AREA' ),
     163            'BASE'      => array( 'BASE' ),
     164            'BASEFONT'  => array( 'BASEFONT' ),
     165            'BGSOUND'   => array( 'BGSOUND' ),
     166            'BODY'      => array( 'BODY' ),
     167            'BR'        => array( 'BR' ),
     168            'CAPTION'   => array( 'CAPTION' ),
     169            'COL'       => array( 'COL' ),
     170            'COLGROUP'  => array( 'COLGROUP' ),
     171            'DD'        => array( 'DD' ),
     172            'DT'        => array( 'DT' ),
     173            'EMBED'     => array( 'EMBED' ),
     174            'FORM'      => array( 'FORM' ),
     175            'FRAME'     => array( 'FRAME' ),
     176            'FRAMESET'  => array( 'FRAMESET' ),
     177            'HEAD'      => array( 'HEAD' ),
     178            'HR'        => array( 'HR' ),
     179            'HTML'      => array( 'HTML' ),
     180            'IFRAME'    => array( 'IFRAME' ),
     181            'INPUT'     => array( 'INPUT' ),
     182            'KEYGEN'    => array( 'KEYGEN' ),
     183            'LI'        => array( 'LI' ),
     184            'LINK'      => array( 'LINK' ),
     185            'LISTING'   => array( 'LISTING' ),
     186            'MARQUEE'   => array( 'MARQUEE' ),
     187            'MATH'      => array( 'MATH' ),
     188            'META'      => array( 'META' ),
     189            'NOBR'      => array( 'NOBR' ),
     190            'NOEMBED'   => array( 'NOEMBED' ),
     191            'NOFRAMES'  => array( 'NOFRAMES' ),
     192            'NOSCRIPT'  => array( 'NOSCRIPT' ),
     193            'OBJECT'    => array( 'OBJECT' ),
     194            'OL'        => array( 'OL' ),
     195            'OPTGROUP'  => array( 'OPTGROUP' ),
     196            'OPTION'    => array( 'OPTION' ),
     197            'PARAM'     => array( 'PARAM' ),
     198            'PLAINTEXT' => array( 'PLAINTEXT' ),
     199            'PRE'       => array( 'PRE' ),
     200            'RB'        => array( 'RB' ),
     201            'RP'        => array( 'RP' ),
     202            'RT'        => array( 'RT' ),
     203            'RTC'       => array( 'RTC' ),
     204            'SARCASM'   => array( 'SARCASM' ),
     205            'SCRIPT'    => array( 'SCRIPT' ),
     206            'SELECT'    => array( 'SELECT' ),
     207            'SOURCE'    => array( 'SOURCE' ),
     208            'STYLE'     => array( 'STYLE' ),
     209            'SVG'       => array( 'SVG' ),
     210            'TABLE'     => array( 'TABLE' ),
     211            'TBODY'     => array( 'TBODY' ),
     212            'TD'        => array( 'TD' ),
     213            'TEMPLATE'  => array( 'TEMPLATE' ),
     214            'TEXTAREA'  => array( 'TEXTAREA' ),
     215            'TFOOT'     => array( 'TFOOT' ),
     216            'TH'        => array( 'TH' ),
     217            'THEAD'     => array( 'THEAD' ),
     218            'TITLE'     => array( 'TITLE' ),
     219            'TR'        => array( 'TR' ),
     220            'TRACK'     => array( 'TRACK' ),
     221            'UL'        => array( 'UL' ),
     222            'WBR'       => array( 'WBR' ),
     223            'XMP'       => array( 'XMP' ),
     224        );
     225    }
    150226}
Note: See TracChangeset for help on using the changeset viewer.