Make WordPress Core

Changeset 55668


Ignore:
Timestamp:
04/20/2023 05:15:40 PM (10 months ago)
Author:
Bernhard Reiter
Message:

HTML API: Add support for a few invalid HTML comment forms.

  • Comments created by means of a tag closer with an invalid tag name, e.g. </3>.
  • Comments closed with the invalid --!> closer. (Comments should be closed by -->, but --!> also closes them, which is a parse error.)
  • Tag closers with an empty tag name, e.g. </>, which are technically skipped over and aren't comments.

Props dmsnell, costdev.
Merges [55667] to the 6.2 branch.
Fixes #58007.

Location:
branches/6.2
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • branches/6.2/src/wp-includes/html-api/class-wp-html-tag-processor.php

    r55662 r55668  
    972972     *
    973973     * @since 6.2.0
     974     * @since 6.2.1 Support abruptly-closed comments, invalid-tag-closer-comments, and empty elements.
    974975     *
    975976     * @return bool Whether a tag was found before the end of the document.
     
    10401041                    '-' === $html[ $at + 3 ]
    10411042                ) {
    1042                     $closer_at = strpos( $html, '-->', $at + 4 );
    1043                     if ( false === $closer_at ) {
     1043                    $closer_at = $at + 4;
     1044                    // If it's not possible to close the comment then there is nothing more to scan.
     1045                    if ( strlen( $html ) <= $closer_at ) {
    10441046                        return false;
    10451047                    }
    10461048
    1047                     $at = $closer_at + 3;
    1048                     continue;
     1049                    // Abruptly-closed empty comments are a sequence of dashes followed by `>`.
     1050                    $span_of_dashes = strspn( $html, '-', $closer_at );
     1051                    if ( '>' === $html[ $closer_at + $span_of_dashes ] ) {
     1052                        $at = $closer_at + $span_of_dashes + 1;
     1053                        continue;
     1054                    }
     1055
     1056                    /*
     1057                     * Comments may be closed by either a --> or an invalid --!>.
     1058                     * The first occurrence closes the comment.
     1059                     *
     1060                     * See https://html.spec.whatwg.org/#parse-error-incorrectly-closed-comment
     1061                     */
     1062                    $closer_at--; // Pre-increment inside condition below reduces risk of accidental infinite looping.
     1063                    while ( ++$closer_at < strlen( $html ) ) {
     1064                        $closer_at = strpos( $html, '--', $closer_at );
     1065                        if ( false === $closer_at ) {
     1066                            return false;
     1067                        }
     1068
     1069                        if ( $closer_at + 2 < strlen( $html ) && '>' === $html[ $closer_at + 2 ] ) {
     1070                            $at = $closer_at + 3;
     1071                            continue 2;
     1072                        }
     1073
     1074                        if ( $closer_at + 3 < strlen( $html ) && '!' === $html[ $closer_at + 2 ] && '>' === $html[ $closer_at + 3 ] ) {
     1075                            $at = $closer_at + 4;
     1076                            continue 2;
     1077                        }
     1078                    }
    10491079                }
    10501080
     
    11061136
    11071137            /*
     1138             * </> is a missing end tag name, which is ignored.
     1139             *
     1140             * See https://html.spec.whatwg.org/#parse-error-missing-end-tag-name
     1141             */
     1142            if ( '>' === $html[ $at + 1 ] ) {
     1143                $at++;
     1144                continue;
     1145            }
     1146
     1147            /*
    11081148             * <? transitions to a bogus comment state – skip to the nearest >
    1109              * https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
     1149             * See https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
    11101150             */
    11111151            if ( '?' === $html[ $at + 1 ] ) {
    11121152                $closer_at = strpos( $html, '>', $at + 2 );
     1153                if ( false === $closer_at ) {
     1154                    return false;
     1155                }
     1156
     1157                $at = $closer_at + 1;
     1158                continue;
     1159            }
     1160
     1161            /*
     1162             * If a non-alpha starts the tag name in a tag closer it's a comment.
     1163             * Find the first `>`, which closes the comment.
     1164             *
     1165             * See https://html.spec.whatwg.org/#parse-error-invalid-first-character-of-tag-name
     1166             */
     1167            if ( $this->is_closing_tag ) {
     1168                $closer_at = strpos( $html, '>', $at + 3 );
    11131169                if ( false === $closer_at ) {
    11141170                    return false;
  • branches/6.2/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php

    r55662 r55668  
    16841684
    16851685    /**
     1686     * Invalid tag names are comments on tag closers.
     1687     *
     1688     * @ticket 58007
     1689     *
     1690     * @link https://html.spec.whatwg.org/#parse-error-invalid-first-character-of-tag-name
     1691     *
     1692     * @dataProvider data_next_tag_ignores_invalid_first_character_of_tag_name_comments
     1693     *
     1694     * @param string $html_with_markers HTML containing an invalid tag closer whose element before and
     1695     *                                  element after contain the "start" and "end" CSS classes.
     1696     */
     1697    public function test_next_tag_ignores_invalid_first_character_of_tag_name_comments( $html_with_markers ) {
     1698        $p = new WP_HTML_Tag_Processor( $html_with_markers );
     1699        $p->next_tag( array( 'class_name' => 'start' ) );
     1700        $p->next_tag();
     1701
     1702        $this->assertSame( 'end', $p->get_attribute( 'class' ) );
     1703    }
     1704
     1705    /**
     1706     * Data provider.
     1707     *
     1708     * @return array[]
     1709     */
     1710    public function data_next_tag_ignores_invalid_first_character_of_tag_name_comments() {
     1711        return array(
     1712            'Invalid tag openers as normal text'           => array(
     1713                '<ul><li><div class=start>I <3 when outflow > inflow</div><img class=end></li></ul>',
     1714            ),
     1715
     1716            'Invalid tag closers as comments'              => array(
     1717                '<ul><li><div class=start>I </3 when <img> outflow <br class=end> inflow</div></li></ul>',
     1718            ),
     1719
     1720            'Unexpected question mark instead of tag name' => array(
     1721                '<div class=start><?xml-stylesheet type="text/css" href="style.css"?><hr class=end>',
     1722            ),
     1723        );
     1724    }
     1725
     1726    /**
    16861727     * @ticket 56299
    16871728     *
     
    17321773                'rcdata_tag'      => 'TEXTAREA',
    17331774            ),
     1775        );
     1776    }
     1777
     1778    /**
     1779     * Ensures that the invalid comment closing syntax "--!>" properly closes a comment.
     1780     *
     1781     * @ticket 58007
     1782     *
     1783     * @covers WP_HTML_Tag_Processor::next_tag
     1784     *
     1785     */
     1786    public function test_allows_incorrectly_closed_comments() {
     1787        $p = new WP_HTML_Tag_Processor( '<img id=before><!-- <img id=inside> --!><img id=after>--><img id=final>' );
     1788
     1789        $p->next_tag();
     1790        $this->assertSame( 'before', $p->get_attribute( 'id' ), 'Did not find starting tag.' );
     1791
     1792        $p->next_tag();
     1793        $this->assertSame( 'after', $p->get_attribute( 'id' ), 'Did not properly close improperly-closed comment.' );
     1794
     1795        $p->next_tag();
     1796        $this->assertSame( 'final', $p->get_attribute( 'id' ), 'Did not skip over unopened comment-closer.' );
     1797    }
     1798
     1799    /**
     1800     * Ensures that unclosed and invalid comments don't trigger warnings or errors.
     1801     *
     1802     * @ticket 58007
     1803     *
     1804     * @covers WP_HTML_Tag_Processor::next_tag
     1805     *
     1806     * @dataProvider data_html_with_unclosed_comments
     1807     *
     1808     * @param string $html_ending_before_comment_close HTML with opened comments that aren't closed.
     1809     */
     1810    public function test_documents_may_end_with_unclosed_comment( $html_ending_before_comment_close ) {
     1811        $p = new WP_HTML_Tag_Processor( $html_ending_before_comment_close );
     1812
     1813        $this->assertFalse( $p->next_tag() );
     1814    }
     1815
     1816    /**
     1817     * Data provider.
     1818     *
     1819     * @return array[]
     1820     */
     1821    public function data_html_with_unclosed_comments() {
     1822        return array(
     1823            'Shortest open valid comment'      => array( '<!--' ),
     1824            'Basic truncated comment'          => array( '<!-- this ends --' ),
     1825            'Comment with closer look-alike'   => array( '<!-- this ends --x' ),
     1826            'Comment with closer look-alike 2' => array( '<!-- this ends --!x' ),
     1827            'Invalid tag-closer comment'       => array( '</(when will this madness end?)' ),
     1828            'Invalid tag-closer comment 2'     => array( '</(when will this madness end?)--' ),
     1829        );
     1830    }
     1831
     1832    /**
     1833     * Ensures that abruptly-closed empty comments are properly closed.
     1834     *
     1835     * @ticket 58007
     1836     *
     1837     * @covers WP_HTML_Tag_Processor::next_tag
     1838     *
     1839     * @dataProvider data_abruptly_closed_empty_comments
     1840     *
     1841     * @param string $html_with_after_marker HTML to test with "id=after" on element immediately following an abruptly closed comment.
     1842     */
     1843    public function test_closes_abrupt_closing_of_empty_comment( $html_with_after_marker ) {
     1844        $p = new WP_HTML_Tag_Processor( $html_with_after_marker );
     1845        $p->next_tag();
     1846        $p->next_tag();
     1847
     1848        $this->assertSame( 'after', $p->get_attribute( 'id' ), 'Did not find tag after closing abruptly-closed comment' );
     1849    }
     1850
     1851    /**
     1852     * Data provider.
     1853     *
     1854     * @return array[]
     1855     */
     1856    public function data_abruptly_closed_empty_comments() {
     1857        return array(
     1858            'Empty comment with two dashes only' => array( '<hr><!--><hr id=after>' ),
     1859            'Empty comment with two dashes only, improperly closed' => array( '<hr><!--!><hr id=inside>--><hr id=after>' ),
     1860            'Comment with two dashes only, improperly closed twice' => array( '<hr><!--!><hr id=inside>--!><hr id=after>' ),
     1861            'Empty comment with three dashes'    => array( '<hr><!---><hr id=after>' ),
     1862            'Empty comment with three dashes, improperly closed' => array( '<hr><!---!><hr id=inside>--><hr id=after>' ),
     1863            'Comment with three dashes, improperly closed twice' => array( '<hr><!---!><hr id=inside>--!><hr id=after>' ),
     1864            'Empty comment with four dashes'     => array( '<hr><!----><hr id=after>' ),
     1865            'Empty comment with four dashes, improperly closed' => array( '<hr><!----!><hr id=after>--><hr id=final>' ),
     1866            'Comment with four dashes, improperly closed twice' => array( '<hr><!----!><hr id=after>--!><hr id=final>' ),
     1867            'Comment with almost-closer inside'  => array( '<hr><!-- ---!><hr id=after>--!><hr id=final>' ),
    17341868        );
    17351869    }
Note: See TracChangeset for help on using the changeset viewer.