Changeset 55668
- Timestamp:
- 04/20/2023 05:15:40 PM (2 years ago)
- Location:
- branches/6.2
- Files:
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/6.2/src/wp-includes/html-api/class-wp-html-tag-processor.php
r55662 r55668 972 972 * 973 973 * @since 6.2.0 974 * @since 6.2.1 Support abruptly-closed comments, invalid-tag-closer-comments, and empty elements. 974 975 * 975 976 * @return bool Whether a tag was found before the end of the document. … … 1040 1041 '-' === $html[ $at + 3 ] 1041 1042 ) { 1042 $closer_at = strpos( $html, '-->', $at + 4 ); 1043 if ( false === $closer_at ) { 1043 $closer_at = $at + 4; 1044 // If it's not possible to close the comment then there is nothing more to scan. 1045 if ( strlen( $html ) <= $closer_at ) { 1044 1046 return false; 1045 1047 } 1046 1048 1047 $at = $closer_at + 3; 1048 continue; 1049 // Abruptly-closed empty comments are a sequence of dashes followed by `>`. 1050 $span_of_dashes = strspn( $html, '-', $closer_at ); 1051 if ( '>' === $html[ $closer_at + $span_of_dashes ] ) { 1052 $at = $closer_at + $span_of_dashes + 1; 1053 continue; 1054 } 1055 1056 /* 1057 * Comments may be closed by either a --> or an invalid --!>. 1058 * The first occurrence closes the comment. 1059 * 1060 * See https://html.spec.whatwg.org/#parse-error-incorrectly-closed-comment 1061 */ 1062 $closer_at--; // Pre-increment inside condition below reduces risk of accidental infinite looping. 1063 while ( ++$closer_at < strlen( $html ) ) { 1064 $closer_at = strpos( $html, '--', $closer_at ); 1065 if ( false === $closer_at ) { 1066 return false; 1067 } 1068 1069 if ( $closer_at + 2 < strlen( $html ) && '>' === $html[ $closer_at + 2 ] ) { 1070 $at = $closer_at + 3; 1071 continue 2; 1072 } 1073 1074 if ( $closer_at + 3 < strlen( $html ) && '!' === $html[ $closer_at + 2 ] && '>' === $html[ $closer_at + 3 ] ) { 1075 $at = $closer_at + 4; 1076 continue 2; 1077 } 1078 } 1049 1079 } 1050 1080 … … 1106 1136 1107 1137 /* 1138 * </> is a missing end tag name, which is ignored. 1139 * 1140 * See https://html.spec.whatwg.org/#parse-error-missing-end-tag-name 1141 */ 1142 if ( '>' === $html[ $at + 1 ] ) { 1143 $at++; 1144 continue; 1145 } 1146 1147 /* 1108 1148 * <? 
transitions to a bogus comment state – skip to the nearest > 1109 * https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state1149 * See https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state 1110 1150 */ 1111 1151 if ( '?' === $html[ $at + 1 ] ) { 1112 1152 $closer_at = strpos( $html, '>', $at + 2 ); 1153 if ( false === $closer_at ) { 1154 return false; 1155 } 1156 1157 $at = $closer_at + 1; 1158 continue; 1159 } 1160 1161 /* 1162 * If a non-alpha starts the tag name in a tag closer it's a comment. 1163 * Find the first `>`, which closes the comment. 1164 * 1165 * See https://html.spec.whatwg.org/#parse-error-invalid-first-character-of-tag-name 1166 */ 1167 if ( $this->is_closing_tag ) { 1168 $closer_at = strpos( $html, '>', $at + 3 ); 1113 1169 if ( false === $closer_at ) { 1114 1170 return false; -
branches/6.2/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php
r55662 r55668 1684 1684 1685 1685 /** 1686 * Invalid tag names are comments on tag closers. 1687 * 1688 * @ticket 58007 1689 * 1690 * @link https://html.spec.whatwg.org/#parse-error-invalid-first-character-of-tag-name 1691 * 1692 * @dataProvider data_next_tag_ignores_invalid_first_character_of_tag_name_comments 1693 * 1694 * @param string $html_with_markers HTML containing an invalid tag closer whose element before and 1695 * element after contain the "start" and "end" CSS classes. 1696 */ 1697 public function test_next_tag_ignores_invalid_first_character_of_tag_name_comments( $html_with_markers ) { 1698 $p = new WP_HTML_Tag_Processor( $html_with_markers ); 1699 $p->next_tag( array( 'class_name' => 'start' ) ); 1700 $p->next_tag(); 1701 1702 $this->assertSame( 'end', $p->get_attribute( 'class' ) ); 1703 } 1704 1705 /** 1706 * Data provider. 1707 * 1708 * @return array[] 1709 */ 1710 public function data_next_tag_ignores_invalid_first_character_of_tag_name_comments() { 1711 return array( 1712 'Invalid tag openers as normal text' => array( 1713 '<ul><li><div class=start>I <3 when outflow > inflow</div><img class=end></li></ul>', 1714 ), 1715 1716 'Invalid tag closers as comments' => array( 1717 '<ul><li><div class=start>I </3 when <img> outflow <br class=end> inflow</div></li></ul>', 1718 ), 1719 1720 'Unexpected question mark instead of tag name' => array( 1721 '<div class=start><?xml-stylesheet type="text/css" href="style.css"?><hr class=end>', 1722 ), 1723 ); 1724 } 1725 1726 /** 1686 1727 * @ticket 56299 1687 1728 * … … 1732 1773 'rcdata_tag' => 'TEXTAREA', 1733 1774 ), 1775 ); 1776 } 1777 1778 /** 1779 * Ensures that the invalid comment closing syntax "--!>" properly closes a comment. 
1780 * 1781 * @ticket 58007 1782 * 1783 * @covers WP_HTML_Tag_Processor::next_tag 1784 * 1785 */ 1786 public function test_allows_incorrectly_closed_comments() { 1787 $p = new WP_HTML_Tag_Processor( '<img id=before><!-- <img id=inside> --!><img id=after>--><img id=final>' ); 1788 1789 $p->next_tag(); 1790 $this->assertSame( 'before', $p->get_attribute( 'id' ), 'Did not find starting tag.' ); 1791 1792 $p->next_tag(); 1793 $this->assertSame( 'after', $p->get_attribute( 'id' ), 'Did not properly close improperly-closed comment.' ); 1794 1795 $p->next_tag(); 1796 $this->assertSame( 'final', $p->get_attribute( 'id' ), 'Did not skip over unopened comment-closer.' ); 1797 } 1798 1799 /** 1800 * Ensures that unclosed and invalid comments don't trigger warnings or errors. 1801 * 1802 * @ticket 58007 1803 * 1804 * @covers WP_HTML_Tag_Processor::next_tag 1805 * 1806 * @dataProvider data_html_with_unclosed_comments 1807 * 1808 * @param string $html_ending_before_comment_close HTML with opened comments that aren't closed 1809 */ 1810 public function test_documents_may_end_with_unclosed_comment( $html_ending_before_comment_close ) { 1811 $p = new WP_HTML_Tag_Processor( $html_ending_before_comment_close ); 1812 1813 $this->assertFalse( $p->next_tag() ); 1814 } 1815 1816 /** 1817 * Data provider. 1818 * 1819 * @return array[] 1820 */ 1821 public function data_html_with_unclosed_comments() { 1822 return array( 1823 'Shortest open valid comment' => array( '<!--' ), 1824 'Basic truncated comment' => array( '<!-- this ends --' ), 1825 'Comment with closer look-alike' => array( '<!-- this ends --x' ), 1826 'Comment with closer look-alike 2' => array( '<!-- this ends --!x' ), 1827 'Invalid tag-closer comment' => array( '</(when will this madness end?)' ), 1828 'Invalid tag-closer comment 2' => array( '</(when will this madness end?)--' ), 1829 ); 1830 } 1831 1832 /** 1833 * Ensures that abruptly-closed empty comments are properly closed. 
1834 * 1835 * @ticket 58007 1836 * 1837 * @covers WP_HTML_Tag_Processor::next_tag 1838 * 1839 * @dataProvider data_abruptly_closed_empty_comments 1840 * 1841 * @param string $html_with_after_marker HTML to test with "id=after" on element immediately following an abruptly closed comment. 1842 */ 1843 public function test_closes_abrupt_closing_of_empty_comment( $html_with_after_marker ) { 1844 $p = new WP_HTML_Tag_Processor( $html_with_after_marker ); 1845 $p->next_tag(); 1846 $p->next_tag(); 1847 1848 $this->assertSame( 'after', $p->get_attribute( 'id' ), 'Did not find tag after closing abruptly-closed comment' ); 1849 } 1850 1851 /** 1852 * Data provider. 1853 * 1854 * @return array[] 1855 */ 1856 public function data_abruptly_closed_empty_comments() { 1857 return array( 1858 'Empty comment with two dashes only' => array( '<hr><!--><hr id=after>' ), 1859 'Empty comment with two dashes only, improperly closed' => array( '<hr><!--!><hr id=inside>--><hr id=after>' ), 1860 'Comment with two dashes only, improperly closed twice' => array( '<hr><!--!><hr id=inside>--!><hr id=after>' ), 1861 'Empty comment with three dashes' => array( '<hr><!---><hr id=after>' ), 1862 'Empty comment with three dashes, improperly closed' => array( '<hr><!---!><hr id=inside>--><hr id=after>' ), 1863 'Comment with three dashes, improperly closed twice' => array( '<hr><!---!><hr id=inside>--!><hr id=after>' ), 1864 'Empty comment with four dashes' => array( '<hr><!----><hr id=after>' ), 1865 'Empty comment with four dashes, improperly closed' => array( '<hr><!----!><hr id=after>--><hr id=final>' ), 1866 'Comment with four dashes, improperly closed twice' => array( '<hr><!----!><hr id=after>--!><hr id=final>' ), 1867 'Comment with almost-closer inside' => array( '<hr><!-- ---!><hr id=after>--!><hr id=final>' ), 1734 1868 ); 1735 1869 }
Note: See TracChangeset for help on using the changeset viewer.