Changeset 55667
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/wp-includes/html-api/class-wp-html-tag-processor.php
r55659 r55667 972 972 * 973 973 * @since 6.2.0 974 * @since 6.2.1 Support abruptly-closed comments, invalid-tag-closer-comments, and empty elements. 974 975 * 975 976 * @return bool Whether a tag was found before the end of the document. … … 1040 1041 '-' === $html[ $at + 3 ] 1041 1042 ) { 1042 $closer_at = strpos( $html, '-->', $at + 4 ); 1043 if ( false === $closer_at ) { 1043 $closer_at = $at + 4; 1044 // If it's not possible to close the comment then there is nothing more to scan. 1045 if ( strlen( $html ) <= $closer_at ) { 1044 1046 return false; 1045 1047 } 1046 1048 1047 $at = $closer_at + 3; 1048 continue; 1049 // Abruptly-closed empty comments are a sequence of dashes followed by `>`. 1050 $span_of_dashes = strspn( $html, '-', $closer_at ); 1051 if ( '>' === $html[ $closer_at + $span_of_dashes ] ) { 1052 $at = $closer_at + $span_of_dashes + 1; 1053 continue; 1054 } 1055 1056 /* 1057 * Comments may be closed by either a --> or an invalid --!>. 1058 * The first occurrence closes the comment. 1059 * 1060 * See https://html.spec.whatwg.org/#parse-error-incorrectly-closed-comment 1061 */ 1062 $closer_at--; // Pre-increment inside condition below reduces risk of accidental infinite looping. 1063 while ( ++$closer_at < strlen( $html ) ) { 1064 $closer_at = strpos( $html, '--', $closer_at ); 1065 if ( false === $closer_at ) { 1066 return false; 1067 } 1068 1069 if ( $closer_at + 2 < strlen( $html ) && '>' === $html[ $closer_at + 2 ] ) { 1070 $at = $closer_at + 3; 1071 continue 2; 1072 } 1073 1074 if ( $closer_at + 3 < strlen( $html ) && '!' === $html[ $closer_at + 2 ] && '>' === $html[ $closer_at + 3 ] ) { 1075 $at = $closer_at + 4; 1076 continue 2; 1077 } 1078 } 1049 1079 } 1050 1080 … … 1106 1136 1107 1137 /* 1138 * </> is a missing end tag name, which is ignored. 1139 * 1140 * See https://html.spec.whatwg.org/#parse-error-missing-end-tag-name 1141 */ 1142 if ( '>' === $html[ $at + 1 ] ) { 1143 $at++; 1144 continue; 1145 } 1146 1147 /* 1108 1148 * <? 
transitions to a bogus comment state – skip to the nearest > 1109 * https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state 1149 * See https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state 1110 1150 */ 1111 1151 if ( '?' === $html[ $at + 1 ] ) { 1112 1152 $closer_at = strpos( $html, '>', $at + 2 ); 1153 if ( false === $closer_at ) { 1154 return false; 1155 } 1156 1157 $at = $closer_at + 1; 1158 continue; 1159 } 1160 1161 /* 1162 * If a non-alpha starts the tag name in a tag closer it's a comment. 1163 * Find the first `>`, which closes the comment. 1164 * 1165 * See https://html.spec.whatwg.org/#parse-error-invalid-first-character-of-tag-name 1166 */ 1167 if ( $this->is_closing_tag ) { 1168 $closer_at = strpos( $html, '>', $at + 3 ); 1113 1169 if ( false === $closer_at ) { 1114 1170 return false; -
trunk/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php
r55659 r55667 1734 1734 1735 1735 /** 1736 * Invalid tag names are comments on tag closers. 1737 * 1738 * @ticket 58007 1739 * 1740 * @link https://html.spec.whatwg.org/#parse-error-invalid-first-character-of-tag-name 1741 * 1742 * @dataProvider data_next_tag_ignores_invalid_first_character_of_tag_name_comments 1743 * 1744 * @param string $html_with_markers HTML containing an invalid tag closer whose element before and 1745 * element after contain the "start" and "end" CSS classes. 1746 */ 1747 public function test_next_tag_ignores_invalid_first_character_of_tag_name_comments( $html_with_markers ) { 1748 $p = new WP_HTML_Tag_Processor( $html_with_markers ); 1749 $p->next_tag( array( 'class_name' => 'start' ) ); 1750 $p->next_tag(); 1751 1752 $this->assertSame( 'end', $p->get_attribute( 'class' ) ); 1753 } 1754 1755 /** 1756 * Data provider. 1757 * 1758 * @return array[] 1759 */ 1760 public function data_next_tag_ignores_invalid_first_character_of_tag_name_comments() { 1761 return array( 1762 'Invalid tag openers as normal text' => array( 1763 '<ul><li><div class=start>I <3 when outflow > inflow</div><img class=end></li></ul>', 1764 ), 1765 1766 'Invalid tag closers as comments' => array( 1767 '<ul><li><div class=start>I </3 when <img> outflow <br class=end> inflow</div></li></ul>', 1768 ), 1769 1770 'Unexpected question mark instead of tag name' => array( 1771 '<div class=start><?xml-stylesheet type="text/css" href="style.css"?><hr class=end>', 1772 ), 1773 ); 1774 } 1775 1776 /** 1736 1777 * @ticket 56299 1737 1778 * … … 1782 1823 'rcdata_tag' => 'TEXTAREA', 1783 1824 ), 1825 ); 1826 } 1827 1828 /** 1829 * Ensures that the invalid comment closing syntax "--!>" properly closes a comment. 
1830 * 1831 * @ticket 58007 1832 * 1833 * @covers WP_HTML_Tag_Processor::next_tag 1834 * 1835 */ 1836 public function test_allows_incorrectly_closed_comments() { 1837 $p = new WP_HTML_Tag_Processor( '<img id=before><!-- <img id=inside> --!><img id=after>--><img id=final>' ); 1838 1839 $p->next_tag(); 1840 $this->assertSame( 'before', $p->get_attribute( 'id' ), 'Did not find starting tag.' ); 1841 1842 $p->next_tag(); 1843 $this->assertSame( 'after', $p->get_attribute( 'id' ), 'Did not properly close improperly-closed comment.' ); 1844 1845 $p->next_tag(); 1846 $this->assertSame( 'final', $p->get_attribute( 'id' ), 'Did not skip over unopened comment-closer.' ); 1847 } 1848 1849 /** 1850 * Ensures that unclosed and invalid comments don't trigger warnings or errors. 1851 * 1852 * @ticket 58007 1853 * 1854 * @covers WP_HTML_Tag_Processor::next_tag 1855 * 1856 * @dataProvider data_html_with_unclosed_comments 1857 * 1858 * @param string $html_ending_before_comment_close HTML with opened comments that aren't closed 1859 */ 1860 public function test_documents_may_end_with_unclosed_comment( $html_ending_before_comment_close ) { 1861 $p = new WP_HTML_Tag_Processor( $html_ending_before_comment_close ); 1862 1863 $this->assertFalse( $p->next_tag() ); 1864 } 1865 1866 /** 1867 * Data provider. 1868 * 1869 * @return array[] 1870 */ 1871 public function data_html_with_unclosed_comments() { 1872 return array( 1873 'Shortest open valid comment' => array( '<!--' ), 1874 'Basic truncated comment' => array( '<!-- this ends --' ), 1875 'Comment with closer look-alike' => array( '<!-- this ends --x' ), 1876 'Comment with closer look-alike 2' => array( '<!-- this ends --!x' ), 1877 'Invalid tag-closer comment' => array( '</(when will this madness end?)' ), 1878 'Invalid tag-closer comment 2' => array( '</(when will this madness end?)--' ), 1879 ); 1880 } 1881 1882 /** 1883 * Ensures that abruptly-closed empty comments are properly closed. 
1884 * 1885 * @ticket 58007 1886 * 1887 * @covers WP_HTML_Tag_Processor::next_tag 1888 * 1889 * @dataProvider data_abruptly_closed_empty_comments 1890 * 1891 * @param string $html_with_after_marker HTML to test with "id=after" on element immediately following an abruptly closed comment. 1892 */ 1893 public function test_closes_abrupt_closing_of_empty_comment( $html_with_after_marker ) { 1894 $p = new WP_HTML_Tag_Processor( $html_with_after_marker ); 1895 $p->next_tag(); 1896 $p->next_tag(); 1897 1898 $this->assertSame( 'after', $p->get_attribute( 'id' ), 'Did not find tag after closing abruptly-closed comment' ); 1899 } 1900 1901 /** 1902 * Data provider. 1903 * 1904 * @return array[] 1905 */ 1906 public function data_abruptly_closed_empty_comments() { 1907 return array( 1908 'Empty comment with two dashes only' => array( '<hr><!--><hr id=after>' ), 1909 'Empty comment with two dashes only, improperly closed' => array( '<hr><!--!><hr id=inside>--><hr id=after>' ), 1910 'Comment with two dashes only, improperly closed twice' => array( '<hr><!--!><hr id=inside>--!><hr id=after>' ), 1911 'Empty comment with three dashes' => array( '<hr><!---><hr id=after>' ), 1912 'Empty comment with three dashes, improperly closed' => array( '<hr><!---!><hr id=inside>--><hr id=after>' ), 1913 'Comment with three dashes, improperly closed twice' => array( '<hr><!---!><hr id=inside>--!><hr id=after>' ), 1914 'Empty comment with four dashes' => array( '<hr><!----><hr id=after>' ), 1915 'Empty comment with four dashes, improperly closed' => array( '<hr><!----!><hr id=after>--><hr id=final>' ), 1916 'Comment with four dashes, improperly closed twice' => array( '<hr><!----!><hr id=after>--!><hr id=final>' ), 1917 'Comment with almost-closer inside' => array( '<hr><!-- ---!><hr id=after>--!><hr id=final>' ), 1784 1918 ); 1785 1919 }
Note: See TracChangeset
for help on using the changeset viewer.