Changeset 58779
- Timestamp:
- 07/22/2024 10:22:03 PM (18 months ago)
- Location:
- trunk
- Files:
-
- 1 added
- 13 edited
-
src/wp-includes/html-api/class-wp-html-active-formatting-elements.php (modified) (2 diffs)
-
src/wp-includes/html-api/class-wp-html-open-elements.php (modified) (13 diffs)
-
src/wp-includes/html-api/class-wp-html-processor-state.php (modified) (3 diffs)
-
src/wp-includes/html-api/class-wp-html-processor.php (modified) (15 diffs)
-
src/wp-includes/html-api/class-wp-html-tag-processor.php (modified) (10 diffs)
-
src/wp-includes/html-api/class-wp-html-token.php (modified) (1 diff)
-
tests/phpunit/tests/html-api/wpHtmlProcessor.php (modified) (6 diffs)
-
tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php (modified) (9 diffs)
-
tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php (modified) (3 diffs)
-
tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php (modified) (3 diffs)
-
tests/phpunit/tests/html-api/wpHtmlSupportRequiredHtmlProcessor.php (modified) (1 diff)
-
tests/phpunit/tests/html-api/wpHtmlSupportRequiredOpenElements.php (modified) (7 diffs)
-
tests/phpunit/tests/html-api/wpHtmlTagProcessor-token-scanning.php (modified) (1 diff)
-
tests/phpunit/tests/html-api/wpHtmlTagProcessorModifiableText.php (added)
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/wp-includes/html-api/class-wp-html-active-formatting-elements.php
r58769 r58779 85 85 86 86 return $current_node ? $current_node : null; 87 } 88 89 /** 90 * Inserts a "marker" at the end of the list of active formatting elements. 91 * 92 * > The markers are inserted when entering applet, object, marquee, 93 * > template, td, th, and caption elements, and are used to prevent 94 * > formatting from "leaking" into applet, object, marquee, template, 95 * > td, th, and caption elements. 96 * 97 * @see https://html.spec.whatwg.org/#concept-parser-marker 98 * 99 * @since 6.7.0 100 */ 101 public function insert_marker(): void { 102 $this->push( new WP_HTML_Token( null, 'marker', false ) ); 87 103 } 88 104 … … 185 201 } 186 202 } 203 204 /** 205 * Clears the list of active formatting elements up to the last marker. 206 * 207 * > When the steps below require the UA to clear the list of active formatting elements up to 208 * > the last marker, the UA must perform the following steps: 209 * > 210 * > 1. Let entry be the last (most recently added) entry in the list of active 211 * > formatting elements. 212 * > 2. Remove entry from the list of active formatting elements. 213 * > 3. If entry was a marker, then stop the algorithm at this point. 214 * > The list has been cleared up to the last marker. 215 * > 4. Go to step 1. 216 * 217 * @see https://html.spec.whatwg.org/multipage/parsing.html#clear-the-list-of-active-formatting-elements-up-to-the-last-marker 218 * 219 * @since 6.7.0 220 */ 221 public function clear_up_to_last_marker(): void { 222 foreach ( $this->walk_up() as $item ) { 223 array_pop( $this->stack ); 224 if ( 'marker' === $item->node_name ) { 225 break; 226 } 227 } 228 } 187 229 } -
trunk/src/wp-includes/html-api/class-wp-html-open-elements.php
r58769 r58779 103 103 104 104 /** 105 * Returns the name of the node at the nth position on the stack 106 * of open elements, or `null` if no such position exists. 107 * 108 * Note that this uses a 1-based index, which represents the 109 * "nth item" on the stack, counting from the top, where the 110 * top-most element is the 1st, the second is the 2nd, etc... 111 * 112 * @since 6.7.0 113 * 114 * @param int $nth Retrieve the nth item on the stack, with 1 being 115 * the top element, 2 being the second, etc... 116 * @return string|null Name of the node on the stack at the given location, 117 * or `null` if the location isn't on the stack. 118 */ 119 public function at( int $nth ): ?string { 120 foreach ( $this->walk_down() as $item ) { 121 if ( 0 === --$nth ) { 122 return $item->node_name; 123 } 124 } 125 126 return null; 127 } 128 129 /** 130 * Reports if a node of a given name is in the stack of open elements. 131 * 132 * @since 6.7.0 133 * 134 * @param string $node_name Name of node for which to check. 135 * @return bool Whether a node of the given name is in the stack of open elements. 136 */ 137 public function contains( string $node_name ): bool { 138 foreach ( $this->walk_up() as $item ) { 139 if ( $node_name === $item->node_name ) { 140 return true; 141 } 142 } 143 144 return false; 145 } 146 147 /** 105 148 * Reports if a specific node is in the stack of open elements. 106 149 * … … 112 155 public function contains_node( WP_HTML_Token $token ): bool { 113 156 foreach ( $this->walk_up() as $item ) { 114 if ( $token ->bookmark_name === $item->bookmark_name) {157 if ( $token === $item ) { 115 158 return true; 116 159 } … … 211 254 } 212 255 213 switch ( $node->node_name ) {214 case 'HTML':215 return false;216 }217 218 256 if ( in_array( $node->node_name, $termination_list, true ) ) { 219 257 return false; … … 227 265 * Returns whether a particular element is in scope. 
228 266 * 229 * @since 6.4.0 267 * > The stack of open elements is said to have a particular element in 268 * > scope when it has that element in the specific scope consisting of 269 * > the following element types: 270 * > 271 * > - applet 272 * > - caption 273 * > - html 274 * > - table 275 * > - td 276 * > - th 277 * > - marquee 278 * > - object 279 * > - template 280 * > - MathML mi 281 * > - MathML mo 282 * > - MathML mn 283 * > - MathML ms 284 * > - MathML mtext 285 * > - MathML annotation-xml 286 * > - SVG foreignObject 287 * > - SVG desc 288 * > - SVG title 289 * 290 * @since 6.4.0 291 * @since 6.7.0 Supports all required HTML elements. 230 292 * 231 293 * @see https://html.spec.whatwg.org/#has-an-element-in-scope … … 238 300 $tag_name, 239 301 array( 240 241 /* 242 * Because it's not currently possible to encounter 243 * one of the termination elements, they don't need 244 * to be listed here. If they were, they would be 245 * unreachable and only waste CPU cycles while 246 * scanning through HTML. 247 */ 302 'APPLET', 303 'CAPTION', 304 'HTML', 305 'TABLE', 306 'TD', 307 'TH', 308 'MARQUEE', 309 'OBJECT', 310 'TEMPLATE', 311 // @todo: Support SVG and MathML nodes when support for foreign content is added. 248 312 ) 249 313 ); … … 253 317 * Returns whether a particular element is in list item scope. 254 318 * 319 * > The stack of open elements is said to have a particular element 320 * > in list item scope when it has that element in the specific scope 321 * > consisting of the following element types: 322 * > 323 * > - All the element types listed above for the has an element in scope algorithm. 324 * > - ol in the HTML namespace 325 * > - ul in the HTML namespace 326 * 255 327 * @since 6.4.0 256 328 * @since 6.5.0 Implemented: no longer throws on every invocation. 329 * @since 6.7.0 Supports all required HTML elements. 
257 330 * 258 331 * @see https://html.spec.whatwg.org/#has-an-element-in-list-item-scope … … 265 338 $tag_name, 266 339 array( 267 // There are more elements that belong here which aren't currently supported. 340 'APPLET', 341 'BUTTON', 342 'CAPTION', 343 'HTML', 344 'TABLE', 345 'TD', 346 'TH', 347 'MARQUEE', 348 'OBJECT', 268 349 'OL', 350 'TEMPLATE', 269 351 'UL', 352 // @todo: Support SVG and MathML nodes when support for foreign content is added. 270 353 ) 271 354 ); … … 275 358 * Returns whether a particular element is in button scope. 276 359 * 277 * @since 6.4.0 360 * > The stack of open elements is said to have a particular element 361 * > in button scope when it has that element in the specific scope 362 * > consisting of the following element types: 363 * > 364 * > - All the element types listed above for the has an element in scope algorithm. 365 * > - button in the HTML namespace 366 * 367 * @since 6.4.0 368 * @since 6.7.0 Supports all required HTML elements. 278 369 * 279 370 * @see https://html.spec.whatwg.org/#has-an-element-in-button-scope … … 283 374 */ 284 375 public function has_element_in_button_scope( string $tag_name ): bool { 285 return $this->has_element_in_specific_scope( $tag_name, array( 'BUTTON' ) ); 376 return $this->has_element_in_specific_scope( 377 $tag_name, 378 array( 379 'APPLET', 380 'BUTTON', 381 'CAPTION', 382 'HTML', 383 'TABLE', 384 'TD', 385 'TH', 386 'MARQUEE', 387 'OBJECT', 388 'TEMPLATE', 389 // @todo: Support SVG and MathML nodes when support for foreign content is added. 390 ) 391 ); 286 392 } 287 393 … … 289 395 * Returns whether a particular element is in table scope. 
290 396 * 291 * @since 6.4.0 397 * > The stack of open elements is said to have a particular element 398 * > in table scope when it has that element in the specific scope 399 * > consisting of the following element types: 400 * > 401 * > - html in the HTML namespace 402 * > - table in the HTML namespace 403 * > - template in the HTML namespace 404 * 405 * @since 6.4.0 406 * @since 6.7.0 Full implementation. 292 407 * 293 408 * @see https://html.spec.whatwg.org/#has-an-element-in-table-scope 294 *295 * @throws WP_HTML_Unsupported_Exception Always until this function is implemented.296 409 * 297 410 * @param string $tag_name Name of tag to check. … … 299 412 */ 300 413 public function has_element_in_table_scope( string $tag_name ): bool { 301 throw new WP_HTML_Unsupported_Exception( 'Cannot process elements depending on table scope.' ); 302 303 return false; // The linter requires this unreachable code until the function is implemented and can return. 414 return $this->has_element_in_specific_scope( 415 $tag_name, 416 array( 417 'HTML', 418 'TABLE', 419 'TEMPLATE', 420 ) 421 ); 304 422 } 305 423 … … 541 659 */ 542 660 switch ( $item->node_name ) { 661 case 'APPLET': 543 662 case 'BUTTON': 663 case 'CAPTION': 664 case 'HTML': 665 case 'TABLE': 666 case 'TD': 667 case 'TH': 668 case 'MARQUEE': 669 case 'OBJECT': 670 case 'TEMPLATE': 544 671 $this->has_p_in_button_scope = false; 545 672 break; … … 574 701 */ 575 702 switch ( $item->node_name ) { 703 case 'APPLET': 576 704 case 'BUTTON': 705 case 'CAPTION': 706 case 'HTML': 707 case 'P': 708 case 'TABLE': 709 case 'TD': 710 case 'TH': 711 case 'MARQUEE': 712 case 'OBJECT': 713 case 'TEMPLATE': 577 714 $this->has_p_in_button_scope = $this->has_element_in_button_scope( 'P' ); 578 715 break; 579 580 case 'P':581 $this->has_p_in_button_scope = $this->has_element_in_button_scope( 'P' );582 break;583 716 } 584 717 -
trunk/src/wp-includes/html-api/class-wp-html-processor-state.php
r58769 r58779 313 313 314 314 /** 315 * No-quirks mode document compatibility mode. 316 * 317 * > In no-quirks mode, the behavior is (hopefully) the desired behavior 318 * > described by the modern HTML and CSS specifications. 319 * 320 * @since 6.7.0 321 * 322 * @var string 323 */ 324 const NO_QUIRKS_MODE = 'no-quirks-mode'; 325 326 /** 327 * Quirks mode document compatibility mode. 328 * 329 * > In quirks mode, layout emulates behavior in Navigator 4 and Internet 330 * > Explorer 5. This is essential in order to support websites that were 331 * > built before the widespread adoption of web standards. 332 * 333 * @since 6.7.0 334 * 335 * @var string 336 */ 337 const QUIRKS_MODE = 'quirks-mode'; 338 339 /** 315 340 * The stack of template insertion modes. 316 341 * … … 370 395 371 396 /** 397 * Indicates if the document is in quirks mode or no-quirks mode. 398 * 399 * Impact on HTML parsing: 400 * 401 * - In `NO_QUIRKS_MODE` CSS class and ID selectors match in a byte-for-byte 402 * manner, otherwise for backwards compatibility, class selectors are to 403 * match in an ASCII case-insensitive manner. 404 * 405 * - When not in `QUIRKS_MODE`, a TABLE start tag implicitly closes an open P tag 406 * if one is in scope and open, otherwise the TABLE becomes a child of the P. 407 * 408 * `QUIRKS_MODE` impacts many styling-related aspects of an HTML document, but 409 * none of the other changes modifies how the HTML is parsed or selected. 410 * 411 * @see self::QUIRKS_MODE 412 * @see self::NO_QUIRKS_MODE 413 * 414 * @since 6.7.0 415 * 416 * @var string 417 */ 418 public $document_mode = self::NO_QUIRKS_MODE; 419 420 /** 372 421 * Context node initializing fragment parser, if created as a fragment parser. 373 422 * … … 390 439 */ 391 440 public $head_element = null; 441 442 /** 443 * FORM element pointer. 444 * 445 * > points to the last form element that was opened and whose end tag has 446 * > not yet been seen. 
It is used to make form controls associate with 447 * > forms in the face of dramatically bad markup, for historical reasons. 448 * > It is ignored inside template elements. 449 * 450 * @todo This may be invalidated by a seek operation. 451 * 452 * @see https://html.spec.whatwg.org/#form-element-pointer 453 * 454 * @since 6.7.0 455 * 456 * @var WP_HTML_Token|null 457 */ 458 public $form_element = null; 392 459 393 460 /** -
trunk/src/wp-includes/html-api/class-wp-html-processor.php
r58769 r58779 98 98 * that the HTML Processor won't break any HTML it doesn't fully understand. 99 99 * 100 * The following list specifies the HTML tags that _are_ supported:100 * The HTML Processor supports all elements other than a specific set: 101 101 * 102 * - Containers: ADDRESS, BLOCKQUOTE, DETAILS, DIALOG, DIV, FOOTER, HEADER, MAIN, MENU, SPAN, SUMMARY. 103 * - Custom elements: All custom elements are supported. :) 104 * - Form elements: BUTTON, DATALIST, FIELDSET, INPUT, LABEL, LEGEND, METER, OPTGROUP, OPTION, PROGRESS, SEARCH, SELECT. 105 * - Formatting elements: B, BIG, CODE, EM, FONT, I, PRE, SMALL, STRIKE, STRONG, TT, U, WBR. 106 * - Heading elements: H1, H2, H3, H4, H5, H6, HGROUP. 107 * - Links: A. 108 * - Lists: DD, DL, DT, LI, OL, UL. 109 * - Media elements: AUDIO, CANVAS, EMBED, FIGCAPTION, FIGURE, IMG, MAP, PICTURE, SOURCE, TRACK, VIDEO. 110 * - Paragraph: BR, P. 111 * - Phrasing elements: ABBR, AREA, BDI, BDO, CITE, DATA, DEL, DFN, INS, MARK, OUTPUT, Q, SAMP, SUB, SUP, TIME, VAR. 112 * - Sectioning elements: ARTICLE, ASIDE, HR, NAV, SECTION. 113 * - Templating elements: SLOT. 114 * - Text decoration: RUBY. 115 * - Deprecated elements: ACRONYM, BLINK, CENTER, DIR, ISINDEX, KEYGEN, LISTING, MULTICOL, NEXTID, PARAM, SPACER. 102 * - Any element inside a TABLE. 103 * - Any element inside foreign content, including SVG and MATH. 104 * - Any element outside the IN BODY insertion mode, e.g. doctype declarations, meta, links. 116 105 * 117 106 * ### Supported markup … … 122 111 * such a case it will stop processing. 123 112 * 124 * The following list specifies HTML markup that _is_ supported: 113 * The following list illustrates some common examples of unexpected HTML inputs that 114 * the HTML Processor properly parses and represents: 125 115 * 126 * - Markup involving only those tags listed above. 127 * - Fully-balanced and non-overlapping tags. 128 * - HTML with unexpected tag closers. 129 * - Some unbalanced or overlapping tags. 
130 * - P tags after unclosed P tags. 131 * - BUTTON tags after unclosed BUTTON tags. 132 * - A tags after unclosed A tags that don't involve any active formatting elements. 116 * - HTML with optional tags omitted, e.g. `<p>one<p>two`. 117 * - HTML with unexpected tag closers, e.g. `<p>one </span> more</p>`. 118 * - Non-void tags with self-closing flag, e.g. `<div/>the DIV is still open.</div>`. 119 * - Heading elements which close open heading elements of another level, e.g. `<h1>Closed by </h2>`. 120 * - Elements containing text that looks like other tags but isn't, e.g. `<title>The <img> is plaintext</title>`. 121 * - SCRIPT and STYLE tags containing text that looks like HTML but isn't, e.g. `<script>document.write('<p>Hi</p>');</script>`. 122 * - SCRIPT content which has been escaped, e.g. `<script><!-- document.write('<script>console.log("hi")</script>') --></script>`. 123 * 124 * ### Unsupported Features 125 * 126 * This parser does not report parse errors. 127 * 128 * Normally, when additional HTML or BODY tags are encountered in a document, if there 129 * are any additional attributes on them that aren't found on the previous elements, 130 * the existing HTML and BODY elements adopt those missing attribute values. This 131 * parser does not add those additional attributes. 132 * 133 * In certain situations, elements are moved to a different part of the document in 134 * a process called "adoption" and "fostering." Because the nodes move to a location 135 * in the document that the parser had already processed, this parser does not support 136 * these situations and will bail. 
133 137 * 134 138 * @since 6.4.0 … … 1105 1109 1106 1110 switch ( $op ) { 1107 case '#comment':1108 case '#funky-comment':1109 case '#presumptuous-tag':1110 $this->insert_html_element( $this->state->current_token );1111 return true;1112 1113 1111 case '#text': 1114 $this->reconstruct_active_formatting_elements();1115 1116 1112 $current_token = $this->bookmarks[ $this->state->current_token->bookmark_name ]; 1117 1113 … … 1134 1130 } 1135 1131 1132 $this->reconstruct_active_formatting_elements(); 1133 1136 1134 /* 1137 1135 * Whitespace-only text does not affect the frameset-ok flag. … … 1147 1145 return true; 1148 1146 1147 case '#comment': 1148 case '#funky-comment': 1149 case '#presumptuous-tag': 1150 $this->insert_html_element( $this->state->current_token ); 1151 return true; 1152 1153 /* 1154 * > A DOCTYPE token 1155 * > Parse error. Ignore the token. 1156 */ 1149 1157 case 'html': 1158 return $this->step(); 1159 1160 /* 1161 * > A start tag whose tag name is "html" 1162 */ 1163 case '+HTML': 1164 if ( ! $this->state->stack_of_open_elements->contains( 'TEMPLATE' ) ) { 1165 /* 1166 * > Otherwise, for each attribute on the token, check to see if the attribute 1167 * > is already present on the top element of the stack of open elements. If 1168 * > it is not, add the attribute and its corresponding value to that element. 1169 * 1170 * This parser does not currently support this behavior: ignore the token. 1171 */ 1172 } 1173 1174 // Ignore the token. 
1175 return $this->step(); 1176 1177 /* 1178 * > A start tag whose tag name is one of: "base", "basefont", "bgsound", "link", 1179 * > "meta", "noframes", "script", "style", "template", "title" 1180 * > 1181 * > An end tag whose tag name is "template" 1182 */ 1183 case '+BASE': 1184 case '+BASEFONT': 1185 case '+BGSOUND': 1186 case '+LINK': 1187 case '+META': 1188 case '+NOFRAMES': 1189 case '+SCRIPT': 1190 case '+STYLE': 1191 case '+TEMPLATE': 1192 case '+TITLE': 1193 case '-TEMPLATE': 1194 return $this->step_in_head(); 1195 1196 /* 1197 * > A start tag whose tag name is "body" 1198 * 1199 * This tag in the IN BODY insertion mode is a parse error. 1200 */ 1201 case '+BODY': 1202 if ( 1203 1 === $this->state->stack_of_open_elements->count() || 1204 'BODY' !== $this->state->stack_of_open_elements->at( 2 ) || 1205 $this->state->stack_of_open_elements->contains( 'TEMPLATE' ) 1206 ) { 1207 // Ignore the token. 1208 return $this->step(); 1209 } 1210 1150 1211 /* 1151 * > A DOCTYPE token 1152 * > Parse error. Ignore the token. 1212 * > Otherwise, set the frameset-ok flag to "not ok"; then, for each attribute 1213 * > on the token, check to see if the attribute is already present on the body 1214 * > element (the second element) on the stack of open elements, and if it is 1215 * > not, add the attribute and its corresponding value to that element. 1216 * 1217 * This parser does not currently support this behavior: ignore the token. 1153 1218 */ 1219 $this->state->frameset_ok = false; 1154 1220 return $this->step(); 1155 1221 1156 1222 /* 1157 * > A start tag whose tag name is "button" 1158 */ 1159 case '+BUTTON': 1160 if ( $this->state->stack_of_open_elements->has_element_in_scope( 'BUTTON' ) ) { 1161 // @todo Indicate a parse error once it's possible. This error does not impact the logic here. 
1162 $this->generate_implied_end_tags(); 1163 $this->state->stack_of_open_elements->pop_until( 'BUTTON' ); 1164 } 1165 1166 $this->reconstruct_active_formatting_elements(); 1167 $this->insert_html_element( $this->state->current_token ); 1168 $this->state->frameset_ok = false; 1169 1170 return true; 1223 * > A start tag whose tag name is "frameset" 1224 * 1225 * This tag in the IN BODY insertion mode is a parse error. 1226 */ 1227 case '+FRAMESET': 1228 if ( 1229 1 === $this->state->stack_of_open_elements->count() || 1230 'BODY' !== $this->state->stack_of_open_elements->at( 2 ) || 1231 false === $this->state->frameset_ok 1232 ) { 1233 // Ignore the token. 1234 return $this->step(); 1235 } 1236 1237 /* 1238 * > Otherwise, run the following steps: 1239 */ 1240 $this->bail( 'Cannot process non-ignored FRAMESET tags.' ); 1241 break; 1242 1243 /* 1244 * > An end tag whose tag name is "body" 1245 */ 1246 case '-BODY': 1247 if ( ! $this->state->stack_of_open_elements->has_element_in_scope( 'BODY' ) ) { 1248 // Parse error: ignore the token. 1249 return $this->step(); 1250 } 1251 1252 /* 1253 * > Otherwise, if there is a node in the stack of open elements that is not either a 1254 * > dd element, a dt element, an li element, an optgroup element, an option element, 1255 * > a p element, an rb element, an rp element, an rt element, an rtc element, a tbody 1256 * > element, a td element, a tfoot element, a th element, a thead element, a tr 1257 * > element, the body element, or the html element, then this is a parse error. 1258 * 1259 * There is nothing to do for this parse error, so don't check for it. 1260 */ 1261 1262 $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_AFTER_BODY; 1263 return true; 1264 1265 /* 1266 * > An end tag whose tag name is "html" 1267 */ 1268 case '-HTML': 1269 if ( ! $this->state->stack_of_open_elements->has_element_in_scope( 'BODY' ) ) { 1270 // Parse error: ignore the token. 
1271 return $this->step(); 1272 } 1273 1274 /* 1275 * > Otherwise, if there is a node in the stack of open elements that is not either a 1276 * > dd element, a dt element, an li element, an optgroup element, an option element, 1277 * > a p element, an rb element, an rp element, an rt element, an rtc element, a tbody 1278 * > element, a td element, a tfoot element, a th element, a thead element, a tr 1279 * > element, the body element, or the html element, then this is a parse error. 1280 * 1281 * There is nothing to do for this parse error, so don't check for it. 1282 */ 1283 1284 $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_AFTER_BODY; 1285 return $this->step( self::REPROCESS_CURRENT_NODE ); 1171 1286 1172 1287 /* … … 1209 1324 1210 1325 /* 1326 * > A start tag whose tag name is one of: "h1", "h2", "h3", "h4", "h5", "h6" 1327 */ 1328 case '+H1': 1329 case '+H2': 1330 case '+H3': 1331 case '+H4': 1332 case '+H5': 1333 case '+H6': 1334 if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) { 1335 $this->close_a_p_element(); 1336 } 1337 1338 if ( 1339 in_array( 1340 $this->state->stack_of_open_elements->current_node()->node_name, 1341 array( 'H1', 'H2', 'H3', 'H4', 'H5', 'H6' ), 1342 true 1343 ) 1344 ) { 1345 // @todo Indicate a parse error once it's possible. 1346 $this->state->stack_of_open_elements->pop(); 1347 } 1348 1349 $this->insert_html_element( $this->state->current_token ); 1350 return true; 1351 1352 /* 1353 * > A start tag whose tag name is one of: "pre", "listing" 1354 */ 1355 case '+PRE': 1356 case '+LISTING': 1357 if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) { 1358 $this->close_a_p_element(); 1359 } 1360 1361 /* 1362 * > If the next token is a U+000A LINE FEED (LF) character token, 1363 * > then ignore that token and move on to the next one. (Newlines 1364 * > at the start of pre blocks are ignored as an authoring convenience.) 1365 * 1366 * This is handled in `get_modifiable_text()`. 
1367 */ 1368 1369 $this->insert_html_element( $this->state->current_token ); 1370 $this->state->frameset_ok = false; 1371 return true; 1372 1373 /* 1374 * > A start tag whose tag name is "form" 1375 */ 1376 case '+FORM': 1377 $stack_contains_template = $this->state->stack_of_open_elements->contains( 'TEMPLATE' ); 1378 1379 if ( isset( $this->state->form_element ) && ! $stack_contains_template ) { 1380 // Parse error: ignore the token. 1381 return $this->step(); 1382 } 1383 1384 if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) { 1385 $this->close_a_p_element(); 1386 } 1387 1388 $this->insert_html_element( $this->state->current_token ); 1389 if ( ! $stack_contains_template ) { 1390 $this->state->form_element = $this->state->current_token; 1391 } 1392 1393 return true; 1394 1395 /* 1396 * > A start tag whose tag name is "li" 1397 * > A start tag whose tag name is one of: "dd", "dt" 1398 */ 1399 case '+DD': 1400 case '+DT': 1401 case '+LI': 1402 $this->state->frameset_ok = false; 1403 $node = $this->state->stack_of_open_elements->current_node(); 1404 $is_li = 'LI' === $token_name; 1405 1406 in_body_list_loop: 1407 /* 1408 * The logic for LI and DT/DD is the same except for one point: LI elements _only_ 1409 * close other LI elements, but a DT or DD element closes _any_ open DT or DD element. 1410 */ 1411 if ( $is_li ? 'LI' === $node->node_name : ( 'DD' === $node->node_name || 'DT' === $node->node_name ) ) { 1412 $node_name = $is_li ? 'LI' : $node->node_name; 1413 $this->generate_implied_end_tags( $node_name ); 1414 if ( ! $this->state->stack_of_open_elements->current_node_is( $node_name ) ) { 1415 // @todo Indicate a parse error once it's possible. This error does not impact the logic here. 
1416 } 1417 1418 $this->state->stack_of_open_elements->pop_until( $node_name ); 1419 goto in_body_list_done; 1420 } 1421 1422 if ( 1423 'ADDRESS' !== $node->node_name && 1424 'DIV' !== $node->node_name && 1425 'P' !== $node->node_name && 1426 $this->is_special( $node->node_name ) 1427 ) { 1428 /* 1429 * > If node is in the special category, but is not an address, div, 1430 * > or p element, then jump to the step labeled done below. 1431 */ 1432 goto in_body_list_done; 1433 } else { 1434 /* 1435 * > Otherwise, set node to the previous entry in the stack of open elements 1436 * > and return to the step labeled loop. 1437 */ 1438 foreach ( $this->state->stack_of_open_elements->walk_up( $node ) as $item ) { 1439 $node = $item; 1440 break; 1441 } 1442 goto in_body_list_loop; 1443 } 1444 1445 in_body_list_done: 1446 if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) { 1447 $this->close_a_p_element(); 1448 } 1449 1450 $this->insert_html_element( $this->state->current_token ); 1451 return true; 1452 1453 case '+PLAINTEXT': 1454 if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) { 1455 $this->close_a_p_element(); 1456 } 1457 1458 /* 1459 * @todo This may need to be handled in the Tag Processor and turn into 1460 * a single self-contained tag like TEXTAREA, whose modifiable text 1461 * is the rest of the input document as plaintext. 1462 */ 1463 $this->bail( 'Cannot process PLAINTEXT elements.' ); 1464 break; 1465 1466 /* 1467 * > A start tag whose tag name is "button" 1468 */ 1469 case '+BUTTON': 1470 if ( $this->state->stack_of_open_elements->has_element_in_scope( 'BUTTON' ) ) { 1471 // @todo Indicate a parse error once it's possible. This error does not impact the logic here. 
1472 $this->generate_implied_end_tags(); 1473 $this->state->stack_of_open_elements->pop_until( 'BUTTON' ); 1474 } 1475 1476 $this->reconstruct_active_formatting_elements(); 1477 $this->insert_html_element( $this->state->current_token ); 1478 $this->state->frameset_ok = false; 1479 1480 return true; 1481 1482 /* 1211 1483 * > An end tag whose tag name is one of: "address", "article", "aside", "blockquote", 1212 1484 * > "button", "center", "details", "dialog", "dir", "div", "dl", "fieldset", 1213 1485 * > "figcaption", "figure", "footer", "header", "hgroup", "listing", "main", 1214 1486 * > "menu", "nav", "ol", "pre", "search", "section", "summary", "ul" 1487 * 1488 * @todo This needs to check if the element in scope is an HTML element, meaning that 1489 * when SVG and MathML support is added, this needs to differentiate between an 1490 * HTML element of the given name, such as `<center>`, and a foreign element of 1491 * the same given name. 1215 1492 */ 1216 1493 case '-ADDRESS': … … 1255 1532 1256 1533 /* 1257 * > A start tag whose tag name is one of: "h1", "h2", "h3", "h4", "h5", "h6" 1258 */ 1259 case '+H1': 1260 case '+H2': 1261 case '+H3': 1262 case '+H4': 1263 case '+H5': 1264 case '+H6': 1265 if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) { 1266 $this->close_a_p_element(); 1267 } 1268 1269 if ( 1270 in_array( 1271 $this->state->stack_of_open_elements->current_node()->node_name, 1272 array( 'H1', 'H2', 'H3', 'H4', 'H5', 'H6' ), 1273 true 1274 ) 1275 ) { 1276 // @todo Indicate a parse error once it's possible. 
1277 $this->state->stack_of_open_elements->pop(); 1278 } 1279 1280 $this->insert_html_element( $this->state->current_token ); 1281 return true; 1282 1283 /* 1284 * > A start tag whose tag name is one of: "pre", "listing" 1285 */ 1286 case '+PRE': 1287 case '+LISTING': 1288 if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) { 1289 $this->close_a_p_element(); 1290 } 1291 $this->insert_html_element( $this->state->current_token ); 1292 $this->state->frameset_ok = false; 1293 return true; 1294 1295 /* 1296 * > An end tag whose tag name is one of: "h1", "h2", "h3", "h4", "h5", "h6" 1297 */ 1298 case '-H1': 1299 case '-H2': 1300 case '-H3': 1301 case '-H4': 1302 case '-H5': 1303 case '-H6': 1304 if ( ! $this->state->stack_of_open_elements->has_element_in_scope( '(internal: H1 through H6 - do not use)' ) ) { 1534 * > An end tag whose tag name is "form" 1535 */ 1536 case '-FORM': 1537 if ( ! $this->state->stack_of_open_elements->contains( 'TEMPLATE' ) ) { 1538 $node = $this->state->form_element; 1539 $this->state->form_element = null; 1540 1305 1541 /* 1306 * This is a parse error; ignore the token. 1542 * > If node is null or if the stack of open elements does not have node 1543 * > in scope, then this is a parse error; return and ignore the token. 1307 1544 * 1308 * @todo Indicate a parse error once it's possible. 1545 * @todo It's necessary to check if the form token itself is in scope, not 1546 * simply whether any FORM is in scope. 1309 1547 */ 1310 return $this->step(); 1311 } 1312 1313 $this->generate_implied_end_tags(); 1314 1315 if ( ! $this->state->stack_of_open_elements->current_node_is( $token_name ) ) { 1316 // @todo Record parse error: this error doesn't impact parsing. 
1317 } 1318 1319 $this->state->stack_of_open_elements->pop_until( '(internal: H1 through H6 - do not use)' ); 1320 return true; 1321 1322 /* 1323 * > A start tag whose tag name is "li" 1324 * > A start tag whose tag name is one of: "dd", "dt" 1325 */ 1326 case '+DD': 1327 case '+DT': 1328 case '+LI': 1329 $this->state->frameset_ok = false; 1330 $node = $this->state->stack_of_open_elements->current_node(); 1331 $is_li = 'LI' === $token_name; 1332 1333 in_body_list_loop: 1334 /* 1335 * The logic for LI and DT/DD is the same except for one point: LI elements _only_ 1336 * close other LI elements, but a DT or DD element closes _any_ open DT or DD element. 1337 */ 1338 if ( $is_li ? 'LI' === $node->node_name : ( 'DD' === $node->node_name || 'DT' === $node->node_name ) ) { 1339 $node_name = $is_li ? 'LI' : $node->node_name; 1340 $this->generate_implied_end_tags( $node_name ); 1341 if ( ! $this->state->stack_of_open_elements->current_node_is( $node_name ) ) { 1548 if ( 1549 null === $node || 1550 ! $this->state->stack_of_open_elements->has_element_in_scope( 'FORM' ) 1551 ) { 1552 // Parse error: ignore the token. 1553 return $this->step(); 1554 } 1555 1556 $this->generate_implied_end_tags(); 1557 if ( $node !== $this->state->stack_of_open_elements->current_node() ) { 1558 // @todo Indicate a parse error once it's possible. This error does not impact the logic here. 1559 $this->bail( 'Cannot close a FORM when other elements remain open as this would throw off the breadcrumbs for the following tokens.' ); 1560 } 1561 1562 $this->state->stack_of_open_elements->remove_node( $node ); 1563 } else { 1564 /* 1565 * > If the stack of open elements does not have a form element in scope, 1566 * > then this is a parse error; return and ignore the token. 1567 * 1568 * Note that unlike in the clause above, this is checking for any FORM in scope. 1569 */ 1570 if ( ! $this->state->stack_of_open_elements->has_element_in_scope( 'FORM' ) ) { 1571 // Parse error: ignore the token. 
1572 return $this->step(); 1573 } 1574 1575 $this->generate_implied_end_tags(); 1576 1577 if ( ! $this->state->stack_of_open_elements->current_node_is( 'FORM' ) ) { 1342 1578 // @todo Indicate a parse error once it's possible. This error does not impact the logic here. 1343 1579 } 1344 1580 1345 $this->state->stack_of_open_elements->pop_until( $node_name ); 1346 goto in_body_list_done; 1347 } 1348 1349 if ( 1350 'ADDRESS' !== $node->node_name && 1351 'DIV' !== $node->node_name && 1352 'P' !== $node->node_name && 1353 $this->is_special( $node->node_name ) 1354 ) { 1355 /* 1356 * > If node is in the special category, but is not an address, div, 1357 * > or p element, then jump to the step labeled done below. 1358 */ 1359 goto in_body_list_done; 1360 } else { 1361 /* 1362 * > Otherwise, set node to the previous entry in the stack of open elements 1363 * > and return to the step labeled loop. 1364 */ 1365 foreach ( $this->state->stack_of_open_elements->walk_up( $node ) as $item ) { 1366 $node = $item; 1367 break; 1368 } 1369 goto in_body_list_loop; 1370 } 1371 1372 in_body_list_done: 1373 if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) { 1374 $this->close_a_p_element(); 1375 } 1376 1377 $this->insert_html_element( $this->state->current_token ); 1581 $this->state->stack_of_open_elements->pop_until( 'FORM' ); 1582 return true; 1583 } 1584 break; 1585 1586 /* 1587 * > An end tag whose tag name is "p" 1588 */ 1589 case '-P': 1590 if ( ! $this->state->stack_of_open_elements->has_p_in_button_scope() ) { 1591 $this->insert_html_element( $this->state->current_token ); 1592 } 1593 1594 $this->close_a_p_element(); 1378 1595 return true; 1379 1596 … … 1424 1641 1425 1642 /* 1426 * > An end tag whose tag name is "p" 1427 */ 1428 case '-P': 1429 if ( ! 
$this->state->stack_of_open_elements->has_p_in_button_scope() ) { 1430 $this->insert_html_element( $this->state->current_token ); 1431 } 1432 1433 $this->close_a_p_element(); 1434 return true; 1435 1436 // > A start tag whose tag name is "a" 1643 * > An end tag whose tag name is one of: "h1", "h2", "h3", "h4", "h5", "h6" 1644 */ 1645 case '-H1': 1646 case '-H2': 1647 case '-H3': 1648 case '-H4': 1649 case '-H5': 1650 case '-H6': 1651 if ( ! $this->state->stack_of_open_elements->has_element_in_scope( '(internal: H1 through H6 - do not use)' ) ) { 1652 /* 1653 * This is a parse error; ignore the token. 1654 * 1655 * @todo Indicate a parse error once it's possible. 1656 */ 1657 return $this->step(); 1658 } 1659 1660 $this->generate_implied_end_tags(); 1661 1662 if ( ! $this->state->stack_of_open_elements->current_node_is( $token_name ) ) { 1663 // @todo Record parse error: this error doesn't impact parsing. 1664 } 1665 1666 $this->state->stack_of_open_elements->pop_until( '(internal: H1 through H6 - do not use)' ); 1667 return true; 1668 1669 /* 1670 * > A start tag whose tag name is "a" 1671 */ 1437 1672 case '+A': 1438 1673 foreach ( $this->state->active_formatting_elements->walk_up() as $item ) { … … 1476 1711 1477 1712 /* 1713 * > A start tag whose tag name is "nobr" 1714 */ 1715 case '+NOBR': 1716 $this->reconstruct_active_formatting_elements(); 1717 1718 if ( $this->state->stack_of_open_elements->has_element_in_scope( 'NOBR' ) ) { 1719 // Parse error. 
1720 $this->run_adoption_agency_algorithm(); 1721 $this->reconstruct_active_formatting_elements(); 1722 } 1723 1724 $this->insert_html_element( $this->state->current_token ); 1725 $this->state->active_formatting_elements->push( $this->state->current_token ); 1726 return true; 1727 1728 /* 1478 1729 * > An end tag whose tag name is one of: "a", "b", "big", "code", "em", "font", "i", 1479 1730 * > "nobr", "s", "small", "strike", "strong", "tt", "u" … … 1496 1747 1497 1748 /* 1749 * > A start tag whose tag name is one of: "applet", "marquee", "object" 1750 */ 1751 case '+APPLET': 1752 case '+MARQUEE': 1753 case '+OBJECT': 1754 $this->reconstruct_active_formatting_elements(); 1755 $this->insert_html_element( $this->state->current_token ); 1756 $this->state->active_formatting_elements->insert_marker(); 1757 $this->state->frameset_ok = false; 1758 return true; 1759 1760 /* 1761 * > An end tag token whose tag name is one of: "applet", "marquee", "object" 1762 * 1763 * @todo This needs to check if the element in scope is an HTML element, meaning that 1764 * when SVG and MathML support is added, this needs to differentiate between an 1765 * HTML element of the given name, such as `<object>`, and a foreign element of 1766 * the same given name. 1767 */ 1768 case '-APPLET': 1769 case '-MARQUEE': 1770 case '-OBJECT': 1771 if ( ! $this->state->stack_of_open_elements->has_element_in_scope( $token_name ) ) { 1772 // Parse error: ignore the token. 1773 return $this->step(); 1774 } 1775 1776 $this->generate_implied_end_tags(); 1777 if ( ! $this->state->stack_of_open_elements->current_node_is( $token_name ) ) { 1778 // This is a parse error.
1779 } 1780 1781 $this->state->stack_of_open_elements->pop_until( $token_name ); 1782 $this->state->active_formatting_elements->clear_up_to_last_marker(); 1783 return true; 1784 1785 /* 1786 * > A start tag whose tag name is "table" 1787 */ 1788 case '+TABLE': 1789 if ( 1790 WP_HTML_Processor_State::QUIRKS_MODE !== $this->state->document_mode && 1791 $this->state->stack_of_open_elements->has_p_in_button_scope() 1792 ) { 1793 $this->close_a_p_element(); 1794 } 1795 1796 $this->insert_html_element( $this->state->current_token ); 1797 $this->state->frameset_ok = false; 1798 $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE; 1799 return true; 1800 1801 /* 1498 1802 * > An end tag whose tag name is "br" 1499 * > Parse error. Drop the attributes from the token, and act as described in the next 1500 * > entry; i.e. act as if this was a "br" start tag token with no attributes, rather 1501 * > than the end tag token that it actually is. 1502 */ 1503 case '-BR': 1504 $this->bail( 'Closing BR tags require unimplemented special handling.' ); 1505 // This return required because PHPCS can't determine that the call to bail() throws. 1506 return false; 1803 * 1804 * This is prevented from happening because the Tag Processor 1805 * reports all closing BR tags as if they were opening tags. 1806 */ 1507 1807 1508 1808 /* … … 1526 1826 $this->reconstruct_active_formatting_elements(); 1527 1827 $this->insert_html_element( $this->state->current_token ); 1528 $type_attribute = $this->get_attribute( 'type' ); 1828 1529 1829 /* 1530 1830 * > If the token does not have an attribute with the name "type", or if it does, … … 1532 1832 * > string "hidden", then: set the frameset-ok flag to "not ok". 1533 1833 */ 1834 $type_attribute = $this->get_attribute( 'type' ); 1534 1835 if ( ! 
is_string( $type_attribute ) || 'hidden' !== strtolower( $type_attribute ) ) { 1535 1836 $this->state->frameset_ok = false; 1536 1837 } 1838 1839 return true; 1840 1841 /* 1842 * > A start tag whose tag name is one of: "param", "source", "track" 1843 */ 1844 case '+PARAM': 1845 case '+SOURCE': 1846 case '+TRACK': 1847 $this->insert_html_element( $this->state->current_token ); 1537 1848 return true; 1538 1849 … … 1549 1860 1550 1861 /* 1551 * > A start tag whose tag name is one of: "param", "source", "track" 1552 */ 1553 case '+PARAM': 1554 case '+SOURCE': 1555 case '+TRACK': 1862 * > A start tag whose tag name is "image" 1863 */ 1864 case '+IMAGE': 1865 /* 1866 * > Parse error. Change the token's tag name to "img" and reprocess it. (Don't ask.) 1867 * 1868 * Note that this is handled elsewhere, so it should not be possible to reach this code. 1869 */ 1870 $this->bail( "Cannot process an IMAGE tag. (Don't ask.)" ); 1871 break; 1872 1873 /* 1874 * > A start tag whose tag name is "textarea" 1875 */ 1876 case '+TEXTAREA': 1877 $this->insert_html_element( $this->state->current_token ); 1878 1879 /* 1880 * > If the next token is a U+000A LINE FEED (LF) character token, then ignore 1881 * > that token and move on to the next one. (Newlines at the start of 1882 * > textarea elements are ignored as an authoring convenience.) 1883 * 1884 * This is handled in `get_modifiable_text()`. 1885 */ 1886 1887 $this->state->frameset_ok = false; 1888 1889 /* 1890 * > Switch the insertion mode to "text". 1891 * 1892 * As a self-contained node, this behavior is handled in the Tag Processor. 1893 */ 1894 return true; 1895 1896 /* 1897 * > A start tag whose tag name is "xmp" 1898 */ 1899 case '+XMP': 1900 if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) { 1901 $this->close_a_p_element(); 1902 } 1903 1904 $this->reconstruct_active_formatting_elements(); 1905 $this->state->frameset_ok = false; 1906 1907 /* 1908 * > Follow the generic raw text element parsing algorithm. 
1909 * 1910 * As a self-contained node, this behavior is handled in the Tag Processor. 1911 */ 1912 $this->insert_html_element( $this->state->current_token ); 1913 return true; 1914 1915 /* 1916 * A start tag whose tag name is "iframe" 1917 */ 1918 case '+IFRAME': 1919 $this->state->frameset_ok = false; 1920 1921 /* 1922 * > Follow the generic raw text element parsing algorithm. 1923 * 1924 * As a self-contained node, this behavior is handled in the Tag Processor. 1925 */ 1926 $this->insert_html_element( $this->state->current_token ); 1927 return true; 1928 1929 /* 1930 * > A start tag whose tag name is "noembed" 1931 * > A start tag whose tag name is "noscript", if the scripting flag is enabled 1932 * 1933 * The scripting flag is never enabled in this parser. 1934 */ 1935 case '+NOEMBED': 1556 1936 $this->insert_html_element( $this->state->current_token ); 1557 1937 return true; … … 1598 1978 $this->insert_html_element( $this->state->current_token ); 1599 1979 return true; 1600 } 1601 1602 /* 1603 * These tags require special handling in the 'in body' insertion mode 1604 * but that handling hasn't yet been implemented. 1605 * 1606 * As the rules for each tag are implemented, the corresponding tag 1607 * name should be removed from this list. An accompanying test should 1608 * help ensure this list is maintained. 1609 * 1610 * @see Tests_HtmlApi_WpHtmlProcessor::test_step_in_body_fails_on_unsupported_tags 1611 * 1612 * Since this switch structure throws a WP_HTML_Unsupported_Exception, it's 1613 * possible to handle "any other start tag" and "any other end tag" below, 1614 * as that guarantees execution doesn't proceed for the unimplemented tags. 
1615 * 1616 * @see https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inbody 1617 */ 1618 switch ( $token_name ) { 1619 case 'APPLET': 1620 case 'BASE': 1621 case 'BASEFONT': 1622 case 'BGSOUND': 1623 case 'BODY': 1624 case 'CAPTION': 1625 case 'COL': 1626 case 'COLGROUP': 1627 case 'FORM': 1628 case 'FRAME': 1629 case 'FRAMESET': 1630 case 'HEAD': 1631 case 'HTML': 1632 case 'IFRAME': 1633 case 'LINK': 1634 case 'MARQUEE': 1635 case 'MATH': 1636 case 'META': 1637 case 'NOBR': 1638 case 'NOEMBED': 1639 case 'NOFRAMES': 1640 case 'NOSCRIPT': 1641 case 'OBJECT': 1642 case 'PLAINTEXT': 1643 case 'RB': 1644 case 'RP': 1645 case 'RT': 1646 case 'RTC': 1647 case 'SARCASM': 1648 case 'SCRIPT': 1649 case 'STYLE': 1650 case 'SVG': 1651 case 'TABLE': 1652 case 'TBODY': 1653 case 'TD': 1654 case 'TEMPLATE': 1655 case 'TEXTAREA': 1656 case 'TFOOT': 1657 case 'TH': 1658 case 'THEAD': 1659 case 'TITLE': 1660 case 'TR': 1661 case 'XMP': 1662 $this->bail( "Cannot process {$token_name} element." ); 1980 1981 /* 1982 * > A start tag whose tag name is one of: "rb", "rtc" 1983 */ 1984 case '+RB': 1985 case '+RTC': 1986 if ( $this->state->stack_of_open_elements->has_element_in_scope( 'RUBY' ) ) { 1987 $this->generate_implied_end_tags(); 1988 1989 if ( $this->state->stack_of_open_elements->current_node_is( 'RUBY' ) ) { 1990 // @todo Indicate a parse error once it's possible. 1991 } 1992 } 1993 1994 $this->insert_html_element( $this->state->current_token ); 1995 return true; 1996 1997 /* 1998 * > A start tag whose tag name is one of: "rp", "rt" 1999 */ 2000 case '+RP': 2001 case '+RT': 2002 if ( $this->state->stack_of_open_elements->has_element_in_scope( 'RUBY' ) ) { 2003 $this->generate_implied_end_tags( 'RTC' ); 2004 2005 $current_node_name = $this->state->stack_of_open_elements->current_node()->node_name; 2006 if ( 'RTC' === $current_node_name || 'RUBY' === $current_node_name ) { 2007 // @todo Indicate a parse error once it's possible. 
2008 } 2009 } 2010 2011 $this->insert_html_element( $this->state->current_token ); 2012 return true; 2013 2014 /* 2015 * > A start tag whose tag name is "math" 2016 */ 2017 case '+MATH': 2018 $this->reconstruct_active_formatting_elements(); 2019 2020 /* 2021 * @todo Adjust MathML attributes for the token. (This fixes the case of MathML attributes that are not all lowercase.) 2022 * @todo Adjust foreign attributes for the token. (This fixes the use of namespaced attributes, in particular XLink.) 2023 * 2024 * These ought to be handled in the attribute methods. 2025 */ 2026 2027 $this->bail( 'Cannot process MATH element, opening foreign content.' ); 2028 break; 2029 2030 /* 2031 * > A start tag whose tag name is "svg" 2032 */ 2033 case '+SVG': 2034 $this->reconstruct_active_formatting_elements(); 2035 2036 /* 2037 * @todo Adjust SVG attributes for the token. (This fixes the case of SVG attributes that are not all lowercase.) 2038 * @todo Adjust foreign attributes for the token. (This fixes the use of namespaced attributes, in particular XLink in SVG.) 2039 * 2040 * These ought to be handled in the attribute methods. 2041 */ 2042 2043 $this->bail( 'Cannot process SVG element, opening foreign content.' ); 2044 break; 2045 2046 /* 2047 * > A start tag whose tag name is one of: "caption", "col", "colgroup", 2048 * > "frame", "head", "tbody", "td", "tfoot", "th", "thead", "tr" 2049 */ 2050 case '+CAPTION': 2051 case '+COL': 2052 case '+COLGROUP': 2053 case '+FRAME': 2054 case '+HEAD': 2055 case '+TBODY': 2056 case '+TD': 2057 case '+TFOOT': 2058 case '+TH': 2059 case '+THEAD': 2060 case '+TR': 2061 // Parse error. Ignore the token. 
2062 return $this->step(); 1663 2063 } 1664 2064 … … 1682 2082 */ 1683 2083 foreach ( $this->state->stack_of_open_elements->walk_up() as $node ) { 2084 /* 2085 * @todo This needs to check if the element in scope is an HTML element, meaning that 2086 * when SVG and MathML support is added, this needs to differentiate between an 2087 * HTML element of the given name, such as `<object>`, and a foreign element of 2088 * the same given name. 2089 */ 1684 2090 if ( $token_name === $node->node_name ) { 1685 2091 break; -
trunk/src/wp-includes/html-api/class-wp-html-tag-processor.php
r58769 r58779 130 130 * true === $processor->next_tag( 'DIV' ); 131 131 * 132 * #### Special elements132 * #### Special self-contained elements 133 133 * 134 134 * Some HTML elements are handled in a special way; their start and end tags … … 757 757 758 758 /** 759 * Whether the parser should skip over an immediately-following linefeed 760 * character, as is the case with LISTING, PRE, and TEXTAREA. 761 * 762 * > If the next token is a U+000A LINE FEED (LF) character token, then 763 * > ignore that token and move on to the next one. (Newlines at the start 764 * > of [these] elements are ignored as an authoring convenience.) 765 * 766 * @since 6.7.0 767 * 768 * @var int|null 769 */ 770 private $skip_newline_at = null; 771 772 /** 759 773 * Constructor. 760 774 * … … 927 941 928 942 /* 929 * For non-DATA sections which might contain text that looks like HTML tags but 930 * isn't, scan with the appropriate alternative mode. Looking at the first letter 931 * of the tag name as a pre-check avoids a string allocation when it's not needed. 932 */ 933 $t = $this->html[ $this->tag_name_starts_at ]; 943 * Certain tags require additional processing. The first-letter pre-check 944 * avoids unnecessary string allocation when comparing the tag names. 945 * 946 * - IFRAME 947 * - LISTING (deprecated) 948 * - NOEMBED (deprecated) 949 * - NOFRAMES (deprecated) 950 * - PRE 951 * - SCRIPT 952 * - STYLE 953 * - TEXTAREA 954 * - TITLE 955 * - XMP (deprecated) 956 */ 934 957 if ( 935 958 $this->is_closing_tag || 936 ! ( 937 'i' === $t || 'I' === $t || 938 'n' === $t || 'N' === $t || 939 's' === $t || 'S' === $t || 940 't' === $t || 'T' === $t || 941 'x' === $t || 'X' === $t 942 ) 959 1 !== strspn( $this->html, 'iIlLnNpPsStTxX', $this->tag_name_starts_at, 1 ) 943 960 ) { 944 961 return true; … … 948 965 949 966 /* 967 * For LISTING, PRE, and TEXTAREA, the first linefeed of an immediately-following 968 * text node is ignored as an authoring convenience. 
969 * 970 * @see static::skip_newline_at 971 */ 972 if ( 'LISTING' === $tag_name || 'PRE' === $tag_name ) { 973 $this->skip_newline_at = $this->bytes_already_parsed; 974 return true; 975 } 976 977 /* 978 * There are certain elements whose children are not DATA but are instead 979 * RCDATA or RAWTEXT. These cannot contain other elements, and the contents 980 * are parsed as plaintext, with character references decoded in RCDATA but 981 * not in RAWTEXT. 982 * 983 * These elements are described here as "self-contained" or special atomic 984 * elements whose end tag is consumed with the opening tag, and they will 985 * contain modifiable text inside of them. 986 * 950 987 * Preserve the opening tag pointers, as these will be overwritten 951 988 * when finding the closing tag. They will be reset after finding … … 2691 2728 * 2692 2729 * @since 6.2.0 2730 * @since 6.7.0 Reports all BR tags as opening tags. 2693 2731 * 2694 2732 * @return bool Whether the current tag is a tag closer. … … 2697 2735 return ( 2698 2736 self::STATE_MATCHED_TAG === $this->parser_state && 2699 $this->is_closing_tag 2737 $this->is_closing_tag && 2738 2739 /* 2740 * The BR tag can only exist as an opening tag. If something like `</br>` 2741 * appears then the HTML parser will treat it as an opening tag with no 2742 * attributes. The BR tag is unique in this way. 2743 * 2744 * @see https://html.spec.whatwg.org/#parsing-main-inbody 2745 */ 2746 'BR' !== $this->get_tag() 2700 2747 ); 2701 2748 } … … 2826 2873 * have an empty string (e.g. a comment with no contents). 2827 2874 * 2875 * Limitations: 2876 * 2877 * - This function will not strip the leading newline appropriately 2878 * after seeking into a LISTING or PRE element. To ensure that the 2879 * newline is treated properly, seek to the LISTING or PRE opening 2880 * tag instead of to the first text node inside the element. 2881 * 2828 2882 * @since 6.5.0 2883 * @since 6.7.0 Replaces NULL bytes (U+0000) and newlines appropriately. 
2829 2884 * 2830 2885 * @return string 2831 2886 */ 2832 2887 public function get_modifiable_text(): string { 2833 if ( null === $this->text_starts_at ) {2888 if ( null === $this->text_starts_at || 0 === $this->text_length ) { 2834 2889 return ''; 2835 2890 } 2836 2891 2837 2892 $text = substr( $this->html, $this->text_starts_at, $this->text_length ); 2893 2894 /* 2895 * Pre-processing the input stream would normally happen before 2896 * any parsing is done, but deferring it means it's possible to 2897 * skip in most cases. When getting the modifiable text, however 2898 * it's important to apply the pre-processing steps, which is 2899 * normalizing newlines. 2900 * 2901 * @see https://html.spec.whatwg.org/#preprocessing-the-input-stream 2902 * @see https://infra.spec.whatwg.org/#normalize-newlines 2903 */ 2904 $text = str_replace( "\r\n", "\n", $text ); 2905 $text = str_replace( "\r", "\n", $text ); 2838 2906 2839 2907 // Comment data is not decoded. … … 2844 2912 self::STATE_FUNKY_COMMENT === $this->parser_state 2845 2913 ) { 2846 return $text;2847 } 2848 2849 $tag_name = $this->get_tag();2914 return str_replace( "\x00", "\u{FFFD}", $text ); 2915 } 2916 2917 $tag_name = $this->get_token_name(); 2850 2918 if ( 2851 2919 // Script data is not decoded. … … 2859 2927 'XMP' === $tag_name 2860 2928 ) { 2861 return $text;2929 return str_replace( "\x00", "\u{FFFD}", $text ); 2862 2930 } 2863 2931 … … 2865 2933 2866 2934 /* 2867 * TEXTAREA skips a leading newline, but this newline may appear not only as the 2868 * literal character `\n`, but also as a character reference, such as in the 2869 * following markup: `<textarea>&#x0a;Content</textarea>`. 2870 * 2871 * For these cases it's important to first decode the text content before checking 2872 * for a leading newline and removing it. 2935 * Skip the first line feed after LISTING, PRE, and TEXTAREA opening tags. 2936 * 2937 * Note that this first newline may come in the form of a character 2938 * reference, such as `&#x0a;`, and so it's important to perform 2939 * this transformation only after decoding the raw text content. 2873 2940 */ 2874 2941 if ( 2875 self::STATE_MATCHED_TAG === $this->parser_state && 2876 'TEXTAREA' === $tag_name && 2877 strlen( $decoded ) > 0 && 2878 "\n" === $decoded[0] 2942 ( "\n" === ( $decoded[0] ?? '' ) ) && 2943 ( ( $this->skip_newline_at === $this->token_starts_at && '#text' === $tag_name ) || 'TEXTAREA' === $tag_name ) 2879 2944 ) { 2880 return substr( $decoded, 1 ); 2881 } 2882 2883 return $decoded; 2945 $decoded = substr( $decoded, 1 ); 2946 } 2947 2948 /* 2949 * Only in normative text nodes does the NULL byte (U+0000) get removed. 2950 * In all other contexts it's replaced by the replacement character (U+FFFD) 2951 * for security reasons (to avoid joining together strings that were safe 2952 * when separated, but not when joined). 2953 */ 2954 return '#text' === $tag_name 2955 ? str_replace( "\x00", '', $decoded ) 2956 : str_replace( "\x00", "\u{FFFD}", $decoded ); 2884 2957 } 2885 2958 -
trunk/src/wp-includes/html-api/class-wp-html-token.php
r58769 r58779 73 73 * @since 6.4.0 74 74 * 75 * @param string $bookmark_name Name of bookmark corresponding to location in HTML where token is found. 75 * @param string|null $bookmark_name Name of bookmark corresponding to location in HTML where token is found, 76 * or `null` for markers and nodes without a bookmark. 76 77 * @param string $node_name Name of node token represents; if uppercase, an HTML element; if lowercase, a special value like "marker". 77 78 * @param bool $has_self_closing_flag Whether the source token contains the self-closing flag, regardless of whether it's valid. 78 79 * @param callable|null $on_destroy Optional. Function to call when destroying token, useful for releasing the bookmark. 79 80 */ 80 public function __construct( string $bookmark_name, string $node_name, bool $has_self_closing_flag, ?callable $on_destroy = null ) {81 public function __construct( ?string $bookmark_name, string $node_name, bool $has_self_closing_flag, ?callable $on_destroy = null ) { 81 82 $this->bookmark_name = $bookmark_name; 82 83 $this->node_name = $node_name; -
trunk/tests/phpunit/tests/html-api/wpHtmlProcessor.php
r58677 r58779 135 135 * @covers WP_HTML_Processor::is_void 136 136 * 137 * @dataProvider data_void_tags 137 * @dataProvider data_void_tags_not_ignored_in_body 138 138 * 139 139 * @param string $tag_name Name of void tag under test. … … 251 251 ); 252 252 253 foreach ( self::data_void_tags () as $tag_name => $_name ) {253 foreach ( self::data_void_tags_not_ignored_in_body() as $tag_name => $_name ) { 254 254 $self_contained_nodes[ "Void elements ({$tag_name})" ] = array( "<{$tag_name}>" ); 255 255 } … … 285 285 * @ticket 60382 286 286 * 287 * @dataProvider data_void_tags 287 * @dataProvider data_void_tags_not_ignored_in_body 288 288 * 289 289 * @param string $tag_name Name of void tag under test. … … 319 319 $processor->get_breadcrumbs(), 320 320 'Found incorrect nesting of first element.' 321 );322 323 $this->assertTrue(324 $processor->next_token(),325 'Should have found the DIV as the second tag.'326 );327 328 $this->assertSame(329 array( 'HTML', 'BODY', 'DIV' ),330 $processor->get_breadcrumbs(),331 "DIV should have been a sibling of the {$tag_name}."332 321 ); 333 322 } … … 359 348 360 349 /** 350 * Data provider. 351 * 352 * @return array[] 353 */ 354 public static function data_void_tags_not_ignored_in_body() { 355 $all_void_tags = self::data_void_tags(); 356 unset( $all_void_tags['COL'] ); 357 358 return $all_void_tags; 359 } 360 361 /** 361 362 * Ensures that special handling of unsupported tags is cleaned up 362 363 * as handling is implemented. 
Otherwise there's risk of leaving special … … 384 385 public static function data_unsupported_special_in_body_tags() { 385 386 return array( 386 'APPLET' => array( 'APPLET' ), 387 'BASE' => array( 'BASE' ), 388 'BASEFONT' => array( 'BASEFONT' ), 389 'BGSOUND' => array( 'BGSOUND' ), 390 'BODY' => array( 'BODY' ), 391 'CAPTION' => array( 'CAPTION' ), 392 'COL' => array( 'COL' ), 393 'COLGROUP' => array( 'COLGROUP' ), 394 'FORM' => array( 'FORM' ), 395 'FRAME' => array( 'FRAME' ), 396 'FRAMESET' => array( 'FRAMESET' ), 397 'HEAD' => array( 'HEAD' ), 398 'HTML' => array( 'HTML' ), 399 'IFRAME' => array( 'IFRAME' ), 400 'LINK' => array( 'LINK' ), 401 'MARQUEE' => array( 'MARQUEE' ), 402 'MATH' => array( 'MATH' ), 403 'META' => array( 'META' ), 404 'NOBR' => array( 'NOBR' ), 405 'NOEMBED' => array( 'NOEMBED' ), 406 'NOFRAMES' => array( 'NOFRAMES' ), 407 'NOSCRIPT' => array( 'NOSCRIPT' ), 408 'OBJECT' => array( 'OBJECT' ), 409 'PLAINTEXT' => array( 'PLAINTEXT' ), 410 'RB' => array( 'RB' ), 411 'RP' => array( 'RP' ), 412 'RT' => array( 'RT' ), 413 'RTC' => array( 'RTC' ), 414 'SARCASM' => array( 'SARCASM' ), 415 'SCRIPT' => array( 'SCRIPT' ), 416 'STYLE' => array( 'STYLE' ), 417 'SVG' => array( 'SVG' ), 418 'TABLE' => array( 'TABLE' ), 419 'TBODY' => array( 'TBODY' ), 420 'TD' => array( 'TD' ), 421 'TEMPLATE' => array( 'TEMPLATE' ), 422 'TEXTAREA' => array( 'TEXTAREA' ), 423 'TFOOT' => array( 'TFOOT' ), 424 'TH' => array( 'TH' ), 425 'THEAD' => array( 'THEAD' ), 426 'TITLE' => array( 'TITLE' ), 427 'TR' => array( 'TR' ), 428 'XMP' => array( 'XMP' ), 387 'MATH' => array( 'MATH' ), 388 'SVG' => array( 'SVG' ), 429 389 ); 430 390 } -
trunk/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php
r58741 r58779 41 41 'ACRONYM', // Neutralized. 42 42 'ADDRESS', 43 'APPLET', // Deprecated. 43 44 'AREA', 44 45 'ARTICLE', … … 73 74 'FIGURE', 74 75 'FONT', 76 'FORM', 75 77 'FOOTER', 76 78 'H1', … … 96 98 'MAP', 97 99 'MARK', 100 'MARQUEE', // Deprecated. 98 101 'MENU', 99 102 'METER', … … 101 104 'NAV', 102 105 'NEXTID', // Deprecated. 106 'NOBR', // Neutralized. 107 'NOSCRIPT', 108 'OBJECT', 103 109 'OL', 104 110 'OUTPUT', … … 107 113 'PROGRESS', 108 114 'Q', 115 'RB', // Neutralized. 116 'RP', 117 'RT', 118 'RTC', // Neutralized. 109 119 'RUBY', 110 120 'SAMP', … … 120 130 'SUMMARY', 121 131 'SUP', 132 'TABLE', 122 133 'TIME', 123 134 'TT', … … 168 179 public static function data_unsupported_elements() { 169 180 $unsupported_elements = array( 170 'APPLET', // Deprecated.171 181 'BASE', 172 182 'BGSOUND', // Deprecated; self-closing if self-closing flag provided, otherwise normal. … … 175 185 'COL', 176 186 'COLGROUP', 177 'FORM',178 187 'FRAME', 179 188 'FRAMESET', … … 182 191 'IFRAME', 183 192 'LINK', 184 'MARQUEE', // Deprecated.185 193 'MATH', 186 194 'META', 187 'NOBR', // Neutralized.188 195 'NOEMBED', // Neutralized. 189 196 'NOFRAMES', // Neutralized. 190 'NOSCRIPT',191 'OBJECT',192 197 'PLAINTEXT', // Neutralized. 193 'RB', // Neutralized.194 'RP',195 'RT',196 'RTC', // Neutralized.197 198 'SCRIPT', 198 199 'STYLE', 199 200 'SVG', 200 'TABLE',201 201 'TBODY', 202 202 'TD', -
trunk/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php
r58742 r58779 32 32 */ 33 33 const SKIP_TESTS = array( 34 'adoption01/line0046' => 'Unimplemented: Reconstruction of active formatting elements.', 35 'adoption01/line0159' => 'Unimplemented: Reconstruction of active formatting elements.', 36 'adoption01/line0318' => 'Unimplemented: Reconstruction of active formatting elements.', 37 'inbody01/line0001' => 'Bug.', 38 'inbody01/line0014' => 'Bug.', 39 'inbody01/line0029' => 'Bug.', 40 'menuitem-element/line0012' => 'Bug.', 41 'tests1/line0342' => "Closing P tag implicitly creates opener, which we don't visit.", 42 'tests1/line0720' => 'Unimplemented: Reconstruction of active formatting elements.', 43 'tests15/line0001' => 'Unimplemented: Reconstruction of active formatting elements.', 44 'tests15/line0022' => 'Unimplemented: Reconstruction of active formatting elements.', 45 'tests2/line0650' => 'Whitespace only test never enters "in body" parsing mode.', 46 'tests20/line0497' => "Closing P tag implicitly creates opener, which we don't visit.", 47 'tests23/line0001' => 'Unimplemented: Reconstruction of active formatting elements.', 48 'tests23/line0041' => 'Unimplemented: Reconstruction of active formatting elements.', 49 'tests23/line0069' => 'Unimplemented: Reconstruction of active formatting elements.', 50 'tests23/line0101' => 'Unimplemented: Reconstruction of active formatting elements.', 51 'tests25/line0169' => 'Bug.', 52 'tests26/line0263' => 'Bug: An active formatting element should be created for a trailing text node.', 34 'adoption01/line0046' => 'Unimplemented: Reconstruction of active formatting elements.', 35 'adoption01/line0159' => 'Unimplemented: Reconstruction of active formatting elements.', 36 'adoption01/line0318' => 'Unimplemented: Reconstruction of active formatting elements.', 37 'tests1/line0720' => 'Unimplemented: Reconstruction of active formatting elements.', 38 'tests15/line0001' => 'Unimplemented: Reconstruction of active formatting elements.', 39 'tests15/line0022' => 'Unimplemented: 
Reconstruction of active formatting elements.', 40 'tests15/line0068' => 'Unimplemented: no support outside of IN BODY yet.', 41 'tests2/line0650' => 'Whitespace only test never enters "in body" parsing mode.', 42 'tests19/line0965' => 'Unimplemented: no support outside of IN BODY yet.', 43 'tests23/line0001' => 'Unimplemented: Reconstruction of active formatting elements.', 44 'tests23/line0041' => 'Unimplemented: Reconstruction of active formatting elements.', 45 'tests23/line0069' => 'Unimplemented: Reconstruction of active formatting elements.', 46 'tests23/line0101' => 'Unimplemented: Reconstruction of active formatting elements.', 47 'tests26/line0263' => 'Bug: An active formatting element should be created for a trailing text node.', 48 'webkit01/line0231' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.', 49 'webkit02/line0013' => "Asserting behavior with scripting flag enabled, which this parser doesn't support.", 50 'webkit01/line0300' => 'Unimplemented: no support outside of IN BODY yet.', 51 'webkit01/line0310' => 'Unimplemented: no support outside of IN BODY yet.', 52 'webkit01/line0336' => 'Unimplemented: no support outside of IN BODY yet.', 53 'webkit01/line0349' => 'Unimplemented: no support outside of IN BODY yet.', 54 'webkit01/line0362' => 'Unimplemented: no support outside of IN BODY yet.', 55 'webkit01/line0375' => 'Unimplemented: no support outside of IN BODY yet.', 53 56 ); 54 57 … … 199 202 $output .= str_repeat( $indent, $tag_indent + 1 ) . "{$attribute_name}=\"{$val}\"\n"; 200 203 } 201 202 // Self-contained tags contain their inner contents as modifiable text. 203 $modifiable_text = $processor->get_modifiable_text(); 204 if ( '' !== $modifiable_text ) { 205 $was_text = true; 206 if ( '' === $text_node ) { 207 $text_node = str_repeat( $indent, $indent_level ) . 
'"'; 208 } 209 $text_node .= $modifiable_text; 210 --$indent_level; 211 } 204 } 205 206 // Self-contained tags contain their inner contents as modifiable text. 207 $modifiable_text = $processor->get_modifiable_text(); 208 if ( '' !== $modifiable_text ) { 209 $output .= str_repeat( $indent, $indent_level ) . "\"{$modifiable_text}\"\n"; 210 } 211 212 if ( ! $processor->is_void( $tag_name ) && ! $processor->expects_closer() ) { 213 --$indent_level; 212 214 } 213 215 … … 226 228 case WP_HTML_Processor::COMMENT_AS_ABRUPTLY_CLOSED_COMMENT: 227 229 case WP_HTML_Processor::COMMENT_AS_HTML_COMMENT: 230 case WP_HTML_Processor::COMMENT_AS_INVALID_HTML: 228 231 $comment_text_content = $processor->get_modifiable_text(); 229 232 break; -
trunk/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php
r58713 r58779 407 407 408 408 /** 409 * Ensures that support isn't accidentally partially added for the closing BR tag `</br>`. 410 * 411 * This tag closer has special rules and support shouldn't be added without implementing full support. 409 * Ensures that closing `</br>` tags are appropriately treated as opening tags with no attributes. 412 410 * 413 411 * > An end tag whose tag name is "br" … … 416 414 * > tag token that it actually is. 417 415 * 418 * When this handling is implemented, this test should be removed. It's not incorporated419 * into the existing unsupported tag behavior test because the opening tag is supported;420 * only the closing tag isn't.421 *422 416 * @covers WP_HTML_Processor::step_in_body 423 417 * … … 425 419 */ 426 420 public function test_br_end_tag_unsupported() { 427 $processor = WP_HTML_Processor::create_fragment( '</br>' ); 428 429 $this->assertFalse( $processor->next_tag(), 'Found a BR tag that should not be handled.' ); 430 $this->assertSame( WP_HTML_Processor::ERROR_UNSUPPORTED, $processor->get_last_error() ); 421 $processor = WP_HTML_Processor::create_fragment( '</br id="an-opener" html>' ); 422 423 $this->assertTrue( $processor->next_tag(), 'Failed to find the expected opening BR tag.' ); 424 $this->assertFalse( $processor->is_tag_closer(), 'Should have treated the tag as an opening tag.' ); 425 $this->assertNull( $processor->get_attribute_names_with_prefix( '' ), 'Should have ignored any attributes on the tag.' ); 431 426 } 432 427 } -
trunk/tests/phpunit/tests/html-api/wpHtmlSupportRequiredHtmlProcessor.php
r58677 r58779 1 <?php2 /**3 * Unit tests for the HTML API indicating that changes are needed to the4 * WP_HTML_Processor class before specific features are added to the API.5 *6 * Note! Duplication of test cases and the helper function in this file are intentional.7 * This test file exists to warn developers of related areas of code that need to update8 * together when adding support for new elements to the HTML Processor. For example,9 * when adding support for the LI element it's necessary to update the function which10 * generates implied end tags. This is because each element might bring with it semantic11 * rules that impact the way the document should be parsed.12 *13 * Without these tests a developer needs to investigate all possible places they14 * might need to update when adding support for more elements and risks overlooking15 * important parts that, in the absence of the related support, will lead to errors.16 *17 * @package WordPress18 * @subpackage HTML-API19 *20 * @since 6.4.021 *22 * @group html-api23 *24 * @coversDefaultClass WP_HTML_Processor25 */26 class Tests_HtmlApi_WpHtmlSupportRequiredHtmlProcessor extends WP_UnitTestCase {27 /**28 * Fails to assert if the HTML Processor handles the given tag.29 *30 * This test helper is used throughout this test file for one purpose only: to31 * fail a test if the HTML Processor handles the given tag. 
In other words, it32 * ensures that the HTML Processor aborts when encountering the given tag.33 *34 * This is used to ensure that when support for a new tag is added to the35 * HTML Processor it receives full support and not partial support, which36 * could lead to a variety of issues.37 *38 * Do not remove this helper function as it provides semantic meaning to the39 * assertions in the tests in this file and its behavior is incredibly specific40 * and limited and doesn't warrant adding a new abstraction into WP_UnitTestCase.41 *42 * @param string $tag_name the HTML Processor should abort when encountering this tag, e.g. "BUTTON".43 */44 private function ensure_support_is_added_everywhere( $tag_name ) {45 $processor = WP_HTML_Processor::create_fragment( "<$tag_name>" );46 47 $this->assertFalse( $processor->step(), "Must support terminating elements in specific scope check before adding support for the {$tag_name} element." );48 }49 50 /**51 * Generating implied end tags walks up the stack of open elements52 * as long as any of the following missing elements is the current node.53 *54 * @since 6.4.055 *56 * @ticket 5890757 *58 * @covers WP_HTML_Processor::generate_implied_end_tags59 */60 public function test_generate_implied_end_tags_needs_support() {61 $this->ensure_support_is_added_everywhere( 'RB' );62 $this->ensure_support_is_added_everywhere( 'RP' );63 $this->ensure_support_is_added_everywhere( 'RT' );64 $this->ensure_support_is_added_everywhere( 'RTC' );65 }66 67 /**68 * Generating implied end tags thoroughly walks up the stack of open elements69 * as long as any of the following missing elements is the current node.70 *71 * @since 6.4.072 *73 * @ticket 5890774 *75 * @covers WP_HTML_Processor::generate_implied_end_tags_thoroughly76 */77 public function test_generate_implied_end_tags_thoroughly_needs_support() {78 $this->ensure_support_is_added_everywhere( 'CAPTION' );79 $this->ensure_support_is_added_everywhere( 'COLGROUP' );80 
$this->ensure_support_is_added_everywhere( 'RB' );81 $this->ensure_support_is_added_everywhere( 'RP' );82 $this->ensure_support_is_added_everywhere( 'RT' );83 $this->ensure_support_is_added_everywhere( 'RTC' );84 $this->ensure_support_is_added_everywhere( 'TBODY' );85 $this->ensure_support_is_added_everywhere( 'TD' );86 $this->ensure_support_is_added_everywhere( 'TFOOT' );87 $this->ensure_support_is_added_everywhere( 'TH' );88 $this->ensure_support_is_added_everywhere( 'HEAD' );89 $this->ensure_support_is_added_everywhere( 'TR' );90 }91 } -
trunk/tests/phpunit/tests/html-api/wpHtmlSupportRequiredOpenElements.php
r58677 r58779 62 62 */ 63 63 public function test_has_element_in_scope_needs_support() { 64 // These elements impact all scopes.65 $this->ensure_support_is_added_everywhere( 'APPLET' );66 $this->ensure_support_is_added_everywhere( 'CAPTION' );67 $this->ensure_support_is_added_everywhere( 'HTML' );68 $this->ensure_support_is_added_everywhere( 'TABLE' );69 $this->ensure_support_is_added_everywhere( 'TD' );70 $this->ensure_support_is_added_everywhere( 'TH' );71 $this->ensure_support_is_added_everywhere( 'MARQUEE' );72 $this->ensure_support_is_added_everywhere( 'OBJECT' );73 $this->ensure_support_is_added_everywhere( 'TEMPLATE' );74 75 64 // MathML Elements: MI, MO, MN, MS, MTEXT, ANNOTATION-XML. 76 65 $this->ensure_support_is_added_everywhere( 'MATH' ); … … 100 89 */ 101 90 public function test_has_element_in_list_item_scope_needs_support() { 102 // These elements impact all scopes.103 $this->ensure_support_is_added_everywhere( 'APPLET' );104 $this->ensure_support_is_added_everywhere( 'CAPTION' );105 $this->ensure_support_is_added_everywhere( 'HTML' );106 $this->ensure_support_is_added_everywhere( 'TABLE' );107 $this->ensure_support_is_added_everywhere( 'TD' );108 $this->ensure_support_is_added_everywhere( 'TH' );109 $this->ensure_support_is_added_everywhere( 'MARQUEE' );110 $this->ensure_support_is_added_everywhere( 'OBJECT' );111 $this->ensure_support_is_added_everywhere( 'TEMPLATE' );112 113 91 // MathML Elements: MI, MO, MN, MS, MTEXT, ANNOTATION-XML. 
114 92 $this->ensure_support_is_added_everywhere( 'MATH' ); … … 134 112 */ 135 113 public function test_has_element_in_button_scope_needs_support() { 136 // These elements impact all scopes.137 $this->ensure_support_is_added_everywhere( 'APPLET' );138 $this->ensure_support_is_added_everywhere( 'CAPTION' );139 $this->ensure_support_is_added_everywhere( 'HTML' );140 $this->ensure_support_is_added_everywhere( 'TABLE' );141 $this->ensure_support_is_added_everywhere( 'TD' );142 $this->ensure_support_is_added_everywhere( 'TH' );143 $this->ensure_support_is_added_everywhere( 'MARQUEE' );144 $this->ensure_support_is_added_everywhere( 'OBJECT' );145 $this->ensure_support_is_added_everywhere( 'TEMPLATE' );146 147 114 // MathML Elements: MI, MO, MN, MS, MTEXT, ANNOTATION-XML. 148 115 $this->ensure_support_is_added_everywhere( 'MATH' ); … … 169 136 */ 170 137 public function test_after_element_pop_must_maintain_p_in_button_scope_flag() { 171 // These elements impact all scopes.172 $this->ensure_support_is_added_everywhere( 'APPLET' );173 $this->ensure_support_is_added_everywhere( 'CAPTION' );174 $this->ensure_support_is_added_everywhere( 'HTML' );175 $this->ensure_support_is_added_everywhere( 'TABLE' );176 $this->ensure_support_is_added_everywhere( 'TD' );177 $this->ensure_support_is_added_everywhere( 'TH' );178 $this->ensure_support_is_added_everywhere( 'MARQUEE' );179 $this->ensure_support_is_added_everywhere( 'OBJECT' );180 $this->ensure_support_is_added_everywhere( 'TEMPLATE' );181 182 138 // MathML Elements: MI, MO, MN, MS, MTEXT, ANNOTATION-XML. 
183 139 $this->ensure_support_is_added_everywhere( 'MATH' ); … … 204 160 */ 205 161 public function test_after_element_push_must_maintain_p_in_button_scope_flag() { 206 // These elements impact all scopes.207 $this->ensure_support_is_added_everywhere( 'APPLET' );208 $this->ensure_support_is_added_everywhere( 'CAPTION' );209 $this->ensure_support_is_added_everywhere( 'HTML' );210 $this->ensure_support_is_added_everywhere( 'TABLE' );211 $this->ensure_support_is_added_everywhere( 'TD' );212 $this->ensure_support_is_added_everywhere( 'TH' );213 $this->ensure_support_is_added_everywhere( 'MARQUEE' );214 $this->ensure_support_is_added_everywhere( 'OBJECT' );215 $this->ensure_support_is_added_everywhere( 'TEMPLATE' );216 217 162 // MathML Elements: MI, MO, MN, MS, MTEXT, ANNOTATION-XML. 218 163 $this->ensure_support_is_added_everywhere( 'MATH' ); … … 238 183 */ 239 184 public function test_has_element_in_table_scope_needs_support() { 240 // These elements impact all scopes. 241 $this->ensure_support_is_added_everywhere( 'APPLET' ); 242 $this->ensure_support_is_added_everywhere( 'CAPTION' ); 243 $this->ensure_support_is_added_everywhere( 'HTML' ); 244 $this->ensure_support_is_added_everywhere( 'TABLE' ); 245 $this->ensure_support_is_added_everywhere( 'TD' ); 246 $this->ensure_support_is_added_everywhere( 'TH' ); 247 $this->ensure_support_is_added_everywhere( 'MARQUEE' ); 248 $this->ensure_support_is_added_everywhere( 'OBJECT' ); 249 $this->ensure_support_is_added_everywhere( 'TEMPLATE' ); 250 251 // MathML Elements: MI, MO, MN, MS, MTEXT, ANNOTATION-XML. 252 $this->ensure_support_is_added_everywhere( 'MATH' ); 253 254 /* 255 * SVG elements: note that TITLE is both an HTML element and an SVG element 256 * so care must be taken when adding support for either one. 257 * 258 * FOREIGNOBJECT, DESC, TITLE. 259 */ 260 $this->ensure_support_is_added_everywhere( 'SVG' ); 261 262 // These elements are specific to TABLE scope. 
263 $this->ensure_support_is_added_everywhere( 'HTML' ); 264 $this->ensure_support_is_added_everywhere( 'TABLE' ); 265 $this->ensure_support_is_added_everywhere( 'TEMPLATE' ); 266 267 // These elements depend on table scope. 268 $this->ensure_support_is_added_everywhere( 'CAPTION' ); 269 $this->ensure_support_is_added_everywhere( 'COL' ); 270 $this->ensure_support_is_added_everywhere( 'COLGROUP' ); 271 $this->ensure_support_is_added_everywhere( 'TBODY' ); 272 $this->ensure_support_is_added_everywhere( 'TD' ); 273 $this->ensure_support_is_added_everywhere( 'TFOOT' ); 274 $this->ensure_support_is_added_everywhere( 'TH' ); 275 $this->ensure_support_is_added_everywhere( 'THEAD' ); 276 $this->ensure_support_is_added_everywhere( 'TR' ); 185 // MathML Elements: MI, MO, MN, MS, MTEXT, ANNOTATION-XML. 186 $this->ensure_support_is_added_everywhere( 'MATH' ); 187 188 /* 189 * SVG elements: note that TITLE is both an HTML element and an SVG element 190 * so care must be taken when adding support for either one. 191 * 192 * FOREIGNOBJECT, DESC, TITLE. 193 */ 194 $this->ensure_support_is_added_everywhere( 'SVG' ); 277 195 } 278 196 … … 288 206 */ 289 207 public function test_has_element_in_select_scope_needs_support() { 290 // These elements impact all scopes.291 $this->ensure_support_is_added_everywhere( 'APPLET' );292 $this->ensure_support_is_added_everywhere( 'CAPTION' );293 $this->ensure_support_is_added_everywhere( 'HTML' );294 $this->ensure_support_is_added_everywhere( 'TABLE' );295 $this->ensure_support_is_added_everywhere( 'TD' );296 $this->ensure_support_is_added_everywhere( 'TH' );297 $this->ensure_support_is_added_everywhere( 'MARQUEE' );298 $this->ensure_support_is_added_everywhere( 'OBJECT' );299 $this->ensure_support_is_added_everywhere( 'TEMPLATE' );300 301 208 // MathML Elements: MI, MO, MN, MS, MTEXT, ANNOTATION-XML. 302 209 $this->ensure_support_is_added_everywhere( 'MATH' ); -
trunk/tests/phpunit/tests/html-api/wpHtmlTagProcessor-token-scanning.php
r58040 r58779 58 58 59 59 /** 60 * Ensures that `get_modifiable_text()` properly transforms text content. 61 * 62 * The newline and NULL byte (U+0000) behaviors can be complicated since they depend 63 * on where the bytes were found and whether they were raw bytes in the input stream 64 * or decoded from character references. 65 * 66 * @ticket 61576 67 * 68 * @dataProvider data_modifiable_text_needing_transformation 69 * 70 * @param string $html_with_target_node HTML with node containing `target` or `target-next` attribute. 71 * @param string $expected_modifiable_text Expected modifiable text from target node or following node. 72 */ 73 public function test_modifiable_text_proper_transforms( string $html_with_target_node, string $expected_modifiable_text ) { 74 $processor = new WP_HTML_Tag_Processor( $html_with_target_node ); 75 76 // Find the expected target node. 77 while ( $processor->next_token() ) { 78 $target = $processor->get_attribute( 'target' ); 79 if ( true === $target ) { 80 break; 81 } 82 83 if ( is_numeric( $target ) ) { 84 for ( $i = (int) $target; $i > 0; $i-- ) { 85 $processor->next_token(); 86 } 87 break; 88 } 89 } 90 91 $this->assertSame( 92 $expected_modifiable_text, 93 $processor->get_modifiable_text(), 94 "Should have properly decoded and transformed modifiable text, but didn't." 95 ); 96 } 97 98 /** 99 * Data provider. 100 * 101 * @return array[]. 102 */ 103 public static function data_modifiable_text_needing_transformation() { 104 return array( 105 'Text node + NULL byte' => array( "<span target=1>NULL byte in \x00 text nodes disappears.", 'NULL byte in  text nodes disappears.' ), 106 'LISTING + newline' => array( "<listing target=1>\nNo newline</listing>", 'No newline' ), 107 'LISTING + CR + LF' => array( "<listing target=1>\r\nNo newline</listing>", 'No newline' ), 108 'LISTING + Encoded LF' => array( '<listing target=1>&#x0a;No newline</listing>', 'No newline' ), 109 'LISTING + Encoded CR' => array( '<listing target=1>&#x0d;Newline</listing>', "\rNewline" ), 110 'LISTING + Encoded CR + LF' => array( '<listing target=1>&#x0d;&#x0a;Newline</listing>', "\r\nNewline" ), 111 'PRE + newline' => array( "<pre target=1>\nNo newline</pre>", 'No newline' ), 112 'PRE + CR + LF' => array( "<pre target=1>\r\nNo newline</pre>", 'No newline' ), 113 'PRE + Encoded LF' => array( '<pre target=1>&#x0a;No newline</pre>', 'No newline' ), 114 'PRE + Encoded CR' => array( '<pre target=1>&#x0d;Newline</pre>', "\rNewline" ), 115 'PRE + Encoded CR + LF' => array( '<pre target=1>&#x0d;&#x0a;Newline</pre>', "\r\nNewline" ), 116 'TEXTAREA + newline' => array( "<textarea target>\nNo newline</textarea>", 'No newline' ), 117 'TEXTAREA + CR + LF' => array( "<textarea target>\r\nNo newline</textarea>", 'No newline' ), 118 'TEXTAREA + Encoded LF' => array( '<textarea target>&#x0a;No newline</textarea>', 'No newline' ), 119 'TEXTAREA + Encoded CR' => array( '<textarea target>&#x0d;Newline</textarea>', "\rNewline" ), 120 'TEXTAREA + Encoded CR + LF' => array( '<textarea target>&#x0d;&#x0a;Newline</textarea>', "\r\nNewline" ), 121 'TEXTAREA + Comment-like' => array( "<textarea target><!-- comment -->\nNo newline</textarea>", "<!-- comment -->\nNo newline" ), 122 'PRE + Comment' => array( "<pre target=2><!-- comment -->\nNo newline</pre>", "\nNo newline" ), 123 'PRE + CDATA-like' => array( "<pre target=2><![CDATA[test]]>\nNo newline</pre>", "\nNo newline" ), 124 'LISTING + NULL byte' => array( "<listing target=1>\x00 is missing</listing>", ' is missing' ), 125 'PRE + NULL byte' => array( "<pre target=1>\x00 is missing</pre>", ' is missing' ), 126 'TEXTAREA + NULL byte' => array( "<textarea target>\x00 is U+FFFD</textarea>", "\u{FFFD} is U+FFFD" ), 127 'SCRIPT + NULL byte' => array( "<script target>\x00 is U+FFFD</script>", "\u{FFFD} is U+FFFD" ), 128 'esc(SCRIPT) + NULL byte' => array( "<script target><!-- <script> \x00 </script> --> is U+FFFD</script>", "<!-- <script> \u{FFFD} </script> --> is U+FFFD" ), 129 'STYLE + NULL byte' => array( "<style target>\x00 is U+FFFD</style>", "\u{FFFD} is U+FFFD" ), 130 'XMP + NULL byte' => array( "<xmp target>\x00 is U+FFFD</xmp>", "\u{FFFD} is U+FFFD" ), 131 'CDATA-like + NULL byte' => array( "<span target=1><![CDATA[just a \x00comment]]>", "just a \u{FFFD}comment" ), 132 'Funky comment + NULL byte' => array( "<span target=1></%just a \x00comment>", "%just a \u{FFFD}comment" ), 133 ); 134 } 135 136 /** 60 137 * Ensures that normative Elements are properly parsed. 61 138 *
Note: See TracChangeset
for help on using the changeset viewer.