Changeset 59444
- Timestamp:
- 11/21/2024 01:27:58 PM (2 months ago)
- Location:
- trunk
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/wp-includes/html-api/class-wp-html-processor.php
/**
 * Creates a fragment processor at the current node.
 *
 * HTML Fragment parsing always happens with a context node. HTML Fragment Processors can be
 * instantiated with a `BODY` context node via `WP_HTML_Processor::create_fragment( $html )`.
 *
 * The context node may impact how a fragment of HTML is parsed. For example, consider the HTML
 * fragment `<td />Inside TD?</td>`.
 *
 * A BODY context node will produce the following tree:
 *
 *     └─#text Inside TD?
 *
 * Notice that the `<td>` tags are completely ignored.
 *
 * Compare that with an SVG context node that produces the following tree:
 *
 *     ├─svg:td
 *     └─#text Inside TD?
 *
 * Here, a `td` node in the `svg` namespace is created, and its self-closing flag is respected.
 * This is a peculiarity of parsing HTML in foreign content like SVG.
 *
 * Finally, consider the tree produced with a TABLE context node:
 *
 *     └─TBODY
 *       └─TR
 *         └─TD
 *           └─#text Inside TD?
 *
 * These examples demonstrate how important the context node may be when processing an HTML
 * fragment. Special care must be taken when processing fragments that are expected to appear
 * in specific contexts. SVG and TABLE are good examples, but there are others.
 *
 * No fragment processor is created, and null is returned, when:
 *
 *  - the processor is not currently matched on an opening tag;
 *  - the current node is an HTML element requiring a special tokenizer state
 *    (e.g. `SCRIPT`, `STYLE`, `TEXTAREA`, `TITLE`);
 *  - the underlying fragment processor could not be created.
 *
 * @see https://html.spec.whatwg.org/multipage/parsing.html#html-fragment-parsing-algorithm
 *
 * @param string $html Input HTML fragment to process.
 * @return static|null The created processor if successful, otherwise null.
 */
public function create_fragment_at_current_node( string $html ) {
	// Only an opening tag can act as a context element: reject non-tag tokens and tag closers.
	if ( '#tag' !== $this->get_token_type() || $this->is_tag_closer() ) {
		return null;
	}

	/*
	 * Read every property of the context element from the same token.
	 *
	 * The namespace, node name, and integration node type all describe the current
	 * element, so the context node is cloned from that token as well. The tokenizer's
	 * most recent token (`$this->state->current_token`) is not guaranteed to be the
	 * element the processor is currently paused on.
	 */
	$context_token = $this->current_element->token;
	$namespace     = $context_token->namespace;

	/*
	 * Prevent creating fragments at nodes that require a special tokenizer state.
	 * This is unsupported by the HTML Processor.
	 */
	if (
		'html' === $namespace &&
		in_array( $context_token->node_name, array( 'IFRAME', 'NOEMBED', 'NOFRAMES', 'SCRIPT', 'STYLE', 'TEXTAREA', 'TITLE', 'XMP', 'PLAINTEXT' ), true )
	) {
		return null;
	}

	$fragment_processor = static::create_fragment( $html );
	if ( null === $fragment_processor ) {
		return null;
	}

	$fragment_processor->compat_mode = $this->compat_mode;

	// The clone is detached from this processor: it gets its own bookmark name and no destroy callback.
	$fragment_processor->context_node                = clone $context_token;
	$fragment_processor->context_node->bookmark_name = 'context-node';
	$fragment_processor->context_node->on_destroy    = null;

	// The state's context node is a pair of [ node name, attribute name => value map ].
	$fragment_processor->state->context_node = array( $fragment_processor->context_node->node_name, array() );

	// An empty prefix matches every attribute name; null indicates no attributes are available.
	$attribute_names = $this->get_attribute_names_with_prefix( '' );
	if ( null !== $attribute_names ) {
		foreach ( $attribute_names as $name ) {
			$fragment_processor->state->context_node[1][ $name ] = $this->get_attribute( $name );
		}
	}

	$fragment_processor->breadcrumbs = array( 'HTML', $fragment_processor->context_node->node_name );

	if ( 'TEMPLATE' === $fragment_processor->context_node->node_name ) {
		$fragment_processor->state->stack_of_template_insertion_modes[] = WP_HTML_Processor_State::INSERTION_MODE_IN_TEMPLATE;
	}

	$fragment_processor->reset_insertion_mode_appropriately();

	/*
	 * > Set the parser's form element pointer to the nearest node to the context element that
	 * > is a form element (going straight up the ancestor chain, and including the element
	 * > itself, if it is a form element), if any. (If there is no such form element, the
	 * > form element pointer keeps its initial value, null.)
	 */
	foreach ( $this->state->stack_of_open_elements->walk_up() as $element ) {
		if ( 'FORM' === $element->node_name && 'html' === $element->namespace ) {
			$fragment_processor->state->form_element                = clone $element;
			$fragment_processor->state->form_element->bookmark_name = null;
			$fragment_processor->state->form_element->on_destroy    = null;
			break;
		}
	}

	$fragment_processor->state->encoding_confidence = 'irrelevant';

	/*
	 * Update the parsing namespace near the end of the process.
	 * This is important so that any push/pop from the stack of open
	 * elements does not change the parsing namespace.
	 *
	 * A context element that is an integration point is parsed in the
	 * `html` namespace regardless of its own namespace.
	 */
	$fragment_processor->change_parsing_namespace(
		$context_token->integration_node_type ? 'html' : $namespace
	);

	return $fragment_processor;
}
trunk/tests/phpunit/tests/html-api/wpHtmlProcessor.php
/**
 * Ensures a fragment created at an SVG element is parsed as foreign content.
 *
 * @ticket 62357
 */
public function test_create_fragment_at_current_node_in_foreign_content() {
	$processor = WP_HTML_Processor::create_full_parser( '<svg>' );
	$this->assertTrue( $processor->next_tag( 'SVG' ) );

	$fragment = $processor->create_fragment_at_current_node( "\0preceded-by-nul-byte<rect /><circle></circle><foreignobject><div></div></foreignobject><g>" );

	// The method may return null; fail the test here instead of raising a fatal error below.
	$this->assertNotNull( $fragment, 'Failed to create a fragment processor at the SVG element.' );

	$this->assertSame( 'svg', $fragment->get_namespace() );
	$this->assertTrue( $fragment->next_token() );

	/*
	 * In HTML parsing, a nul byte would be ignored.
	 * In SVG it should be replaced with a replacement character.
	 */
	$this->assertSame( '#text', $fragment->get_token_type() );
	$this->assertSame( "\u{FFFD}", $fragment->get_modifiable_text() );

	$this->assertTrue( $fragment->next_tag( 'RECT' ) );
	$this->assertSame( 'svg', $fragment->get_namespace() );

	$this->assertTrue( $fragment->next_tag( 'CIRCLE' ) );
	$this->assertSame( array( 'HTML', 'SVG', 'CIRCLE' ), $fragment->get_breadcrumbs() );
	$this->assertTrue( $fragment->next_tag( 'foreignObject' ) );
	$this->assertSame( 'svg', $fragment->get_namespace() );
}

/**
 * Ensures a fragment created at an HTML integration point (svg:foreignObject)
 * is parsed in the HTML namespace.
 *
 * @ticket 62357
 */
public function test_create_fragment_at_current_node_in_foreign_content_integration_point() {
	$processor = WP_HTML_Processor::create_full_parser( '<svg><foreignObject>' );
	$this->assertTrue( $processor->next_tag( 'foreignObject' ) );

	$fragment = $processor->create_fragment_at_current_node( "<image>\0not-preceded-by-nul-byte<rect />" );

	// The method may return null; fail the test here instead of raising a fatal error below.
	$this->assertNotNull( $fragment, 'Failed to create a fragment processor at the foreignObject element.' );

	// Nothing has been processed, the html namespace should be used for parsing as an integration point.
	$this->assertSame( 'html', $fragment->get_namespace() );

	// HTML parsing transforms IMAGE into IMG.
	$this->assertTrue( $fragment->next_tag( 'IMG' ) );

	$this->assertTrue( $fragment->next_token() );

	// In HTML parsing, the nul byte is ignored and the text is reached.
	$this->assertSame( '#text', $fragment->get_token_type() );
	$this->assertSame( 'not-preceded-by-nul-byte', $fragment->get_modifiable_text() );

	/*
	 * svg:foreignObject is an HTML integration point, so the processor should be in the HTML namespace.
	 * RECT is an HTML element here, meaning it may have the self-closing flag but does not self-close.
	 */
	$this->assertTrue( $fragment->next_tag( 'RECT' ) );
	$this->assertSame( array( 'HTML', 'FOREIGNOBJECT', 'RECT' ), $fragment->get_breadcrumbs() );
	$this->assertSame( 'html', $fragment->get_namespace() );
	$this->assertTrue( $fragment->has_self_closing_flag() );
	$this->assertTrue( $fragment->expects_closer() );
}
trunk/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php
r59075 r59444 139 139 */ 140 140 private static function should_skip_test( ?string $test_context_element, string $test_name ): bool { 141 if ( null !== $test_context_element && 'body' !== $test_context_element ) {142 return true;143 }144 145 141 if ( array_key_exists( $test_name, self::SKIP_TESTS ) ) { 146 142 return true; … … 158 154 */ 159 155 private static function build_tree_representation( ?string $fragment_context, string $html ) { 160 $processor = $fragment_context 161 ? WP_HTML_Processor::create_fragment( $html, "<{$fragment_context}>" ) 162 : WP_HTML_Processor::create_full_parser( $html ); 163 if ( null === $processor ) { 164 throw new WP_HTML_Unsupported_Exception( "Could not create a parser with the given fragment context: {$fragment_context}.", '', 0, '', array(), array() ); 165 } 166 167 /* 168 * The fragment parser will start in 2 levels deep at: html > body > [position] 169 * and requires adjustment to initial parameters. 170 * The full parser will not. 171 */ 156 $processor = null; 157 if ( $fragment_context ) { 158 if ( 'body' === $fragment_context ) { 159 $processor = WP_HTML_Processor::create_fragment( $html ); 160 } else { 161 162 /* 163 * If the string of characters starts with "svg ", the context 164 * element is in the SVG namespace and the substring after 165 * "svg " is the local name. If the string of characters starts 166 * with "math ", the context element is in the MathML namespace 167 * and the substring after "math " is the local name. 168 * Otherwise, the context element is in the HTML namespace and 169 * the string is the local name. 
170 */ 171 if ( str_starts_with( $fragment_context, 'svg ' ) ) { 172 $tag_name = substr( $fragment_context, 4 ); 173 if ( 'svg' === $tag_name ) { 174 $parent_processor = WP_HTML_Processor::create_full_parser( '<!DOCTYPE html><svg>' ); 175 } else { 176 $parent_processor = WP_HTML_Processor::create_full_parser( "<!DOCTYPE html><svg><{$tag_name}>" ); 177 } 178 $parent_processor->next_tag( $tag_name ); 179 } elseif ( str_starts_with( $fragment_context, 'math ' ) ) { 180 $tag_name = substr( $fragment_context, 5 ); 181 if ( 'math' === $tag_name ) { 182 $parent_processor = WP_HTML_Processor::create_full_parser( '<!DOCTYPE html><math>' ); 183 } else { 184 $parent_processor = WP_HTML_Processor::create_full_parser( "<!DOCTYPE html><math><{$tag_name}>" ); 185 } 186 $parent_processor->next_tag( $tag_name ); 187 } else { 188 if ( in_array( 189 $fragment_context, 190 array( 191 'caption', 192 'col', 193 'colgroup', 194 'tbody', 195 'td', 196 'tfoot', 197 'th', 198 'thead', 199 'tr', 200 ), 201 true 202 ) ) { 203 $parent_processor = WP_HTML_Processor::create_full_parser( "<!DOCTYPE html><table><{$fragment_context}>" ); 204 $parent_processor->next_tag(); 205 } else { 206 $parent_processor = WP_HTML_Processor::create_full_parser( "<!DOCTYPE html><{$fragment_context}>" ); 207 } 208 $parent_processor->next_tag( $fragment_context ); 209 } 210 if ( null !== $parent_processor->get_unsupported_exception() ) { 211 throw $parent_processor->get_unsupported_exception(); 212 } 213 if ( null !== $parent_processor->get_last_error() ) { 214 throw new Exception( $parent_processor->get_last_error() ); 215 } 216 $processor = $parent_processor->create_fragment_at_current_node( $html ); 217 } 218 219 if ( null === $processor ) { 220 throw new WP_HTML_Unsupported_Exception( "Could not create a parser with the given fragment context: {$fragment_context}.", '', 0, '', array(), array() ); 221 } 222 } else { 223 $processor = WP_HTML_Processor::create_full_parser( $html ); 224 if ( null === $processor ) { 
225 throw new Exception( 'Could not create a full parser.' ); 226 } 227 } 228 172 229 $output = ''; 173 230 $indent_level = 0;
Note: See TracChangeset
for help on using the changeset viewer.