Changeset 59467
Legend:
- Unmodified
- Added
- Removed
- trunk/src/wp-includes/html-api/class-wp-html-processor.php
r59463 r59467 280 280 * impact the parse, such as with a SCRIPT tag and its `type` attribute. 281 281 * 282 * ## Current HTML Support 283 * 284 * - The only supported context is `<body>`, which is the default value. 285 * - The only supported document encoding is `UTF-8`, which is the default value. 282 * Example: 283 * 284 * // Usually, snippets of HTML ought to be processed in the default `<body>` context: 285 * $processor = WP_HTML_Processor::create_fragment( '<p>Hi</p>' ); 286 * 287 * // Some fragments should be processed in the correct context like this SVG: 288 * $processor = WP_HTML_Processor::create_fragment( '<rect width="10" height="10" />', '<svg>' ); 289 * 290 * // This fragment with TD tags should be processed in a TR context: 291 * $processor = WP_HTML_Processor::create_fragment( 292 * '<td>1<td>2<td>3', 293 * '<table><tbody><tr>' 294 * ); 295 * 296 * In order to create a fragment processor at the correct location, the 297 * provided fragment will be processed as part of a full HTML document. 298 * The processor will search for the last opener tag in the document and 299 * create a fragment processor at that location. The document will be 300 * forced into "no-quirks" mode by including the HTML5 doctype. 301 * 302 * For advanced usage and precise control over the context element, use 303 * `WP_HTML_Processor::create_full_processor()` and 304 * `WP_HTML_Processor::create_fragment_at_current_node()`. 305 * 306 * UTF-8 is the only allowed encoding. If working with a document that 307 * isn't UTF-8, first convert the document to UTF-8, then pass in the 308 * converted HTML. 286 309 * 287 310 * @since 6.4.0 288 311 * @since 6.6.0 Returns `static` instead of `self` so it can create subclass instances. 312 * @since 6.8.0 Can create fragments with any context element. 289 313 * 290 314 * @param string $html Input HTML fragment to process. 
291 * @param string $context Context element for the fragment , must be default of`<body>`.315 * @param string $context Context element for the fragment. Defaults to `<body>`. 292 316 * @param string $encoding Text encoding of the document; must be default of 'UTF-8'. 293 317 * @return static|null The created processor if successful, otherwise null. 294 318 */ 295 319 public static function create_fragment( $html, $context = '<body>', $encoding = 'UTF-8' ) { 296 if ( '<body>' !== $context || 'UTF-8' !== $encoding ) { 320 $context_processor = static::create_full_parser( "<!DOCTYPE html>{$context}", $encoding ); 321 if ( null === $context_processor ) { 297 322 return null; 298 323 } 299 324 300 $processor = new static( $html, self::CONSTRUCTOR_UNLOCK_CODE ); 301 $processor->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY; 302 $processor->state->encoding = $encoding; 303 $processor->state->encoding_confidence = 'certain'; 304 305 // @todo Create "fake" bookmarks for non-existent but implied nodes. 306 $processor->bookmarks['root-node'] = new WP_HTML_Span( 0, 0 ); 307 $processor->bookmarks['context-node'] = new WP_HTML_Span( 0, 0 ); 308 309 $root_node = new WP_HTML_Token( 310 'root-node', 311 'HTML', 312 false 313 ); 314 315 $processor->state->stack_of_open_elements->push( $root_node ); 316 317 $context_node = new WP_HTML_Token( 318 'context-node', 319 'BODY', 320 false 321 ); 322 323 $processor->context_node = $context_node; 324 $processor->breadcrumbs = array( 'HTML', $context_node->node_name ); 325 326 return $processor; 325 while ( $context_processor->next_tag() ) { 326 $context_processor->set_bookmark( 'final_node' ); 327 } 328 329 if ( 330 ! $context_processor->has_bookmark( 'final_node' ) || 331 ! $context_processor->seek( 'final_node' ) 332 ) { 333 _doing_it_wrong( __METHOD__, __( 'No valid context element was detected.' 
), '6.8.0' ); 334 return null; 335 } 336 337 return $context_processor->create_fragment_at_current_node( $html ); 327 338 } 328 339 … … 334 345 * a context node of `<body>`. 335 346 * 336 * Since UTF-8 is the only currently-accepted charset, if working with a337 * document that isn't UTF-8, it's important to convert the document before338 * c reating the processor: pass in the converted HTML.347 * UTF-8 is the only allowed encoding. If working with a document that 348 * isn't UTF-8, first convert the document to UTF-8, then pass in the 349 * converted HTML. 339 350 * 340 351 * @param string $html Input HTML document to process. … … 460 471 * @see https://html.spec.whatwg.org/multipage/parsing.html#html-fragment-parsing-algorithm 461 472 * 473 * @since 6.8.0 474 * 462 475 * @param string $html Input HTML fragment to process. 463 476 * @return static|null The created processor if successful, otherwise null. … … 465 478 public function create_fragment_at_current_node( string $html ) { 466 479 if ( $this->get_token_type() !== '#tag' || $this->is_tag_closer() ) { 480 _doing_it_wrong( 481 __METHOD__, 482 __( 'The context element must be a start tag.' ), 483 '6.8.0' 484 ); 467 485 return null; 468 486 } 469 487 488 $tag_name = $this->current_element->token->node_name; 470 489 $namespace = $this->current_element->token->namespace; 490 491 if ( 'html' === $namespace && self::is_void( $tag_name ) ) { 492 _doing_it_wrong( 493 __METHOD__, 494 sprintf( 495 // translators: %s: A tag name like INPUT or BR. 496 __( 'The context element cannot be a void element, found "%s".' 
), 497 $tag_name 498 ), 499 '6.8.0' 500 ); 501 return null; 502 } 471 503 472 504 /* … … 476 508 if ( 477 509 'html' === $namespace && 478 in_array( $t his->current_element->token->node_name, array( 'IFRAME', 'NOEMBED', 'NOFRAMES', 'SCRIPT', 'STYLE', 'TEXTAREA', 'TITLE', 'XMP', 'PLAINTEXT' ), true )510 in_array( $tag_name, array( 'IFRAME', 'NOEMBED', 'NOFRAMES', 'SCRIPT', 'STYLE', 'TEXTAREA', 'TITLE', 'XMP', 'PLAINTEXT' ), true ) 479 511 ) { 512 _doing_it_wrong( 513 __METHOD__, 514 sprintf( 515 // translators: %s: A tag name like IFRAME or TEXTAREA. 516 __( 'The context element "%s" is not supported.' ), 517 $tag_name 518 ), 519 '6.8.0' 520 ); 480 521 return null; 481 522 } 482 523 483 $fragment_processor = static::create_fragment( $html ); 484 if ( null === $fragment_processor ) { 485 return null; 486 } 524 $fragment_processor = new static( $html, self::CONSTRUCTOR_UNLOCK_CODE ); 487 525 488 526 $fragment_processor->compat_mode = $this->compat_mode; 489 527 490 $fragment_processor->context_node = clone $this->state->current_token; 528 // @todo Create "fake" bookmarks for non-existent but implied nodes. 529 $fragment_processor->bookmarks['root-node'] = new WP_HTML_Span( 0, 0 ); 530 $root_node = new WP_HTML_Token( 531 'root-node', 532 'HTML', 533 false 534 ); 535 $fragment_processor->state->stack_of_open_elements->push( $root_node ); 536 537 $fragment_processor->bookmarks['context-node'] = new WP_HTML_Span( 0, 0 ); 538 $fragment_processor->context_node = clone $this->current_element->token; 491 539 $fragment_processor->context_node->bookmark_name = 'context-node'; 492 540 $fragment_processor->context_node->on_destroy = null; -
trunk/tests/phpunit/tests/html-api/wpHtmlProcessor.php
r59450 r59467 1045 1045 1046 1046 /** 1047 * @ticket 623571048 */1049 public function test_create_fragment_at_current_node_in_foreign_content() {1050 $processor = WP_HTML_Processor::create_full_parser( '<svg>' );1051 $this->assertTrue( $processor->next_tag( 'SVG' ) );1052 1053 $fragment = $processor->create_fragment_at_current_node( "\0preceded-by-nul-byte<rect /><circle></circle><foreignobject><div></div></foreignobject><g>" );1054 1055 $this->assertSame( 'svg', $fragment->get_namespace() );1056 $this->assertTrue( $fragment->next_token() );1057 1058 /*1059 * In HTML parsing, a nul byte would be ignored.1060 * In SVG it should be replaced with a replacement character.1061 */1062 $this->assertSame( '#text', $fragment->get_token_type() );1063 $this->assertSame( "\u{FFFD}", $fragment->get_modifiable_text() );1064 1065 $this->assertTrue( $fragment->next_tag( 'RECT' ) );1066 $this->assertSame( 'svg', $fragment->get_namespace() );1067 1068 $this->assertTrue( $fragment->next_tag( 'CIRCLE' ) );1069 $this->assertSame( array( 'HTML', 'SVG', 'CIRCLE' ), $fragment->get_breadcrumbs() );1070 $this->assertTrue( $fragment->next_tag( 'foreignObject' ) );1071 $this->assertSame( 'svg', $fragment->get_namespace() );1072 }1073 1074 /**1075 * @ticket 623571076 */1077 public function test_create_fragment_at_current_node_in_foreign_content_integration_point() {1078 $processor = WP_HTML_Processor::create_full_parser( '<svg><foreignObject>' );1079 $this->assertTrue( $processor->next_tag( 'foreignObject' ) );1080 1081 $fragment = $processor->create_fragment_at_current_node( "<image>\0not-preceded-by-nul-byte<rect />" );1082 1083 // Nothing has been processed, the html namespace should be used for parsing as an integration point.1084 $this->assertSame( 'html', $fragment->get_namespace() );1085 1086 // HTML parsing transforms IMAGE into IMG.1087 $this->assertTrue( $fragment->next_tag( 'IMG' ) );1088 1089 $this->assertTrue( $fragment->next_token() );1090 1091 // In HTML parsing, the nul byte is 
ignored and the text is reached.1092 $this->assertSame( '#text', $fragment->get_token_type() );1093 $this->assertSame( 'not-preceded-by-nul-byte', $fragment->get_modifiable_text() );1094 1095 /*1096 * svg:foreignObject is an HTML integration point, so the processor should be in the HTML namespace.1097 * RECT is an HTML element here, meaning it may have the self-closing flag but does not self-close.1098 */1099 $this->assertTrue( $fragment->next_tag( 'RECT' ) );1100 $this->assertSame( array( 'HTML', 'FOREIGNOBJECT', 'RECT' ), $fragment->get_breadcrumbs() );1101 $this->assertSame( 'html', $fragment->get_namespace() );1102 $this->assertTrue( $fragment->has_self_closing_flag() );1103 $this->assertTrue( $fragment->expects_closer() );1104 }1105 1106 /**1107 * @ticket 623571108 */1109 public function test_prevent_fragment_creation_on_closers() {1110 $processor = WP_HTML_Processor::create_full_parser( '<p></p>' );1111 $processor->next_tag( 'P' );1112 $processor->next_tag(1113 array(1114 'tag_name' => 'P',1115 'tag_closers' => 'visit',1116 )1117 );1118 $this->assertSame( 'P', $processor->get_tag() );1119 $this->assertTrue( $processor->is_tag_closer() );1120 $this->assertNull( $processor->create_fragment_at_current_node( '<i>fragment HTML</i>' ) );1121 }1122 1123 /**1124 1047 * Ensure that lowercased tag_name query matches tags case-insensitively. 1125 1048 * -
trunk/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php
r59444 r59467 154 154 */ 155 155 private static function build_tree_representation( ?string $fragment_context, string $html ) { 156 $processor = null;157 156 if ( $fragment_context ) { 158 if ( 'body' === $fragment_context ) { 159 $processor = WP_HTML_Processor::create_fragment( $html ); 157 /* 158 * If the string of characters starts with "svg ", the context 159 * element is in the SVG namespace and the substring after 160 * "svg " is the local name. If the string of characters starts 161 * with "math ", the context element is in the MathML namespace 162 * and the substring after "math " is the local name. 163 * Otherwise, the context element is in the HTML namespace and 164 * the string is the local name. 165 */ 166 if ( str_starts_with( $fragment_context, 'svg ' ) ) { 167 $tag_name = substr( $fragment_context, 4 ); 168 if ( 'svg' === $tag_name ) { 169 $fragment_context_html = '<svg>'; 170 } else { 171 $fragment_context_html = "<svg><{$tag_name}>"; 172 } 173 } elseif ( str_starts_with( $fragment_context, 'math ' ) ) { 174 $tag_name = substr( $fragment_context, 5 ); 175 if ( 'math' === $tag_name ) { 176 $fragment_context_html = '<math>'; 177 } else { 178 $fragment_context_html = "<math><{$tag_name}>"; 179 } 160 180 } else { 161 162 /* 163 * If the string of characters starts with "svg ", the context 164 * element is in the SVG namespace and the substring after 165 * "svg " is the local name. If the string of characters starts 166 * with "math ", the context element is in the MathML namespace 167 * and the substring after "math " is the local name. 168 * Otherwise, the context element is in the HTML namespace and 169 * the string is the local name. 
170 */ 171 if ( str_starts_with( $fragment_context, 'svg ' ) ) { 172 $tag_name = substr( $fragment_context, 4 ); 173 if ( 'svg' === $tag_name ) { 174 $parent_processor = WP_HTML_Processor::create_full_parser( '<!DOCTYPE html><svg>' ); 175 } else { 176 $parent_processor = WP_HTML_Processor::create_full_parser( "<!DOCTYPE html><svg><{$tag_name}>" ); 177 } 178 $parent_processor->next_tag( $tag_name ); 179 } elseif ( str_starts_with( $fragment_context, 'math ' ) ) { 180 $tag_name = substr( $fragment_context, 5 ); 181 if ( 'math' === $tag_name ) { 182 $parent_processor = WP_HTML_Processor::create_full_parser( '<!DOCTYPE html><math>' ); 183 } else { 184 $parent_processor = WP_HTML_Processor::create_full_parser( "<!DOCTYPE html><math><{$tag_name}>" ); 185 } 186 $parent_processor->next_tag( $tag_name ); 181 // Tags that only appear in tables need a special case. 182 if ( in_array( 183 $fragment_context, 184 array( 185 'caption', 186 'col', 187 'colgroup', 188 'tbody', 189 'td', 190 'tfoot', 191 'th', 192 'thead', 193 'tr', 194 ), 195 true 196 ) ) { 197 $fragment_context_html = "<table><{$fragment_context}>"; 187 198 } else { 188 if ( in_array( 189 $fragment_context, 190 array( 191 'caption', 192 'col', 193 'colgroup', 194 'tbody', 195 'td', 196 'tfoot', 197 'th', 198 'thead', 199 'tr', 200 ), 201 true 202 ) ) { 203 $parent_processor = WP_HTML_Processor::create_full_parser( "<!DOCTYPE html><table><{$fragment_context}>" ); 204 $parent_processor->next_tag(); 205 } else { 206 $parent_processor = WP_HTML_Processor::create_full_parser( "<!DOCTYPE html><{$fragment_context}>" ); 207 } 208 $parent_processor->next_tag( $fragment_context ); 209 } 210 if ( null !== $parent_processor->get_unsupported_exception() ) { 211 throw $parent_processor->get_unsupported_exception(); 212 } 213 if ( null !== $parent_processor->get_last_error() ) { 214 throw new Exception( $parent_processor->get_last_error() ); 215 } 216 $processor = $parent_processor->create_fragment_at_current_node( $html ); 217 
} 199 $fragment_context_html = "<{$fragment_context}>"; 200 } 201 } 202 203 $processor = WP_HTML_Processor::create_fragment( $html, $fragment_context_html ); 218 204 219 205 if ( null === $processor ) {
Note: See TracChangeset for help on using the changeset viewer.