Changeset 58985
- Timestamp:
- 09/04/2024 04:32:37 AM (5 months ago)
- Location:
- trunk
- Files:
-
- 4 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/wp-includes/html-api/class-wp-html-processor-state.php
r58867 r58985 301 301 302 302 /** 303 * No-quirks mode document compatability mode.304 *305 * > In no-quirks mode, the behavior is (hopefully) the desired behavior306 * > described by the modern HTML and CSS specifications.307 *308 * @since 6.7.0309 *310 * @var string311 */312 const NO_QUIRKS_MODE = 'no-quirks-mode';313 314 /**315 * Quirks mode document compatability mode.316 *317 * > In quirks mode, layout emulates behavior in Navigator 4 and Internet318 * > Explorer 5. This is essential in order to support websites that were319 * > built before the widespread adoption of web standards.320 *321 * @since 6.7.0322 *323 * @var string324 */325 const QUIRKS_MODE = 'quirks-mode';326 327 /**328 303 * The stack of template insertion modes. 329 304 * … … 381 356 */ 382 357 public $insertion_mode = self::INSERTION_MODE_INITIAL; 383 384 /**385 * Indicates if the document is in quirks mode or no-quirks mode.386 *387 * Impact on HTML parsing:388 *389 * - In `NO_QUIRKS_MODE` CSS class and ID selectors match in a byte-for-byte390 * manner, otherwise for backwards compatability, class selectors are to391 * match in an ASCII case-insensitive manner.392 *393 * - When not in `QUIRKS_MODE`, a TABLE start tag implicitly closes an open P tag394 * if one is in scope and open, otherwise the TABLE becomes a child of the P.395 *396 * `QUIRKS_MODE` impacts many styling-related aspects of an HTML document, but397 * none of the other changes modifies how the HTML is parsed or selected.398 *399 * @see self::QUIRKS_MODE400 * @see self::NO_QUIRKS_MODE401 *402 * @since 6.7.0403 *404 * @var string405 */406 public $document_mode = self::NO_QUIRKS_MODE;407 358 408 359 /** -
trunk/src/wp-includes/html-api/class-wp-html-processor.php
r58977 r58985 1081 1081 $doctype = $this->get_doctype_info(); 1082 1082 if ( null !== $doctype && 'quirks' === $doctype->indicated_compatability_mode ) { 1083 $this-> state->document_mode = WP_HTML_Processor_State::QUIRKS_MODE;1083 $this->compat_mode = WP_HTML_Tag_Processor::QUIRKS_MODE; 1084 1084 } 1085 1085 … … 1096 1096 */ 1097 1097 initial_anything_else: 1098 $this-> state->document_mode = WP_HTML_Processor_State::QUIRKS_MODE;1098 $this->compat_mode = WP_HTML_Tag_Processor::QUIRKS_MODE; 1099 1099 $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_BEFORE_HTML; 1100 1100 return $this->step( self::REPROCESS_CURRENT_NODE ); … … 2449 2449 */ 2450 2450 if ( 2451 WP_HTML_ Processor_State::QUIRKS_MODE !== $this->state->document_mode &&2451 WP_HTML_Tag_Processor::QUIRKS_MODE !== $this->compat_mode && 2452 2452 $this->state->stack_of_open_elements->has_p_in_button_scope() 2453 2453 ) { … … 4938 4938 * 4939 4939 * @since 6.6.0 Subclassed for the HTML Processor. 4940 * 4941 * @todo When reconstructing active formatting elements with attributes, find a way 4942 * to indicate if the virtually-reconstructed formatting elements contain the 4943 * wanted class name. 4940 4944 * 4941 4945 * @param string $wanted_class Look for this CSS class name, ASCII case-insensitive. -
trunk/src/wp-includes/html-api/class-wp-html-tag-processor.php
r58977 r58985 511 511 */ 512 512 protected $parser_state = self::STATE_READY; 513 514 /** 515 * Indicates if the document is in quirks mode or no-quirks mode. 516 * 517 * Impact on HTML parsing: 518 * 519 * - In `NO_QUIRKS_MODE` (also known as "standard mode"): 520 * - CSS class and ID selectors match byte-for-byte (case-sensitively). 521 * - A TABLE start tag `<table>` implicitly closes any open `P` element. 522 * 523 * - In `QUIRKS_MODE`: 524 * - CSS class and ID selectors match in an ASCII case-insensitive manner. 525 * - A TABLE start tag `<table>` opens a `TABLE` element as a child of a `P` 526 * element if one is open. 527 * 528 * Quirks and no-quirks mode are thus mostly about styling, but have an impact when 529 * tables are found inside paragraph elements. 530 * 531 * @see self::QUIRKS_MODE 532 * @see self::NO_QUIRKS_MODE 533 * 534 * @since 6.7.0 535 * 536 * @var string 537 */ 538 protected $compat_mode = self::NO_QUIRKS_MODE; 513 539 514 540 /** … … 1156 1182 $seen = array(); 1157 1183 1184 $is_quirks = self::QUIRKS_MODE === $this->compat_mode; 1185 1158 1186 $at = 0; 1159 1187 while ( $at < strlen( $class ) ) { … … 1170 1198 } 1171 1199 1172 /* 1173 * CSS class names are case-insensitive in the ASCII range. 
1174 * 1175 * @see https://www.w3.org/TR/CSS2/syndata.html#x1 1176 */ 1177 $name = str_replace( "\x00", "\u{FFFD}", strtolower( substr( $class, $at, $length ) ) ); 1178 $at += $length; 1200 $name = str_replace( "\x00", "\u{FFFD}", substr( $class, $at, $length ) ); 1201 if ( $is_quirks ) { 1202 $name = strtolower( $name ); 1203 } 1204 $at += $length; 1179 1205 1180 1206 /* … … 1206 1232 } 1207 1233 1208 $wanted_class = strtolower( $wanted_class ); 1209 1234 $case_insensitive = self::QUIRKS_MODE === $this->compat_mode; 1235 1236 $wanted_length = strlen( $wanted_class ); 1210 1237 foreach ( $this->class_list() as $class_name ) { 1211 if ( $class_name === $wanted_class ) { 1238 if ( 1239 strlen( $class_name ) === $wanted_length && 1240 0 === substr_compare( $class_name, $wanted_class, 0, strlen( $wanted_class ), $case_insensitive ) 1241 ) { 1212 1242 return true; 1213 1243 } … … 2297 2327 $modified = false; 2298 2328 2329 $seen = array(); 2330 $to_remove = array(); 2331 $is_quirks = self::QUIRKS_MODE === $this->compat_mode; 2332 if ( $is_quirks ) { 2333 foreach ( $this->classname_updates as $updated_name => $action ) { 2334 if ( self::REMOVE_CLASS === $action ) { 2335 $to_remove[] = strtolower( $updated_name ); 2336 } 2337 } 2338 } else { 2339 foreach ( $this->classname_updates as $updated_name => $action ) { 2340 if ( self::REMOVE_CLASS === $action ) { 2341 $to_remove[] = $updated_name; 2342 } 2343 } 2344 } 2345 2299 2346 // Remove unwanted classes by only copying the new ones. 2300 2347 $existing_class_length = strlen( $existing_class ); … … 2312 2359 } 2313 2360 2314 $name = substr( $existing_class, $at, $name_length ); 2315 $at += $name_length; 2316 2317 // If this class is marked for removal, start processing the next one. 2318 $remove_class = ( 2319 isset( $this->classname_updates[ $name ] ) && 2320 self::REMOVE_CLASS === $this->classname_updates[ $name ] 2321 ); 2322 2323 // If a class has already been seen then skip it; it should not be added twice. 2324 if ( ! 
$remove_class ) { 2325 $this->classname_updates[ $name ] = self::SKIP_CLASS; 2326 } 2327 2328 if ( $remove_class ) { 2361 $name = substr( $existing_class, $at, $name_length ); 2362 $comparable_class_name = $is_quirks ? strtolower( $name ) : $name; 2363 $at += $name_length; 2364 2365 // If this class is marked for removal, remove it and move on to the next one. 2366 if ( in_array( $comparable_class_name, $to_remove, true ) ) { 2329 2367 $modified = true; 2330 2368 continue; 2331 2369 } 2370 2371 // If a class has already been seen then skip it; it should not be added twice. 2372 if ( in_array( $comparable_class_name, $seen, true ) ) { 2373 continue; 2374 } 2375 2376 $seen[] = $comparable_class_name; 2332 2377 2333 2378 /* … … 2351 2396 // Add new classes by appending those which haven't already been seen. 2352 2397 foreach ( $this->classname_updates as $name => $operation ) { 2353 if ( self::ADD_CLASS === $operation ) { 2398 $comparable_name = $is_quirks ? strtolower( $name ) : $name; 2399 if ( self::ADD_CLASS === $operation && ! in_array( $comparable_name, $seen, true ) ) { 2354 2400 $modified = true; 2355 2401 … … 3933 3979 } 3934 3980 3981 if ( self::QUIRKS_MODE !== $this->compat_mode ) { 3982 $this->classname_updates[ $class_name ] = self::ADD_CLASS; 3983 return true; 3984 } 3985 3986 /* 3987 * Because class names are matched ASCII-case-insensitively in quirks mode, 3988 * this needs to see if a case variant of the given class name is already 3989 * enqueued and update that existing entry, if so. This picks the casing of 3990 * the first-provided class name for all lexical variations. 
3991 */ 3992 $class_name_length = strlen( $class_name ); 3993 foreach ( $this->classname_updates as $updated_name => $action ) { 3994 if ( 3995 strlen( $updated_name ) === $class_name_length && 3996 0 === substr_compare( $updated_name, $class_name, 0, $class_name_length, true ) 3997 ) { 3998 $this->classname_updates[ $updated_name ] = self::ADD_CLASS; 3999 return true; 4000 } 4001 } 4002 3935 4003 $this->classname_updates[ $class_name ] = self::ADD_CLASS; 3936 3937 4004 return true; 3938 4005 } … … 3954 4021 } 3955 4022 3956 if ( null !== $this->tag_name_starts_at) {4023 if ( self::QUIRKS_MODE !== $this->compat_mode ) { 3957 4024 $this->classname_updates[ $class_name ] = self::REMOVE_CLASS; 3958 } 3959 4025 return true; 4026 } 4027 4028 /* 4029 * Because class names are matched ASCII-case-insensitively in quirks mode, 4030 * this needs to see if a case variant of the given class name is already 4031 * enqueued and update that existing entry, if so. This picks the casing of 4032 * the first-provided class name for all lexical variations. 4033 */ 4034 $class_name_length = strlen( $class_name ); 4035 foreach ( $this->classname_updates as $updated_name => $action ) { 4036 if ( 4037 strlen( $updated_name ) === $class_name_length && 4038 0 === substr_compare( $updated_name, $class_name, 0, $class_name_length, true ) 4039 ) { 4040 $this->classname_updates[ $updated_name ] = self::REMOVE_CLASS; 4041 return true; 4042 } 4043 } 4044 4045 $this->classname_updates[ $class_name ] = self::REMOVE_CLASS; 3960 4046 return true; 3961 4047 } … … 4352 4438 4353 4439 /** 4440 * No-quirks mode document compatability mode. 4441 * 4442 * > In no-quirks mode, the behavior is (hopefully) the desired behavior 4443 * > described by the modern HTML and CSS specifications. 
4444 * 4445 * @see self::$compat_mode 4446 * @see https://developer.mozilla.org/en-US/docs/Web/HTML/Quirks_Mode_and_Standards_Mode 4447 * 4448 * @since 6.7.0 4449 * 4450 * @var string 4451 */ 4452 const NO_QUIRKS_MODE = 'no-quirks-mode'; 4453 4454 /** 4455 * Quirks mode document compatability mode. 4456 * 4457 * > In quirks mode, layout emulates behavior in Navigator 4 and Internet 4458 * > Explorer 5. This is essential in order to support websites that were 4459 * > built before the widespread adoption of web standards. 4460 * 4461 * @see self::$compat_mode 4462 * @see https://developer.mozilla.org/en-US/docs/Web/HTML/Quirks_Mode_and_Standards_Mode 4463 * 4464 * @since 6.7.0 4465 * 4466 * @var string 4467 */ 4468 const QUIRKS_MODE = 'quirks-mode'; 4469 4470 /** 4354 4471 * Indicates that a span of text may contain any combination of significant 4355 4472 * kinds of characters: NULL bytes, whitespace, and others. -
trunk/tests/phpunit/tests/html-api/wpHtmlProcessor.php
r58892 r58985 520 520 $this->assertTrue( $processor->next_tag( 'script' ) ); 521 521 } 522 523 /** 524 * Ensures that the tag processor is case sensitive when removing CSS classes in no-quirks mode. 525 * 526 * @ticket 61531 527 * 528 * @covers ::remove_class 529 */ 530 public function test_remove_class_no_quirks_mode() { 531 $processor = WP_HTML_Processor::create_full_parser( '<!DOCTYPE html><span class="UPPER">' ); 532 $processor->next_tag( 'SPAN' ); 533 $processor->remove_class( 'upper' ); 534 $this->assertSame( '<!DOCTYPE html><span class="UPPER">', $processor->get_updated_html() ); 535 536 $processor->remove_class( 'UPPER' ); 537 $this->assertSame( '<!DOCTYPE html><span >', $processor->get_updated_html() ); 538 } 539 540 /** 541 * Ensures that the tag processor is case sensitive when adding CSS classes in no-quirks mode. 542 * 543 * @ticket 61531 544 * 545 * @covers ::add_class 546 */ 547 public function test_add_class_no_quirks_mode() { 548 $processor = WP_HTML_Processor::create_full_parser( '<!DOCTYPE html><span class="UPPER">' ); 549 $processor->next_tag( 'SPAN' ); 550 $processor->add_class( 'UPPER' ); 551 $this->assertSame( '<!DOCTYPE html><span class="UPPER">', $processor->get_updated_html() ); 552 553 $processor->add_class( 'upper' ); 554 $this->assertSame( '<!DOCTYPE html><span class="UPPER upper">', $processor->get_updated_html() ); 555 } 556 557 /** 558 * Ensures that the tag processor is case sensitive when checking has CSS classes in no-quirks mode. 559 * 560 * @ticket 61531 561 * 562 * @covers ::has_class 563 */ 564 public function test_has_class_no_quirks_mode() { 565 $processor = WP_HTML_Processor::create_full_parser( '<!DOCTYPE html><span class="UPPER">' ); 566 $processor->next_tag( 'SPAN' ); 567 $this->assertFalse( $processor->has_class( 'upper' ) ); 568 $this->assertTrue( $processor->has_class( 'UPPER' ) ); 569 } 570 571 /** 572 * Ensures that the tag processor lists unique CSS class names in no-quirks mode. 
573 * 574 * @ticket 61531 575 * 576 * @covers ::class_list 577 */ 578 public function test_class_list_no_quirks_mode() { 579 $processor = WP_HTML_Processor::create_full_parser( 580 /* 581 * U+00C9 is LATIN CAPITAL LETTER E WITH ACUTE 582 * U+0045 is LATIN CAPITAL LETTER E 583 * U+0301 is COMBINING ACUTE ACCENT 584 * 585 * This tests not only that the class matching deduplicates the É, but also 586 * that it treats the same character in different normalization forms as 587 * distinct, since matching occurs on a byte-for-byte basis. 588 */ 589 "<!DOCTYPE html><span class='A A a B b \u{C9} \u{45}\u{0301} \u{C9} é'>" 590 ); 591 $processor->next_tag( 'SPAN' ); 592 $class_list = iterator_to_array( $processor->class_list() ); 593 $this->assertSame( 594 array( 'A', 'a', 'B', 'b', 'É', "E\u{0301}", 'é' ), 595 $class_list 596 ); 597 } 598 599 /** 600 * Ensures that the tag processor is case insensitive when removing CSS classes in quirks mode. 601 * 602 * @ticket 61531 603 * 604 * @covers ::remove_class 605 */ 606 public function test_remove_class_quirks_mode() { 607 $processor = WP_HTML_Processor::create_full_parser( '<span class="uPPER">' ); 608 $processor->next_tag( 'SPAN' ); 609 $processor->remove_class( 'upPer' ); 610 $this->assertSame( '<span >', $processor->get_updated_html() ); 611 } 612 613 /** 614 * Ensures that the tag processor is case insensitive when adding CSS classes in quirks mode. 
615 * 616 * @ticket 61531 617 * 618 * @covers ::add_class 619 */ 620 public function test_add_class_quirks_mode() { 621 $processor = WP_HTML_Processor::create_full_parser( '<span class="UPPER">' ); 622 $processor->next_tag( 'SPAN' ); 623 $processor->add_class( 'upper' ); 624 625 $this->assertSame( '<span class="UPPER">', $processor->get_updated_html() ); 626 627 $processor->add_class( 'ANOTHER-UPPER' ); 628 $this->assertSame( '<span class="UPPER ANOTHER-UPPER">', $processor->get_updated_html() ); 629 } 630 631 /** 632 * Ensures that the tag processor is case insensitive when checking has CSS classes in quirks mode. 633 * 634 * @ticket 61531 635 * 636 * @covers ::has_class 637 */ 638 public function test_has_class_quirks_mode() { 639 $processor = WP_HTML_Processor::create_full_parser( '<span class="UPPER">' ); 640 $processor->next_tag( 'SPAN' ); 641 $this->assertTrue( $processor->has_class( 'upper' ) ); 642 $this->assertTrue( $processor->has_class( 'UPPER' ) ); 643 } 644 645 /** 646 * Ensures that the tag processor lists unique CSS class names in quirks mode. 647 * 648 * @ticket 61531 649 * 650 * @covers ::class_list 651 */ 652 public function test_class_list_quirks_mode() { 653 $processor = WP_HTML_Processor::create_full_parser( 654 /* 655 * U+00C9 is LATIN CAPITAL LETTER E WITH ACUTE 656 * U+0045 is LATIN CAPITAL LETTER E 657 * U+0065 is LATIN SMALL LETTER E 658 * U+0301 is COMBINING ACUTE ACCENT 659 * 660 * This tests not only that the class matching deduplicates the É, but also 661 * that it treats the same character in different normalization forms as 662 * distinct, since matching occurs on a byte-for-byte basis. 663 */ 664 "<span class='A A a B b \u{C9} \u{45}\u{301} \u{C9} é \u{65}\u{301}'>" 665 ); 666 $processor->next_tag( 'SPAN' ); 667 $class_list = iterator_to_array( $processor->class_list() ); 668 $this->assertSame( 669 array( 'a', 'b', 'É', "e\u{301}", 'é' ), 670 $class_list 671 ); 672 } 522 673 }
Note: See TracChangeset
for help on using the changeset viewer.