Changeset 45929
- Timestamp:
- 09/02/2019 10:24:18 AM (5 years ago)
- Location:
- trunk
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/wp-includes/formatting.php
r45887 r45929 2430 2430 */ 2431 2431 function balanceTags( $text, $force = false ) { // phpcs:ignore WordPress.NamingConventions.ValidFunctionName.FunctionNameInvalid 2432 if ( $force || get_option( 'use_balanceTags' )== 1 ) {2432 if ( $force || (int) get_option( 'use_balanceTags' ) === 1 ) { 2433 2433 return force_balance_tags( $text ); 2434 2434 } else { … … 2441 2441 * 2442 2442 * @since 2.0.4 2443 * @since 5.3.0 Improve accuracy and add support for custom element tags. 2443 2444 * 2444 2445 * @author Leonard Lin <leonard@acm.org> … … 2470 2471 $text = preg_replace( '#<([0-9]{1})#', '<$1', $text ); 2471 2472 2472 while ( preg_match( '/<(\/?[\w:]*)\s*([^>]*)>/', $text, $regex ) ) { 2473 /** 2474 * Matches supported tags. 2475 * 2476 * To get the pattern as a string without the comments paste into a PHP 2477 * REPL like `php -a`. 2478 * 2479 * @see https://html.spec.whatwg.org/#elements-2 2480 * @see https://w3c.github.io/webcomponents/spec/custom/#valid-custom-element-name 2481 * 2482 * @example 2483 * ~# php -a 2484 * php > $s = [paste copied contents of expression below including parentheses]; 2485 * php > echo $s; 2486 */ 2487 $tag_pattern = ( 2488 '#<' . // Start with an opening bracket. 2489 '(/?)' . // Group 1 - If it's a closing tag it'll have a leading slash. 2490 '(' . // Group 2 - Tag name. 2491 // Custom element tags have more lenient rules than HTML tag names. 2492 '(?:[a-z](?:[a-z0-9._]*)-(?:[a-z0-9._-]+)+)' . 2493 '|' . 2494 // Traditional tag rules approximate HTML tag names. 2495 '(?:[\w:]+)' . 2496 ')' . 2497 '(?:' . 2498 // We either immediately close the tag with its '>' and have nothing here. 2499 '\s*' . 2500 '(/?)' . // Group 3 - "attributes" for empty tag. 2501 '|' . 2502 // Or we must start with space characters to separate the tag name from the attributes (or whitespace). 2503 '(\s+)' . // Group 4 - Pre-attribute whitespace. 2504 '([^>]*)' . // Group 5 - Attributes. 2505 ')' . 2506 '>#' // End with a closing bracket. 
2507 ); 2508 2509 while ( preg_match( $tag_pattern, $text, $regex ) ) { 2510 $full_match = $regex[0]; 2511 $has_leading_slash = ! empty( $regex[1] ); 2512 $tag_name = $regex[2]; 2513 $tag = strtolower( $tag_name ); 2514 $is_single_tag = in_array( $tag, $single_tags, true ); 2515 $pre_attribute_ws = isset( $regex[4] ) ? $regex[4] : ''; 2516 $attributes = trim( isset( $regex[5] ) ? $regex[5] : $regex[3] ); 2517 $has_self_closer = '/' === substr( $attributes, -1 ); 2518 2473 2519 $newtext .= $tagqueue; 2474 2520 2475 $i = strpos( $text, $ regex[0]);2476 $l = strlen( $ regex[0]);2477 2478 // clear the shifter2521 $i = strpos( $text, $full_match ); 2522 $l = strlen( $full_match ); 2523 2524 // Clear the shifter. 2479 2525 $tagqueue = ''; 2480 // Pop or Push 2481 if ( isset( $regex[1][0] ) && '/' == $regex[1][0] ) { // End Tag 2482 $tag = strtolower( substr( $regex[1], 1 ) ); 2483 // if too many closing tags 2526 if ( $has_leading_slash ) { // End Tag. 2527 // If too many closing tags. 2484 2528 if ( $stacksize <= 0 ) { 2485 2529 $tag = ''; 2486 // or close to be safe $tag = '/' . $tag; 2487 2488 // if stacktop value = tag close value then pop 2489 } elseif ( $tagstack[ $stacksize - 1 ] == $tag ) { // found closing tag 2490 $tag = '</' . $tag . '>'; // Close Tag 2491 // Pop 2530 // Or close to be safe $tag = '/' . $tag. 2531 2532 // If stacktop value = tag close value, then pop. 2533 } elseif ( $tagstack[ $stacksize - 1 ] === $tag ) { // Found closing tag. 2534 $tag = '</' . $tag . '>'; // Close Tag. 2492 2535 array_pop( $tagstack ); 2493 2536 $stacksize--; 2494 } else { // closing tag not at top, search for it2537 } else { // Closing tag not at top, search for it. 2495 2538 for ( $j = $stacksize - 1; $j >= 0; $j-- ) { 2496 if ( $tagstack[ $j ] == $tag ) {2497 // add tag to tagqueue2539 if ( $tagstack[ $j ] === $tag ) { 2540 // Add tag to tagqueue. 2498 2541 for ( $k = $stacksize - 1; $k >= $j; $k-- ) { 2499 2542 $tagqueue .= '</' . array_pop( $tagstack ) . 
'>'; … … 2505 2548 $tag = ''; 2506 2549 } 2507 } else { // Begin Tag 2508 $tag = strtolower( $regex[1] ); 2509 2510 // Tag Cleaning 2511 2512 // If it's an empty tag "< >", do nothing 2513 if ( '' == $tag ) { 2514 // do nothing 2515 } elseif ( substr( $regex[2], -1 ) == '/' ) { // ElseIf it presents itself as a self-closing tag... 2550 } else { // Begin Tag. 2551 if ( $has_self_closer ) { // If it presents itself as a self-closing tag... 2516 2552 // ...but it isn't a known single-entity self-closing tag, then don't let it be treated as such and 2517 2553 // immediately close it with a closing tag (the tag will encapsulate no text as a result) 2518 if ( ! in_array( $tag, $single_tags )) {2519 $ regex[2] = trim( substr( $regex[2], 0, -1 ) ) . "></$tag";2554 if ( ! $is_single_tag ) { 2555 $attributes = trim( substr( $attributes, 0, -1 ) ) . "></$tag"; 2520 2556 } 2521 } elseif ( in_array( $tag, $single_tags ) ) { // ElseIf it's a known single-entity tag but it doesn't close itself, do so 2522 $regex[2] .= '/'; 2523 } else { // Else it's not a single-entity tag 2524 // If the top of the stack is the same as the tag we want to push, close previous tag 2525 if ( $stacksize > 0 && ! in_array( $tag, $nestable_tags ) && $tagstack[ $stacksize - 1 ] == $tag ) { 2557 } elseif ( $is_single_tag ) { // ElseIf it's a known single-entity tag but it doesn't close itself, do so 2558 $pre_attribute_ws = ' '; 2559 $attributes .= '/'; 2560 } else { // It's not a single-entity tag. 2561 // If the top of the stack is the same as the tag we want to push, close previous tag. 2562 if ( $stacksize > 0 && ! in_array( $tag, $nestable_tags, true ) && $tagstack[ $stacksize - 1 ] === $tag ) { 2526 2563 $tagqueue = '</' . array_pop( $tagstack ) . '>'; 2527 2564 $stacksize--; … … 2530 2567 } 2531 2568 2532 // Attributes 2533 $attributes = $regex[2];2534 if ( ! empty( $attributes ) && $attributes[0] != '>' ) {2535 $ attributes = ' ' . $attributes;2569 // Attributes. 
2570 if ( $has_self_closer && $is_single_tag ) { 2571 // We need some space - avoid <br/> and prefer <br />. 2572 $pre_attribute_ws = ' '; 2536 2573 } 2537 2574 2538 $tag = '<' . $tag . $ attributes . '>';2539 // If already queuing a close tag, then put this tag on, too2575 $tag = '<' . $tag . $pre_attribute_ws . $attributes . '>'; 2576 // If already queuing a close tag, then put this tag on too. 2540 2577 if ( ! empty( $tagqueue ) ) { 2541 2578 $tagqueue .= $tag; … … 2547 2584 } 2548 2585 2549 // Clear Tag Queue 2586 // Clear Tag Queue. 2550 2587 $newtext .= $tagqueue; 2551 2588 2552 // Add Remaining text2589 // Add remaining text. 2553 2590 $newtext .= $text; 2554 2591 2555 // Empty Stack2556 2592 while ( $x = array_pop( $tagstack ) ) { 2557 $newtext .= '</' . $x . '>'; // Add remaining tags to close 2558 } 2559 2560 // WP fix for the bug with HTML comments 2593 $newtext .= '</' . $x . '>'; // Add remaining tags to close. 2594 } 2595 2596 // WP fix for the bug with HTML comments. 2561 2597 $newtext = str_replace( '< !--', '<!--', $newtext ); 2562 2598 $newtext = str_replace( '<    !--', '< !--', $newtext ); -
trunk/tests/phpunit/tests/formatting/balanceTags.php
r42343 r45929 38 38 } 39 39 40 function supported_traditional_tag_names() { 41 return array( 42 array( 'a' ), 43 array( 'div' ), 44 array( 'blockquote' ), 45 // HTML tag names can be CAPITALIZED and are case-insensitive. 46 array( 'A' ), 47 array( 'dIv' ), 48 array( 'BLOCKQUOTE' ), 49 ); 50 } 51 52 function supported_custom_element_tag_names() { 53 return array( 54 array( 'custom-element' ), 55 array( 'my-custom-element' ), 56 array( 'weekday-5-item' ), 57 array( 'a-big-old-tag-name' ), 58 array( 'with_underscores-and_the_dash' ), 59 array( 'a-.' ), 60 array( 'a._-.-_' ), 61 ); 62 } 63 64 function invalid_tag_names() { 65 return array( 66 array( '<0-day>inside', '<0-day>inside' ), // Can't start with a number - handled by the "<3" fix. 67 array( '<UPPERCASE-TAG>inside', '<UPPERCASE-TAG>inside' ), // Custom elements cannot be uppercase. 68 ); 69 } 70 71 /** 72 * These are valid custom elements but we don't support them yet. 73 * 74 * @see https://w3c.github.io/webcomponents/spec/custom/#valid-custom-element-name 75 */ 76 function unsupported_valid_tag_names() { 77 return array( 78 // We don't allow ending in a dash. 79 array( '<what->inside' ), 80 // Examples from the spec working document. 81 array( 'math-α' ), 82 array( 'emotion-😍' ), 83 // UNICODE ranges 84 // 0x00b7 85 array( 'b-·' ), 86 // Latin characters with accents/modifiers. 87 // 0x00c0-0x00d6 88 // 0x00d8-0x00f6 89 array( 'a-À-Ó-Ý' ), 90 // 0x00f8-0x037d 91 array( 'a-ͳ' ), 92 // No 0x037e, which is a Greek semicolon. 93 // 0x037f-0x1fff 94 array( 'a-Ფ' ), 95 // Zero-width characters, probably never supported. 96 // 0x200c-0x200d 97 array( 'a-to-my-left-is-a-zero-width-non-joiner-do-not-delete-it' ), 98 array( 'a-to-my-left-is-a-zero-width-joiner-do-not-delete-it' ), 99 // Ties. 
100 // 0x203f-0x2040 101 array( 'under-‿-tie' ), 102 array( 'over-⁀-tie' ), 103 // 0x2170-0x218f 104 array( 'a-⁰' ), 105 array( 'a-⅀' ), 106 array( 'tag-ↀ-it' ), 107 // 0x2c00-0x2fef 108 array( 'a-Ⰰ' ), 109 array( 'b-ⴓ-c' ), 110 array( 'd-⽗' ), 111 // 0x3001-0xd7ff 112 array( 'a-、' ), 113 array( 'z-态' ), 114 array( 'a-送-䠺-ퟱ-' ), 115 // 0xf900-0xfdcf 116 array( 'a-豈' ), 117 array( 'my-切' ), 118 array( 'aﴀ-tag' ), 119 array( 'my-' ), 120 // 0xfdf0-0xfffd 121 array( 'a-ﷰ' ), 122 array( 'a---�' ), // Warning; blank characters are in there. 123 // Extended ranges. 124 // 0x10000-0xeffff 125 array( 'a-𐀀' ), 126 array( 'my-𝀀' ), 127 array( 'a𞀀-' ), 128 ); 129 } 130 131 /** 132 * These are invalid custom elements but we support them right now in order to keep the parser simpler. 133 * 134 * @see https://w3c.github.io/webcomponents/spec/custom/#valid-custom-element-name 135 */ 136 function supported_invalid_tag_names() { 137 return array( 138 // Reserved names for custom elements. 139 array( 'annotation-xml' ), 140 array( 'color-profile' ), 141 array( 'font-face' ), 142 array( 'font-face-src' ), 143 array( 'font-face-uri' ), 144 array( 'font-face-format' ), 145 array( 'font-face-name' ), 146 array( 'missing-glyph' ), 147 ); 148 } 149 150 /** 151 * @ticket 47014 152 * @dataProvider supported_traditional_tag_names 153 */ 154 function test_detects_traditional_tag_names( $tag ) { 155 $normalized = strtolower( $tag ); 156 157 $this->assertEquals( "<$normalized>inside</$normalized>", balanceTags( "<$tag>inside", true ) ); 158 } 159 160 /** 161 * @ticket 47014 162 * @dataProvider supported_custom_element_tag_names 163 */ 164 function test_detects_supported_custom_element_tag_names( $tag ) { 165 $this->assertEquals( "<$tag>inside</$tag>", balanceTags( "<$tag>inside", true ) ); 166 } 167 168 /** 169 * @ticket 47014 170 * @dataProvider invalid_tag_names 171 */ 172 function test_ignores_invalid_tag_names( $input, $output ) { 173 $this->assertEquals( $output, balanceTags( $input, true 
) ); 174 } 175 176 /** 177 * @ticket 47014 178 * @dataProvider unsupported_valid_tag_names 179 */ 180 function test_ignores_unsupported_custom_tag_names( $tag ) { 181 $this->assertEquals( "<$tag>inside", balanceTags( "<$tag>inside", true ) ); 182 } 183 184 /** 185 * @ticket 47014 186 * @dataProvider supported_invalid_tag_names 187 */ 188 function test_detects_supported_invalid_tag_names( $tag ) { 189 $this->assertEquals( "<$tag>inside</$tag>", balanceTags( "<$tag>inside", true ) ); 190 } 191 40 192 /** 41 193 * If a recognized valid single tag appears unclosed, it should get self-closed … … 69 221 '<p class="main1"/>', 70 222 '<p class="main2" />', 223 '<STRONG/>', 71 224 ); 72 225 $expected = array( … … 75 228 '<p class="main1"></p>', 76 229 '<p class="main2"></p>', 230 // Valid tags are transformed to lowercase. 231 '<strong></strong>', 77 232 ); 78 233 … … 222 377 } 223 378 379 /** 380 * Get custom element data. 381 * 382 * @return array Data. 383 */ 384 public function data_custom_elements() { 385 return array( 386 // Valid custom element tags. 387 array( 388 '<my-custom-element data-attribute="value"/>', 389 '<my-custom-element data-attribute="value"></my-custom-element>', 390 ), 391 array( 392 '<my-custom-element>Test</my-custom-element>', 393 '<my-custom-element>Test</my-custom-element>', 394 ), 395 array( 396 '<my-custom-element>Test', 397 '<my-custom-element>Test</my-custom-element>', 398 ), 399 array( 400 'Test</my-custom-element>', 401 'Test', 402 ), 403 array( 404 '</my-custom-element>Test', 405 'Test', 406 ), 407 array( 408 '<my-custom-element/>', 409 '<my-custom-element></my-custom-element>', 410 ), 411 array( 412 '<my-custom-element />', 413 '<my-custom-element></my-custom-element>', 414 ), 415 // Invalid (or at least temporarily unsupported) custom element tags. 
416 array( 417 '<MY-CUSTOM-ELEMENT>Test', 418 '<MY-CUSTOM-ELEMENT>Test', 419 ), 420 array( 421 '<my->Test', 422 '<my->Test', 423 ), 424 array( 425 '<--->Test', 426 '<--->Test', 427 ), 428 ); 429 } 430 431 /** 432 * Test custom elements. 433 * 434 * @ticket 47014 435 * @dataProvider data_custom_elements 436 * 437 * @param string $source Source. 438 * @param string $expected Expected. 439 */ 440 public function test_custom_elements( $source, $expected ) { 441 $this->assertEquals( $expected, balanceTags( $source, true ) ); 442 } 224 443 }
Note: See TracChangeset
for help on using the changeset viewer.