Ticket #47014: 47014-2.diff
| File 47014-2.diff, 10.9 KB (added by , 7 years ago) |
|---|
-
src/wp-includes/formatting.php
diff --git a/src/wp-includes/formatting.php b/src/wp-includes/formatting.php index 51a1388dfd..bde523af0f 100644
a b function force_balance_tags( $text ) { 2465 2465 // WP bug fix for LOVE <3 (and other situations with '<' before a number) 2466 2466 $text = preg_replace( '#<([0-9]{1})#', '&lt;$1', $text ); 2467 2467 2468 while ( preg_match( '/<(\/?[\w:]*)\s*([^>]*)>/', $text, $regex ) ) { 2468 /** 2469 * Matches supported tags 2470 * 2471 * To get the pattern as a string without the comments paste into a PHP 2472 * REPL like `php -a` 2473 * 2474 * @see https://html.spec.whatwg.org/#elements-2 2475 * @see https://w3c.github.io/webcomponents/spec/custom/#valid-custom-element-name 2476 * 2477 * @example 2478 * ~# php -a 2479 * php > $s = [paste copied contents of expression below including parentheses]; 2480 * php > echo $s; 2481 * 2482 * @type string 2483 */ 2484 $tag_pattern = ( 2485 '#<' . // Start with an opening bracket. 2486 '(/?)' . // Group 1 - If it's a closing tag it'll have a leading slash. 2487 '(' . // Group 2 - tag name 2488 // Custom element tags have more lenient rules than HTML tag names. 2489 '(?:[a-z](?:[a-z0-9._]*)-(?:[a-z0-9._-]+)+)' . 2490 '|' . 2491 // Traditional tag rules approximate HTML tag names. 2492 '(?:[\w:]+)' . 2493 ')' . 2494 '(?:' . 2495 // We _either_ immediately close the tag with its '>' and have nothing here. 2496 '\s*' . 2497 '(/?)' . // Group 3 - "attributes" for empty tag 2498 '|' . 2499 // _or_ We must start with space characters to separate the tag name from the attributes (or whitespace). 2500 '(\s+)' . // Group 4 - pre-attribute whitespace 2501 '([^>]*)' . // Group 5 - attributes 2502 ')' . 2503 '>#' // End with a closing bracket. 2504 ); 2505 2506 while ( preg_match( $tag_pattern, $text, $regex ) ) { 2507 $full_match = $regex[0]; 2508 $has_leading_slash = ! empty( $regex[1] ); 2509 $tag_name = $regex[2]; 2510 $tag = strtolower( $tag_name ); 2511 $is_single_tag = in_array( $tag, $single_tags, true ); 2512 $pre_attribute_ws = isset( $regex[4] ) ? $regex[4] : ''; 2513 $attributes = trim( isset( $regex[5] ) ? 
$regex[5] : $regex[3] ); 2514 $has_self_closer = '/' === substr( $attributes, -1 ); 2515 2469 2516 $newtext .= $tagqueue; 2470 2517 2471 $i = strpos( $text, $regex[0] ); 2472 $l = strlen( $regex[0] ); 2518 $i = strpos( $text, $full_match ); 2519 $l = strlen( $full_match ); 2473 2520 2474 2521 // clear the shifter 2475 2522 $tagqueue = ''; 2476 2523 // Pop or Push 2477 if ( isset( $regex[1][0] ) && '/' == $regex[1][0] ) { // End Tag 2478 $tag = strtolower( substr( $regex[1], 1 ) ); 2524 if ( $has_leading_slash ) { // End Tag 2479 2525 // if too many closing tags 2480 2526 if ( $stacksize <= 0 ) { 2481 2527 $tag = ''; … … function force_balance_tags( $text ) { 2501 2547 $tag = ''; 2502 2548 } 2503 2549 } else { // Begin Tag 2504 $tag = strtolower( $regex[1] ); 2505 2506 2550 // Tag Cleaning 2507 2508 // If it's an empty tag "< >", do nothing 2509 if ( '' == $tag ) { 2510 // do nothing 2511 } elseif ( substr( $regex[2], -1 ) == '/' ) { // ElseIf it presents itself as a self-closing tag... 2551 if ( $has_self_closer ) { // If it presents itself as a self-closing tag... 2512 2552 // ...but it isn't a known single-entity self-closing tag, then don't let it be treated as such and 2513 2553 // immediately close it with a closing tag (the tag will encapsulate no text as a result) 2514 if ( ! in_array( $tag, $single_tags ) ) { 2515 $regex[2] = trim( substr( $regex[2], 0, -1 ) ) . "></$tag"; 2554 if ( ! $is_single_tag ) { 2555 $attributes = trim( substr( $attributes, 0, -1 ) ) . 
"></$tag"; 2516 2556 } 2517 } elseif ( in_array( $tag, $single_tags ) ) { // ElseIf it's a known single-entity tag but it doesn't close itself, do so 2518 $regex[2] .= '/'; 2557 } elseif ( $is_single_tag ) { // ElseIf it's a known single-entity tag but it doesn't close itself, do so 2558 $pre_attribute_ws = ' '; 2559 $attributes .= '/'; 2519 2560 } else { // Else it's not a single-entity tag 2520 2561 // If the top of the stack is the same as the tag we want to push, close previous tag 2521 2562 if ( $stacksize > 0 && ! in_array( $tag, $nestable_tags ) && $tagstack[ $stacksize - 1 ] == $tag ) { … … function force_balance_tags( $text ) { 2526 2567 } 2527 2568 2528 2569 // Attributes 2529 $attributes = $regex[2]; 2530 if ( ! empty( $attributes ) && $attributes[0] != '>' ) { 2531 $attributes = ' ' . $attributes; 2570 if ( $has_self_closer && $is_single_tag ) { 2571 // we need some space - avoid <br/> and prefer <br /> 2572 $pre_attribute_ws = ' '; 2532 2573 } 2533 2574 2534 $tag = '<' . $tag . $attributes . '>'; 2575 $tag = '<' . $tag . $pre_attribute_ws . $attributes . '>'; 2535 2576 //If already queuing a close tag, then put this tag on, too 2536 2577 if ( ! empty( $tagqueue ) ) { 2537 2578 $tagqueue .= $tag; -
tests/phpunit/tests/formatting/balanceTags.php
diff --git a/tests/phpunit/tests/formatting/balanceTags.php b/tests/phpunit/tests/formatting/balanceTags.php index 783c320780..a533edbe36 100644
a b class Tests_Formatting_BalanceTags extends WP_UnitTestCase { 37 37 ); 38 38 } 39 39 40 function supported_traditional_tag_names() { 41 return array( 42 array( 'a' ), 43 array( 'div' ), 44 array( 'blockquote' ), 45 // HTML tag names can be CAPITALIZED and are case-insensitive 46 array( 'A' ), 47 array( 'dIv' ), 48 array( 'BLOCKQUOTE' ), 49 ); 50 } 51 52 function supported_custom_element_tag_names() { 53 return array( 54 array( 'custom-element' ), 55 array( 'my-custom-element' ), 56 array( 'weekday-5-item' ), 57 array( 'a-big-old-tag-name' ), 58 array( 'with_underscores-and_the_dash' ), 59 ); 60 } 61 62 function invalid_tag_names() { 63 return array( 64 array( '<0-day>inside', '<0-day>inside' ), // can't start with a number - handled by the "<3" "fix" 65 array( '<UPPERCASE-TAG>inside', '<UPPERCASE-TAG>inside' ), // custom elements cannot be uppercase 66 ); 67 } 68 69 // These are valid custom elements but we don't support them yet. 70 // see https://w3c.github.io/webcomponents/spec/custom/#valid-custom-element-name 71 function unsupported_valid_tag_names() { 72 return array( 73 // we don't allow ending in a dash 74 array( '<what->inside' ), 75 // examples from the spec working document 76 array( 'math-α' ), 77 array( 'emotion-😍' ), 78 // UNICODE ranges 79 // middle dot 80 // 0x00b7 81 array( 'b-·' ), 82 // latin characters with accents/modifiers 83 // 0x00c0-0x00d6 84 // 0x00d8-0x00f6 85 array( 'a-À-Ó-Ý' ), 86 // 0x00f8-0x037d 87 array( 'a-ͳ' ), 88 // no 0x037e, which is a Greek semicolon 89 // 0x037f-0x1fff 90 array( 'a-Ფ' ), 91 // zero-width characters, probably never supported 92 // 0x200c-0x200d 93 array( 'a-to-my-left-is-a-zero-width-non-joiner-do-not-delete-it' ), 94 array( 'a-to-my-left-is-a-zero-width-joiner-do-not-delete-it' ), 95 // ties 96 // 0x203f-0x2040 97 array( 'under-‿-tie' ), 98 array( 'over-⁀-tie' ), 99 // 0x2170-0x218f 100 array( 'a-⁰' ), 101 array( 'a-⅀' ), 102 array( 'tag-ↀ-it' ), 103 // 0x2c00-0x2fef 104 array( 'a-Ⰰ' ), 105 array( 'b-ⴓ-c' 
), 106 array( 'd-⽗' ), 107 // 0x3001-0xd7ff 108 array( 'a-、' ), 109 array( 'z-态' ), 110 array( 'a-送-䠺-ퟱ-' ), 111 // 0xf900-0xfdcf 112 array( 'a-豈' ), 113 array( 'my-切' ), 114 array( 'aﴀ-tag' ), 115 array( 'my-' ), 116 // 0xfdf0-0xfffd 117 array( 'a-ﷰ' ), 118 array( 'a---�' ), // warning; blank characters are in there 119 // extended ranges 120 // 0x10000-0xeffff 121 array( 'a-𐀀' ), 122 array( 'my-𝀀' ), 123 array( 'a𞀀-' ), 124 ); 125 } 126 127 // These are invalid custom elements but we support them right now in order to keep the parser simpler 128 // see https://w3c.github.io/webcomponents/spec/custom/#valid-custom-element-name 129 function supported_invalid_tag_names() { 130 return array( 131 // reserved names for custom element 132 array( 'annotation-xml' ), 133 array( 'color-profile' ), 134 array( 'font-face' ), 135 array( 'font-face-src' ), 136 array( 'font-face-uri' ), 137 array( 'font-face-format' ), 138 array( 'font-face-name' ), 139 array( 'missing-glyph' ), 140 ); 141 } 142 143 /** 144 * @dataProvider supported_traditional_tag_names 145 */ 146 function test_detects_traditional_tag_names( $tag ) { 147 $normalized = strtolower( $tag ); 148 149 $this->assertEquals( "<$normalized>inside</$normalized>", balanceTags( "<$tag>inside", true ) ); 150 } 151 152 /** 153 * @dataProvider supported_custom_element_tag_names 154 */ 155 function test_detects_supported_custom_element_tag_names( $tag ) { 156 $this->assertEquals( "<$tag>inside</$tag>", balanceTags( "<$tag>inside", true ) ); 157 } 158 159 /** 160 * @dataProvider invalid_tag_names 161 */ 162 function test_ignores_invalid_tag_names( $input, $output ) { 163 $this->assertEquals( $output, balanceTags( $input, true ) ); 164 } 165 166 /** 167 * @dataProvider unsupported_valid_tag_names 168 */ 169 function test_ignores_unsupported_custom_tag_names( $tag ) { 170 $this->assertEquals( "<$tag>inside", balanceTags( "<$tag>inside", true ) ); 171 } 172 173 /** 174 * @dataProvider supported_invalid_tag_names 175 */ 176 
function test_detects_supported_invalid_tag_names( $tag ) { 177 $this->assertEquals( "<$tag>inside</$tag>", balanceTags( "<$tag>inside", true ) ); 178 } 179 40 180 /** 41 181 * If a recognized valid single tag appears unclosed, it should get self-closed 42 182 * … … class Tests_Formatting_BalanceTags extends WP_UnitTestCase { 68 208 '<em />', 69 209 '<p class="main1"/>', 70 210 '<p class="main2" />', 211 '<STRONG/>', 71 212 ); 72 213 $expected = array( 73 214 '<strong></strong>', 74 215 '<em></em>', 75 216 '<p class="main1"></p>', 76 217 '<p class="main2"></p>', 218 // Valid tags are transformed to lowercase. 219 '<strong></strong>', 77 220 ); 78 221 79 222 foreach ( $inputs as $key => $input ) { … … class Tests_Formatting_BalanceTags extends WP_UnitTestCase { 221 364 } 222 365 } 223 366 367 /** 368 * Get custom element data. 369 * 370 * @return array Data. 371 */ 372 public function data_custom_elements() { 373 return array( 374 // Valid custom element tags. 375 array( 376 '<my-custom-element data-attribute="value"/>', 377 '<my-custom-element data-attribute="value"></my-custom-element>', 378 ), 379 array( 380 '<my-custom-element>Test</my-custom-element>', 381 '<my-custom-element>Test</my-custom-element>', 382 ), 383 array( 384 '<my-custom-element>Test', 385 '<my-custom-element>Test</my-custom-element>', 386 ), 387 array( 388 'Test</my-custom-element>', 389 'Test', 390 ), 391 array( 392 '</my-custom-element>Test', 393 'Test', 394 ), 395 array( 396 '<my-custom-element/>', 397 '<my-custom-element></my-custom-element>', 398 ), 399 array( 400 '<my-custom-element />', 401 '<my-custom-element></my-custom-element>', 402 ), 403 // Invalid (or at least temporarily unsupported) custom element tags. 404 array( 405 '<MY-CUSTOM-ELEMENT>Test', 406 '<MY-CUSTOM-ELEMENT>Test', 407 ), 408 array( 409 '<my->Test', 410 '<my->Test', 411 ), 412 array( 413 '<--->Test', 414 '<--->Test', 415 ), 416 ); 417 } 418 419 /** 420 * Test custom elements. 
421 * 422 * @ticket 47014 423 * @dataProvider data_custom_elements 424 * 425 * @param string $source Source. 426 * @param string $expected Expected. 427 */ 428 public function test_custom_elements( $source, $expected ) { 429 $this->assertEquals( $expected, balanceTags( $source, true ) ); 430 } 224 431 }