Ticket #47014: 47014-4.diff
| File 47014-4.diff, 11.0 KB (added by , 7 years ago) |
|---|
-
src/wp-includes/formatting.php
diff --git a/src/wp-includes/formatting.php b/src/wp-includes/formatting.php index 51a1388dfd..27cff2434f 100644
a b function force_balance_tags( $text ) { 2465 2465 // WP bug fix for LOVE <3 (and other situations with '<' before a number) 2466 2466 $text = preg_replace( '#<([0-9]{1})#', '&lt;$1', $text ); 2467 2467 2468 while ( preg_match( '/<(\/?[\w:]*)\s*([^>]*)>/', $text, $regex ) ) { 2468 /** 2469 * Matches supported tags 2470 * 2471 * To get the pattern as a string without the comments paste into a PHP 2472 * REPL like `php -a` 2473 * 2474 * @see https://html.spec.whatwg.org/#elements-2 2475 * @see https://w3c.github.io/webcomponents/spec/custom/#valid-custom-element-name 2476 * 2477 * @example 2478 * ~# php -a 2479 * php > $s = [paste copied contents of expression below including parentheses]; 2480 * php > echo $s; 2481 * 2482 * @type string 2483 */ 2484 $tag_pattern = ( 2485 '#<' . // Start with an opening bracket. 2486 '(/?)' . // Group 1 - If it's a closing tag it'll have a leading slash. 2487 '(' . // Group 2 - tag name 2488 // Custom element tags have more lenient rules than HTML tag names. 2489 '(?:[a-z](?:[a-z0-9._]*)-(?:[a-z0-9._-]+)+)' . 2490 '|' . 2491 // Traditional tag rules approximate HTML tag names. 2492 '(?:[\w:]+)' . 2493 ')' . 2494 '(?:' . 2495 // We _either_ immediately close the tag with its '>' and have nothing here. 2496 '\s*' . 2497 '(/?)' . // Group 3 - "attributes" for empty tag 2498 '|' . 2499 // _or_ We must start with space characters to separate the tag name from the attributes (or whitespace). 2500 '(\s+)' . // Group 4 - pre-attribute whitespace 2501 '([^>]*)' . // Group 5 - attributes 2502 ')' . 2503 '>#' // End with a closing bracket. 2504 ); 2505 2506 while ( preg_match( $tag_pattern, $text, $regex ) ) { 2507 $full_match = $regex[0]; 2508 $has_leading_slash = ! empty( $regex[1] ); 2509 $tag_name = $regex[2]; 2510 $tag = strtolower( $tag_name ); 2511 $is_single_tag = in_array( $tag, $single_tags, true ); 2512 $pre_attribute_ws = isset( $regex[4] ) ? $regex[4] : ''; 2513 $attributes = trim( isset( $regex[5] ) ? 
$regex[5] : $regex[3] ); 2514 $has_self_closer = '/' === substr( $attributes, -1 ); 2515 2469 2516 $newtext .= $tagqueue; 2470 2517 2471 $i = strpos( $text, $regex[0] ); 2472 $l = strlen( $regex[0] ); 2518 $i = strpos( $text, $full_match ); 2519 $l = strlen( $full_match ); 2473 2520 2474 2521 // clear the shifter 2475 2522 $tagqueue = ''; 2476 2523 // Pop or Push 2477 if ( isset( $regex[1][0] ) && '/' == $regex[1][0] ) { // End Tag 2478 $tag = strtolower( substr( $regex[1], 1 ) ); 2524 if ( $has_leading_slash ) { // End Tag 2479 2525 // if too many closing tags 2480 2526 if ( $stacksize <= 0 ) { 2481 2527 $tag = ''; … … function force_balance_tags( $text ) { 2501 2547 $tag = ''; 2502 2548 } 2503 2549 } else { // Begin Tag 2504 $tag = strtolower( $regex[1] ); 2505 2506 2550 // Tag Cleaning 2507 2508 // If it's an empty tag "< >", do nothing 2509 if ( '' == $tag ) { 2510 // do nothing 2511 } elseif ( substr( $regex[2], -1 ) == '/' ) { // ElseIf it presents itself as a self-closing tag... 2551 if ( $has_self_closer ) { // If it presents itself as a self-closing tag... 2512 2552 // ...but it isn't a known single-entity self-closing tag, then don't let it be treated as such and 2513 2553 // immediately close it with a closing tag (the tag will encapsulate no text as a result) 2514 if ( ! in_array( $tag, $single_tags ) ) { 2515 $regex[2] = trim( substr( $regex[2], 0, -1 ) ) . "></$tag"; 2554 if ( ! $is_single_tag ) { 2555 $attributes = trim( substr( $attributes, 0, -1 ) ) . 
"></$tag"; 2516 2556 } 2517 } elseif ( in_array( $tag, $single_tags ) ) { // ElseIf it's a known single-entity tag but it doesn't close itself, do so 2518 $regex[2] .= '/'; 2557 } elseif ( $is_single_tag ) { // ElseIf it's a known single-entity tag but it doesn't close itself, do so 2558 $pre_attribute_ws = ' '; 2559 $attributes .= '/'; 2519 2560 } else { // Else it's not a single-entity tag 2520 2561 // If the top of the stack is the same as the tag we want to push, close previous tag 2521 2562 if ( $stacksize > 0 && ! in_array( $tag, $nestable_tags ) && $tagstack[ $stacksize - 1 ] == $tag ) { … … function force_balance_tags( $text ) { 2526 2567 } 2527 2568 2528 2569 // Attributes 2529 $attributes = $regex[2]; 2530 if ( ! empty( $attributes ) && $attributes[0] != '>' ) { 2531 $attributes = ' ' . $attributes; 2570 if ( $has_self_closer && $is_single_tag ) { 2571 // we need some space - avoid <br/> and prefer <br /> 2572 $pre_attribute_ws = ' '; 2532 2573 } 2533 2574 2534 $tag = '<' . $tag . $attributes . '>'; 2575 $tag = '<' . $tag . $pre_attribute_ws . $attributes . '>'; 2535 2576 //If already queuing a close tag, then put this tag on, too 2536 2577 if ( ! empty( $tagqueue ) ) { 2537 2578 $tagqueue .= $tag; -
tests/phpunit/tests/formatting/balanceTags.php
diff --git a/tests/phpunit/tests/formatting/balanceTags.php b/tests/phpunit/tests/formatting/balanceTags.php index 783c320780..0794dc47f4 100644
a b class Tests_Formatting_BalanceTags extends WP_UnitTestCase { 37 37 ); 38 38 } 39 39 40 function supported_traditional_tag_names() { 41 return array( 42 array( 'a' ), 43 array( 'div' ), 44 array( 'blockquote' ), 45 // HTML tag names can be CAPITALIZED and are case-insensitive. 46 array( 'A' ), 47 array( 'dIv' ), 48 array( 'BLOCKQUOTE' ), 49 ); 50 } 51 52 function supported_custom_element_tag_names() { 53 return array( 54 array( 'custom-element' ), 55 array( 'my-custom-element' ), 56 array( 'weekday-5-item' ), 57 array( 'a-big-old-tag-name' ), 58 array( 'with_underscores-and_the_dash' ), 59 array( 'a-.' ), 60 array( 'a._-.-_' ), 61 ); 62 } 63 64 function invalid_tag_names() { 65 return array( 66 array( '<0-day>inside', '&lt;0-day>inside' ), // Can't start with a number - handled by the "<3" "fix." 67 array( '<UPPERCASE-TAG>inside', '<UPPERCASE-TAG>inside' ), // Custom elements cannot be uppercase. 68 ); 69 } 70 71 // These are valid custom elements but we don't support them yet. 72 // see https://w3c.github.io/webcomponents/spec/custom/#valid-custom-element-name 73 function unsupported_valid_tag_names() { 74 return array( 75 // We don't allow ending in a dash. 76 array( '<what->inside' ), 77 // Examples from the spec working document. 
78 array( 'math-α' ), 79 array( 'emotion-😍' ), 80 // UNICODE ranges 81 // middle dot 82 // 0x00b7 83 array( 'b-·' ), 84 // latin characters with accents/modifiers 85 // 0x00c0-0x00d6 86 // 0x00d8-0x00f6 87 array( 'a-À-Ó-Ý' ), 88 // 0x00f8-0x037d 89 array( 'a-ͳ' ), 90 // no 0x037e, which is a Greek semicolon 91 // 0x037f-0x1fff 92 array( 'a-Ფ' ), 93 // zero-width characters, probably never supported 94 // 0x200c-0x200d 95 array( 'a-to-my-left-is-a-zero-width-non-joiner-do-not-delete-it' ), 96 array( 'a-to-my-left-is-a-zero-width-joiner-do-not-delete-it' ), 97 // ties 98 // 0x203f-0x2040 99 array( 'under-‿-tie' ), 100 array( 'over-⁀-tie' ), 101 // 0x2170-0x218f 102 array( 'a-⁰' ), 103 array( 'a-⅀' ), 104 array( 'tag-ↀ-it' ), 105 // 0x2c00-0x2fef 106 array( 'a-Ⰰ' ), 107 array( 'b-ⴓ-c' ), 108 array( 'd-⽗' ), 109 // 0x3001-0xd7ff 110 array( 'a-、' ), 111 array( 'z-态' ), 112 array( 'a-送-䠺-ퟱ-' ), 113 // 0xf900-0xfdcf 114 array( 'a-豈' ), 115 array( 'my-切' ), 116 array( 'aﴀ-tag' ), 117 array( 'my-' ), 118 // 0xfdf0-0xfffd 119 array( 'a-ﷰ' ), 120 array( 'a---�' ), // Warning; blank characters are in there. 121 // extended ranges 122 // 0x10000-0xeffff 123 array( 'a-𐀀' ), 124 array( 'my-𝀀' ), 125 array( 'a𞀀-' ), 126 ); 127 } 128 129 // These are invalid custom elements but we support them right now in order to keep the parser simpler. 
130 // see https://w3c.github.io/webcomponents/spec/custom/#valid-custom-element-name 131 function supported_invalid_tag_names() { 132 return array( 133 // reserved names for custom elements 134 array( 'annotation-xml' ), 135 array( 'color-profile' ), 136 array( 'font-face' ), 137 array( 'font-face-src' ), 138 array( 'font-face-uri' ), 139 array( 'font-face-format' ), 140 array( 'font-face-name' ), 141 array( 'missing-glyph' ), 142 ); 143 } 144 145 /** 146 * @dataProvider supported_traditional_tag_names 147 */ 148 function test_detects_traditional_tag_names( $tag ) { 149 $normalized = strtolower( $tag ); 150 151 $this->assertEquals( "<$normalized>inside</$normalized>", balanceTags( "<$tag>inside", true ) ); 152 } 153 154 /** 155 * @dataProvider supported_custom_element_tag_names 156 */ 157 function test_detects_supported_custom_element_tag_names( $tag ) { 158 $this->assertEquals( "<$tag>inside</$tag>", balanceTags( "<$tag>inside", true ) ); 159 } 160 161 /** 162 * @dataProvider invalid_tag_names 163 */ 164 function test_ignores_invalid_tag_names( $input, $output ) { 165 $this->assertEquals( $output, balanceTags( $input, true ) ); 166 } 167 168 /** 169 * @dataProvider unsupported_valid_tag_names 170 */ 171 function test_ignores_unsupported_custom_tag_names( $tag ) { 172 $this->assertEquals( "<$tag>inside", balanceTags( "<$tag>inside", true ) ); 173 } 174 175 /** 176 * @dataProvider supported_invalid_tag_names 177 */ 178 function test_detects_supported_invalid_tag_names( $tag ) { 179 $this->assertEquals( "<$tag>inside</$tag>", balanceTags( "<$tag>inside", true ) ); 180 } 181 40 182 /** 41 183 * If a recognized valid single tag appears unclosed, it should get self-closed 42 184 * … … class Tests_Formatting_BalanceTags extends WP_UnitTestCase { 68 210 '<em />', 69 211 '<p class="main1"/>', 70 212 '<p class="main2" />', 213 '<STRONG/>', 71 214 ); 72 215 $expected = array( 73 216 '<strong></strong>', 74 217 '<em></em>', 75 218 '<p class="main1"></p>', 76 219 '<p 
class="main2"></p>', 220 // Valid tags are transformed to lowercase. 221 '<strong></strong>', 77 222 ); 78 223 79 224 foreach ( $inputs as $key => $input ) { … … class Tests_Formatting_BalanceTags extends WP_UnitTestCase { 221 366 } 222 367 } 223 368 369 /** 370 * Get custom element data. 371 * 372 * @return array Data. 373 */ 374 public function data_custom_elements() { 375 return array( 376 // Valid custom element tags. 377 array( 378 '<my-custom-element data-attribute="value"/>', 379 '<my-custom-element data-attribute="value"></my-custom-element>', 380 ), 381 array( 382 '<my-custom-element>Test</my-custom-element>', 383 '<my-custom-element>Test</my-custom-element>', 384 ), 385 array( 386 '<my-custom-element>Test', 387 '<my-custom-element>Test</my-custom-element>', 388 ), 389 array( 390 'Test</my-custom-element>', 391 'Test', 392 ), 393 array( 394 '</my-custom-element>Test', 395 'Test', 396 ), 397 array( 398 '<my-custom-element/>', 399 '<my-custom-element></my-custom-element>', 400 ), 401 array( 402 '<my-custom-element />', 403 '<my-custom-element></my-custom-element>', 404 ), 405 // Invalid (or at least temporarily unsupported) custom element tags. 406 array( 407 '<MY-CUSTOM-ELEMENT>Test', 408 '<MY-CUSTOM-ELEMENT>Test', 409 ), 410 array( 411 '<my->Test', 412 '<my->Test', 413 ), 414 array( 415 '<--->Test', 416 '<--->Test', 417 ), 418 ); 419 } 420 421 /** 422 * Test custom elements. 423 * 424 * @ticket 47014 425 * @dataProvider data_custom_elements 426 * 427 * @param string $source Source. 428 * @param string $expected Expected. 429 */ 430 public function test_custom_elements( $source, $expected ) { 431 $this->assertEquals( $expected, balanceTags( $source, true ) ); 432 } 224 433 }