Ticket #47014: 47014.2.diff
| File 47014.2.diff, 13.5 KB (added 7 years ago) |
|---|
-
src/wp-includes/formatting.php
2429 2429 * @return string Balanced text 2430 2430 */ 2431 2431 function balanceTags( $text, $force = false ) { // phpcs:ignore WordPress.NamingConventions.ValidFunctionName.FunctionNameInvalid 2432 if ( $force || get_option( 'use_balanceTags' )== 1 ) {2432 if ( $force || (int) get_option( 'use_balanceTags' ) === 1 ) { 2433 2433 return force_balance_tags( $text ); 2434 2434 } else { 2435 2435 return $text; … … 2440 2440 * Balances tags of string using a modified stack. 2441 2441 * 2442 2442 * @since 2.0.4 2443 * @since 5.3.0 Improve accuracy and add support for custom element tags. 2443 2444 * 2444 2445 * @author Leonard Lin <leonard@acm.org> 2445 2446 * @license GPL … … 2469 2470 // WP bug fix for LOVE <3 (and other situations with '<' before a number) 2470 2471 $text = preg_replace( '#<([0-9]{1})#', '&lt;$1', $text ); 2471 2472 2472 while ( preg_match( '/<(\/?[\w:]*)\s*([^>]*)>/', $text, $regex ) ) { 2473 /** 2474 * Matches supported tags 2475 * 2476 * To get the pattern as a string without the comments paste into a PHP 2477 * REPL like `php -a` 2478 * 2479 * @see https://html.spec.whatwg.org/#elements-2 2480 * @see https://w3c.github.io/webcomponents/spec/custom/#valid-custom-element-name 2481 * 2482 * @example 2483 * ~# php -a 2484 * php > $s = [paste copied contents of expression below including parentheses]; 2485 * php > echo $s; 2486 * 2487 * @type string 2488 */ 2489 $tag_pattern = ( 2490 '#<' . // Start with an opening bracket. 2491 '(/?)' . // Group 1 - If it's a closing tag it'll have a leading slash. 2492 '(' . // Group 2 - Tag name. 2493 // Custom element tags have more lenient rules than HTML tag names. 2494 '(?:[a-z](?:[a-z0-9._]*)-(?:[a-z0-9._-]+)+)' . 2495 '|' . 2496 // Traditional tag rules approximate HTML tag names. 2497 '(?:[\w:]+)' . 2498 ')' . 2499 '(?:' . 2500 // We either immediately close the tag with its '>' and have nothing here. 2501 '\s*' . 2502 '(/?)' . // Group 3 - "attributes" for empty tag. 2503 '|' . 
2504 // Or we must start with space characters to separate the tag name from the attributes (or whitespace). 2505 '(\s+)' . // Group 4 - Pre-attribute whitespace. 2506 '([^>]*)' . // Group 5 - Attributes. 2507 ')' . 2508 '>#' // End with a closing bracket. 2509 ); 2510 2511 while ( preg_match( $tag_pattern, $text, $regex ) ) { 2512 $full_match = $regex[0]; 2513 $has_leading_slash = ! empty( $regex[1] ); 2514 $tag_name = $regex[2]; 2515 $tag = strtolower( $tag_name ); 2516 $is_single_tag = in_array( $tag, $single_tags, true ); 2517 $pre_attribute_ws = isset( $regex[4] ) ? $regex[4] : ''; 2518 $attributes = trim( isset( $regex[5] ) ? $regex[5] : $regex[3] ); 2519 $has_self_closer = '/' === substr( $attributes, -1 ); 2520 2473 2521 $newtext .= $tagqueue; 2474 2522 2475 $i = strpos( $text, $ regex[0]);2476 $l = strlen( $ regex[0]);2523 $i = strpos( $text, $full_match ); 2524 $l = strlen( $full_match ); 2477 2525 2478 // clear the shifter2526 // Clear the shifter. 2479 2527 $tagqueue = ''; 2480 // Pop or Push 2481 if ( isset( $regex[1][0] ) && '/' == $regex[1][0] ) { // End Tag 2482 $tag = strtolower( substr( $regex[1], 1 ) ); 2483 // if too many closing tags 2528 if ( $has_leading_slash ) { // End Tag. 2529 // If too many closing tags. 2484 2530 if ( $stacksize <= 0 ) { 2485 2531 $tag = ''; 2486 // or close to be safe $tag = '/' . $tag;2532 // Or close to be safe $tag = '/' . $tag. 2487 2533 2488 // if stacktop value = tag close value then pop 2489 } elseif ( $tagstack[ $stacksize - 1 ] == $tag ) { // found closing tag 2490 $tag = '</' . $tag . '>'; // Close Tag 2491 // Pop 2534 // If stacktop value = tag close value, then pop. 2535 } elseif ( $tagstack[ $stacksize - 1 ] === $tag ) { // Found closing tag. 2536 $tag = '</' . $tag . '>'; // Close Tag. 2492 2537 array_pop( $tagstack ); 2493 2538 $stacksize--; 2494 } else { // closing tag not at top, search for it2539 } else { // Closing tag not at top, search for it. 
2495 2540 for ( $j = $stacksize - 1; $j >= 0; $j-- ) { 2496 if ( $tagstack[ $j ] == $tag ) {2497 // add tag to tagqueue2541 if ( $tagstack[ $j ] === $tag ) { 2542 // Add tag to tagqueue. 2498 2543 for ( $k = $stacksize - 1; $k >= $j; $k-- ) { 2499 2544 $tagqueue .= '</' . array_pop( $tagstack ) . '>'; 2500 2545 $stacksize--; … … 2504 2549 } 2505 2550 $tag = ''; 2506 2551 } 2507 } else { // Begin Tag 2508 $tag = strtolower( $regex[1] ); 2509 2510 // Tag Cleaning 2511 2512 // If it's an empty tag "< >", do nothing 2513 if ( '' == $tag ) { 2514 // do nothing 2515 } elseif ( substr( $regex[2], -1 ) == '/' ) { // ElseIf it presents itself as a self-closing tag... 2552 } else { // Begin Tag. 2553 if ( $has_self_closer ) { // If it presents itself as a self-closing tag... 2516 2554 // ...but it isn't a known single-entity self-closing tag, then don't let it be treated as such and 2517 2555 // immediately close it with a closing tag (the tag will encapsulate no text as a result) 2518 if ( ! in_array( $tag, $single_tags )) {2519 $ regex[2] = trim( substr( $regex[2], 0, -1 ) ) . "></$tag";2556 if ( ! $is_single_tag ) { 2557 $attributes = trim( substr( $attributes, 0, -1 ) ) . "></$tag"; 2520 2558 } 2521 } elseif ( in_array( $tag, $single_tags ) ) { // ElseIf it's a known single-entity tag but it doesn't close itself, do so 2522 $regex[2] .= '/'; 2523 } else { // Else it's not a single-entity tag 2524 // If the top of the stack is the same as the tag we want to push, close previous tag 2525 if ( $stacksize > 0 && ! in_array( $tag, $nestable_tags ) && $tagstack[ $stacksize - 1 ] == $tag ) { 2559 } elseif ( $is_single_tag ) { // ElseIf it's a known single-entity tag but it doesn't close itself, do so 2560 $pre_attribute_ws = ' '; 2561 $attributes .= '/'; 2562 } else { // It's not a single-entity tag. 2563 // If the top of the stack is the same as the tag we want to push, close previous tag. 2564 if ( $stacksize > 0 && ! 
in_array( $tag, $nestable_tags, true ) && $tagstack[ $stacksize - 1 ] === $tag ) { 2526 2565 $tagqueue = '</' . array_pop( $tagstack ) . '>'; 2527 2566 $stacksize--; 2528 2567 } 2529 2568 $stacksize = array_push( $tagstack, $tag ); 2530 2569 } 2531 2570 2532 // Attributes 2533 $attributes = $regex[2];2534 if ( ! empty( $attributes ) && $attributes[0] != '>' ) {2535 $ attributes = ' ' . $attributes;2571 // Attributes. 2572 if ( $has_self_closer && $is_single_tag ) { 2573 // We need some space - avoid <br/> and prefer <br />. 2574 $pre_attribute_ws = ' '; 2536 2575 } 2537 2576 2538 $tag = '<' . $tag . $ attributes . '>';2539 // If already queuing a close tag, then put this tag on, too2577 $tag = '<' . $tag . $pre_attribute_ws . $attributes . '>'; 2578 // If already queuing a close tag, then put this tag on too. 2540 2579 if ( ! empty( $tagqueue ) ) { 2541 2580 $tagqueue .= $tag; 2542 2581 $tag = ''; … … 2546 2585 $text = substr( $text, $i + $l ); 2547 2586 } 2548 2587 2549 // Clear Tag Queue 2588 // Clear Tag Queue. 2550 2589 $newtext .= $tagqueue; 2551 2590 2552 // Add Remaining text2591 // Add remaining text. 2553 2592 $newtext .= $text; 2554 2593 2555 // Empty Stack2556 2594 while ( $x = array_pop( $tagstack ) ) { 2557 $newtext .= '</' . $x . '>'; // Add remaining tags to close 2595 $newtext .= '</' . $x . '>'; // Add remaining tags to close. 2558 2596 } 2559 2597 2560 // WP fix for the bug with HTML comments 2598 // WP fix for the bug with HTML comments. 2561 2599 $newtext = str_replace( '< !--', '<!--', $newtext ); 2562 2600 $newtext = str_replace( '<    !--', '< !--', $newtext ); 2563 2601 -
tests/phpunit/tests/formatting/balanceTags.php
37 37 ); 38 38 } 39 39 40 function supported_traditional_tag_names() { 41 return array( 42 array( 'a' ), 43 array( 'div' ), 44 array( 'blockquote' ), 45 // HTML tag names can be CAPITALIZED and are case-insensitive. 46 array( 'A' ), 47 array( 'dIv' ), 48 array( 'BLOCKQUOTE' ), 49 ); 50 } 51 52 function supported_custom_element_tag_names() { 53 return array( 54 array( 'custom-element' ), 55 array( 'my-custom-element' ), 56 array( 'weekday-5-item' ), 57 array( 'a-big-old-tag-name' ), 58 array( 'with_underscores-and_the_dash' ), 59 array( 'a-.' ), 60 array( 'a._-.-_' ), 61 ); 62 } 63 64 function invalid_tag_names() { 65 return array( 66 array( '<0-day>inside', '<0-day>inside' ), // Can't start with a number - handled by the "<3" fix. 67 array( '<UPPERCASE-TAG>inside', '<UPPERCASE-TAG>inside' ), // Custom elements cannot be uppercase. 68 ); 69 } 70 71 /** 72 * These are valid custom elements but we don't support them yet. 73 * 74 * @see https://w3c.github.io/webcomponents/spec/custom/#valid-custom-element-name 75 */ 76 function unsupported_valid_tag_names() { 77 return array( 78 // We don't allow ending in a dash. 79 array( '<what->inside' ), 80 // Examples from the spec working document. 81 array( 'math-α' ), 82 array( 'emotion-😍' ), 83 // UNICODE ranges 84 // 0x00b7 85 array( 'b-·' ), 86 // Latin characters with accents/modifiers. 87 // 0x00c0-0x00d6 88 // 0x00d8-0x00f6 89 array( 'a-À-Ó-Ý' ), 90 // 0x00f8-0x037d 91 array( 'a-ͳ' ), 92 // No 0x037e, which is a Greek semicolon. 93 // 0x037f-0x1fff 94 array( 'a-Ფ' ), 95 // Zero-width characters, probably never supported. 96 // 0x200c-0x200d 97 array( 'a-to-my-left-is-a-zero-width-non-joiner-do-not-delete-it' ), 98 array( 'a-to-my-left-is-a-zero-width-joiner-do-not-delete-it' ), 99 // Ties. 
100 // 0x203f-0x2040 101 array( 'under-‿-tie' ), 102 array( 'over-⁀-tie' ), 103 // 0x2170-0x218f 104 array( 'a-⁰' ), 105 array( 'a-⅀' ), 106 array( 'tag-ↀ-it' ), 107 // 0x2c00-0x2fef 108 array( 'a-Ⰰ' ), 109 array( 'b-ⴓ-c' ), 110 array( 'd-⽗' ), 111 // 0x3001-0xd7ff 112 array( 'a-、' ), 113 array( 'z-态' ), 114 array( 'a-送-䠺-ퟱ-' ), 115 // 0xf900-0xfdcf 116 array( 'a-豈' ), 117 array( 'my-切' ), 118 array( 'aﴀ-tag' ), 119 array( 'my-' ), 120 // 0xfdf0-0xfffd 121 array( 'a-ﷰ' ), 122 array( 'a---�' ), // Warning; blank characters are in there. 123 // Extended ranges. 124 // 0x10000-0xeffff 125 array( 'a-𐀀' ), 126 array( 'my-𝀀' ), 127 array( 'a𞀀-' ), 128 ); 129 } 130 131 /** 132 * These are invalid custom elements but we support them right now in order to keep the parser simpler. 133 * 134 * @see https://w3c.github.io/webcomponents/spec/custom/#valid-custom-element-name 135 */ 136 function supported_invalid_tag_names() { 137 return array( 138 // Reserved names for custom elements. 139 array( 'annotation-xml' ), 140 array( 'color-profile' ), 141 array( 'font-face' ), 142 array( 'font-face-src' ), 143 array( 'font-face-uri' ), 144 array( 'font-face-format' ), 145 array( 'font-face-name' ), 146 array( 'missing-glyph' ), 147 ); 148 } 149 150 /** 151 * @ticket 47014 152 * @dataProvider supported_traditional_tag_names 153 */ 154 function test_detects_traditional_tag_names( $tag ) { 155 $normalized = strtolower( $tag ); 156 157 $this->assertEquals( "<$normalized>inside</$normalized>", balanceTags( "<$tag>inside", true ) ); 158 } 159 160 /** 161 * @ticket 47014 162 * @dataProvider supported_custom_element_tag_names 163 */ 164 function test_detects_supported_custom_element_tag_names( $tag ) { 165 $this->assertEquals( "<$tag>inside</$tag>", balanceTags( "<$tag>inside", true ) ); 166 } 167 168 /** 169 * @ticket 47014 170 * @dataProvider invalid_tag_names 171 */ 172 function test_ignores_invalid_tag_names( $input, $output ) { 173 $this->assertEquals( $output, balanceTags( $input, true 
) ); 174 } 175 176 /** 177 * @ticket 47014 178 * @dataProvider unsupported_valid_tag_names 179 */ 180 function test_ignores_unsupported_custom_tag_names( $tag ) { 181 $this->assertEquals( "<$tag>inside", balanceTags( "<$tag>inside", true ) ); 182 } 183 184 /** 185 * @ticket 47014 186 * @dataProvider supported_invalid_tag_names 187 */ 188 function test_detects_supported_invalid_tag_names( $tag ) { 189 $this->assertEquals( "<$tag>inside</$tag>", balanceTags( "<$tag>inside", true ) ); 190 } 191 40 192 /** 41 193 * If a recognized valid single tag appears unclosed, it should get self-closed 42 194 * … … 68 220 '<em />', 69 221 '<p class="main1"/>', 70 222 '<p class="main2" />', 223 '<STRONG/>', 71 224 ); 72 225 $expected = array( 73 226 '<strong></strong>', 74 227 '<em></em>', 75 228 '<p class="main1"></p>', 76 229 '<p class="main2"></p>', 230 // Valid tags are transformed to lowercase. 231 '<strong></strong>', 77 232 ); 78 233 79 234 foreach ( $inputs as $key => $input ) { … … 221 376 } 222 377 } 223 378 379 /** 380 * Get custom element data. 381 * 382 * @return array Data. 383 */ 384 public function data_custom_elements() { 385 return array( 386 // Valid custom element tags. 387 array( 388 '<my-custom-element data-attribute="value"/>', 389 '<my-custom-element data-attribute="value"></my-custom-element>', 390 ), 391 array( 392 '<my-custom-element>Test</my-custom-element>', 393 '<my-custom-element>Test</my-custom-element>', 394 ), 395 array( 396 '<my-custom-element>Test', 397 '<my-custom-element>Test</my-custom-element>', 398 ), 399 array( 400 'Test</my-custom-element>', 401 'Test', 402 ), 403 array( 404 '</my-custom-element>Test', 405 'Test', 406 ), 407 array( 408 '<my-custom-element/>', 409 '<my-custom-element></my-custom-element>', 410 ), 411 array( 412 '<my-custom-element />', 413 '<my-custom-element></my-custom-element>', 414 ), 415 // Invalid (or at least temporarily unsupported) custom element tags. 
416 array( 417 '<MY-CUSTOM-ELEMENT>Test', 418 '<MY-CUSTOM-ELEMENT>Test', 419 ), 420 array( 421 '<my->Test', 422 '<my->Test', 423 ), 424 array( 425 '<--->Test', 426 '<--->Test', 427 ), 428 ); 429 } 430 431 /** 432 * Test custom elements. 433 * 434 * @ticket 47014 435 * @dataProvider data_custom_elements 436 * 437 * @param string $source Source. 438 * @param string $expected Expected. 439 */ 440 public function test_custom_elements( $source, $expected ) { 441 $this->assertEquals( $expected, balanceTags( $source, true ) ); 442 } 224 443 }