Changeset 27839
- Timestamp:
- 03/29/2014 07:15:33 AM (11 years ago)
- Location:
- trunk
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/wp-includes/formatting.php
r27761 r27839 74 74 $static_replacements = array_merge( array( $em_dash, ' ' . $em_dash . ' ', $en_dash, ' ' . $en_dash . ' ', 'xn--', '…', $opening_quote, $closing_quote, ' ™' ), $cockneyreplace ); 75 75 76 /* 77 * Regex for common whitespace characters. 78 * 79 * By default, spaces include new lines, tabs, nbsp entities, and the UTF-8 nbsp. 80 * This is designed to replace the PCRE \s sequence. In #WP22692, that sequence 81 * was found to be unreliable due to random inclusion of the A0 byte. 82 */ 83 $spaces = '[\r\n\t ]|\xC2\xA0|&nbsp;'; 84 85 86 // Pattern-based replacements of characters. 76 87 $dynamic = array(); 77 if ( "'" != $apos ) { 78 $dynamic[ '/\'(\d\d(?:’|\')?s)/' ] = $apos . '$1'; // '99's 79 $dynamic[ '/\'(\d)/' ] = $apos . '$1'; // '99 80 } 88 89 // '99 '99s '99's (apostrophe) 90 if ( "'" != $apos ) 91 $dynamic[ '/\'(?=\d)/' ] = $apos; 92 93 // Single quote at start, or preceded by (, {, <, [, ", or spaces. 81 94 if ( "'" != $opening_single_quote ) 82 $dynamic[ '/(\s|\A|[([{<]|")\'/' ] = '$1' . $opening_single_quote; // opening single quote, even after (, {, <, [ 95 $dynamic[ '/(?<=\A|[([{<"]|' . $spaces . ')\'/' ] = $opening_single_quote; 96 97 // 9" (double prime) 83 98 if ( '"' != $double_prime ) 84 $dynamic[ '/(\d)"/' ] = '$1' . $double_prime; // 9" (double prime) 99 $dynamic[ '/(?<=\d)"/' ] = $double_prime; 100 101 // 9' (prime) 85 102 if ( "'" != $prime ) 86 $dynamic[ '/(\d)\'/' ] = '$1' . $prime; // 9' (prime) 103 $dynamic[ '/(?<=\d)\'/' ] = $prime; 104 105 // Apostrophe in a word. No spaces or double primes. 87 106 if ( "'" != $apos ) 88 $dynamic[ '/(\S)\'([^\'\s])/' ] = '$1' . $apos . '$2'; // apostrophe in a word 107 $dynamic[ '/(?<!' . $spaces . ')\'(?!\'|' . $spaces . ')/' ] = $apos; 108 109 // Double quote at start, or preceded by (, {, <, [, or spaces, and not followed by spaces. 89 110 if ( '"' != $opening_quote ) 90 $dynamic[ '/(\s|\A|[([{<])"(?!\s)/' ] = '$1' . $opening_quote . 
'$2'; // opening double quote, even after (, {, <, [ 111 $dynamic[ '/(?<=\A|[([{<]|' . $spaces . ')"(?!' . $spaces . ')/' ] = $opening_quote; 112 113 // Any remaining double quotes. 91 114 if ( '"' != $closing_quote ) 92 $dynamic[ '/"(\s|\S|\Z)/' ] = $closing_quote . '$1'; // closing double quote 115 $dynamic[ '/"/' ] = $closing_quote; 116 117 // Single quotes followed by spaces or a period. 93 118 if ( "'" != $closing_single_quote ) 94 $dynamic[ '/\'([\s.]|\Z)/' ] = $closing_single_quote . '$1'; // closing single quote 95 96 $dynamic[ '/\b(\d+)x(\d+)\b/' ] = '$1×$2'; // 9x9 (times) 119 $dynamic[ '/\'(?=\Z|\.|' . $spaces . ')/' ] = $closing_single_quote; 97 120 98 121 $dynamic_characters = array_keys( $dynamic ); … … 135 158 _wptexturize_pushpop_element($curl, $no_texturize_shortcodes_stack, $no_texturize_shortcodes, '[', ']'); 136 159 } elseif ( empty($no_texturize_shortcodes_stack) && empty($no_texturize_tags_stack) ) { 160 137 161 // This is not a tag, nor is the texturization disabled static strings 138 162 $curl = str_replace($static_characters, $static_replacements, $curl); 163 139 164 // regular expressions 140 165 $curl = preg_replace($dynamic_characters, $dynamic_replacements, $curl); 166 167 // 9x9 (times) 168 if ( 1 === preg_match( '/(?<=\d)x\d/', $text ) ) { 169 // Searching for a digit is 10 times more expensive than for the x, so we avoid doing this one! 170 $curl = preg_replace( '/\b(\d+)x(\d+)\b/', '$1×$2', $curl ); 171 } 141 172 } 173 174 // Replace each & with &#038; unless it already looks like an entity. 142 175 $curl = preg_replace('/&([^#])(?![a-zA-Z1-4]{1,8};)/', '&#038;$1', $curl); 143 176 } -
trunk/tests/phpunit/tests/formatting/WPTexturize.php
r25002 r27839 15 15 $this->assertEquals('<pre><code></code>--</pre>', wptexturize('<pre><code></code>--</pre>')); 16 16 17 $this->assertEquals('<code>---</code>', wptexturize('<code>---</code>')); 17 $this->assertEquals( '<code>---</code>', wptexturize( '<code>---</code>' ) ); 18 $this->assertEquals( '<kbd>---</kbd>', wptexturize( '<kbd>---</kbd>' ) ); 19 $this->assertEquals( '<style>---</style>', wptexturize( '<style>---</style>' ) ); 20 $this->assertEquals( '<script>---</script>', wptexturize( '<script>---</script>' ) ); 21 $this->assertEquals( '<tt>---</tt>', wptexturize( '<tt>---</tt>' ) ); 18 22 19 23 $this->assertEquals('<code>href="baba"</code> “baba”', wptexturize('<code>href="baba"</code> "baba"')); … … 44 48 //WP Ticket #4539 45 49 function test_basic_quotes() { 46 $this->assertEquals('test’s', wptexturize('test\'s'));47 50 $this->assertEquals('test’s', wptexturize('test\'s')); 48 51 … … 195 198 $this->assertEquals( ' — ', wptexturize( ' -- ') ); 196 199 } 200 201 /** 202 * Test spaces around quotes. 203 * 204 * These should never happen, even if the desired output changes some day. 205 * 206 * @ticket 22692 207 */ 208 function test_spaces_around_quotes_never() { 209 $nbsp = "\xC2\xA0"; 210 211 $problem_input = "$nbsp\"A"; 212 $problem_output = "$nbsp”A"; 213 214 $this->assertNotEquals( $problem_output, wptexturize( $problem_input ) ); 215 } 216 217 /** 218 * Test spaces around quotes. 219 * 220 * These are desirable outputs for the current design. 221 * 222 * @ticket 22692 223 * @dataProvider data_spaces_around_quotes 224 */ 225 function test_spaces_around_quotes( $input, $output ) { 226 return $this->assertEquals( $output, wptexturize( $input ) ); 227 } 228 229 function data_spaces_around_quotes() { 230 $nbsp = "\xC2\xA0"; 231 $pi = "\xCE\xA0"; 232 233 return array( 234 array( 235 "stop. $nbsp\"A quote after 2 spaces.\"", 236 "stop. 
$nbsp“A quote after 2 spaces.”", 237 ), 238 array( 239 "stop.$nbsp$nbsp\"A quote after 2 spaces.\"", 240 "stop.$nbsp$nbsp“A quote after 2 spaces.”", 241 ), 242 array( 243 "stop. $nbsp'A quote after 2 spaces.'", 244 "stop. $nbsp‘A quote after 2 spaces.’", 245 ), 246 array( 247 "stop.$nbsp$nbsp'A quote after 2 spaces.'", 248 "stop.$nbsp$nbsp‘A quote after 2 spaces.’", 249 ), 250 array( 251 "stop. \"A quote after 2 spaces.\"", 252 "stop. “A quote after 2 spaces.”", 253 ), 254 array( 255 "stop. \"A quote after 2 spaces.\"", 256 "stop. “A quote after 2 spaces.”", 257 ), 258 array( 259 "stop. 'A quote after 2 spaces.'", 260 "stop. ‘A quote after 2 spaces.’", 261 ), 262 array( 263 "stop. 'A quote after 2 spaces.'", 264 "stop. ‘A quote after 2 spaces.’", 265 ), 266 array( 267 "Contraction: $pi's", 268 "Contraction: $pi’s", 269 ), 270 ); 271 } 272 273 /** 274 * Apostrophe before a number always becomes ’ (apos); 275 * 276 * Checks all baseline patterns. If anything ever changes in wptexturize(), these tests may fail. 277 * 278 * @ticket 22692 279 * @dataProvider data_apos_before_digits 280 */ 281 function test_apos_before_digits( $input, $output ) { 282 return $this->assertEquals( $output, wptexturize( $input ) ); 283 } 284 285 function data_apos_before_digits() { 286 return array( 287 array( 288 "word '99 word", 289 "word ’99 word", 290 ), 291 array( 292 "word'99 word", 293 "word’99 word", 294 ), 295 array( 296 "word '99word", 297 "word ’99word", 298 ), 299 array( 300 "word'99word", 301 "word’99word", 302 ), 303 array( 304 "word '99’s word", // Appears as a separate but logically superfluous pattern in 3.8. 305 "word ’99’s word", 306 ), 307 array( 308 "word '99's word", // Due to the logic error, second apos becomes a prime. 
See ticket #22823 309 "word ’99′s word", 310 ), 311 array( 312 "word '99'samsonite", 313 "word ’99′samsonite", 314 ), 315 array( 316 "according to our source, '33% of all students scored less than 50' on the test.", // Apostrophes and primes have priority over quotes 317 "according to our source, ’33% of all students scored less than 50′ on the test.", 318 ), 319 array( 320 "word '99' word", // See ticket #8775 321 "word ’99′ word", 322 ), 323 ); 324 } 325 326 /** 327 * Apostrophe after a space or ([{<" becomes ‘ (opening_single_quote) 328 * 329 * Checks all baseline patterns. If anything ever changes in wptexturize(), these tests may fail. 330 * 331 * @ticket 22692 332 * @dataProvider data_opening_single_quote 333 */ 334 function test_opening_single_quote( $input, $output ) { 335 return $this->assertEquals( $output, wptexturize( $input ) ); 336 } 337 338 function data_opening_single_quote() { 339 return array( 340 array( 341 "word 'word word", 342 "word ‘word word", 343 ), 344 array( 345 "word ('word word", 346 "word (‘word word", 347 ), 348 array( 349 "word ['word word", 350 "word [‘word word", 351 ), 352 array( 353 "word <'word word", // Invalid HTML input? 
354 "word <‘word word", 355 ), 356 array( 357 "word <'word word", // Valid HTML input triggers the apos in a word pattern 358 "word <’word word", 359 ), 360 array( 361 "word {'word word", 362 "word {‘word word", 363 ), 364 array( 365 "word \"'word word", 366 "word “‘word word", // Two opening quotes 367 ), 368 array( 369 "'word word", 370 "‘word word", 371 ), 372 array( 373 "word('word word", 374 "word(‘word word", 375 ), 376 array( 377 "word['word word", 378 "word[‘word word", 379 ), 380 array( 381 "word<'word word", 382 "word<‘word word", 383 ), 384 array( 385 "word<'word word", 386 "word<’word word", 387 ), 388 array( 389 "word{'word word", 390 "word{‘word word", 391 ), 392 array( 393 "word\"'word word", 394 "word”‘word word", // Closing quote, then opening quote 395 ), 396 array( 397 "word ' word word", 398 "word ‘ word word", 399 ), 400 array( 401 "word (' word word", 402 "word (‘ word word", 403 ), 404 array( 405 "word [' word word", 406 "word [‘ word word", 407 ), 408 array( 409 "word <' word word", // Invalid HTML input? 410 "word <‘ word word", 411 ), 412 array( 413 "word <' word word", // Valid HTML input triggers the closing single quote here 414 "word <’ word word", 415 ), 416 array( 417 "word {' word word", 418 "word {‘ word word", 419 ), 420 array( 421 "word \"' word word", 422 "word “‘ word word", // Two opening quotes 423 ), 424 array( 425 "' word word", 426 "‘ word word", 427 ), 428 array( 429 "word(' word word", 430 "word(‘ word word", 431 ), 432 array( 433 "word[' word word", 434 "word[‘ word word", 435 ), 436 array( 437 "word<' word word", 438 "word<‘ word word", 439 ), 440 array( 441 "word<' word word", 442 "word<’ word word", 443 ), 444 array( 445 "word{' word word", 446 "word{‘ word word", 447 ), 448 array( 449 "word\"' word word", 450 "word”‘ word word", // Closing quote, then opening quote 451 ), 452 ); 453 } 454 455 /** 456 * Double quote after a number becomes ″ (double_prime) 457 * 458 * Checks all baseline patterns. 
If anything ever changes in wptexturize(), these tests may fail. 459 * 460 * @ticket 22692 461 * @dataProvider data_double_prime 462 */ 463 function test_double_prime( $input, $output ) { 464 return $this->assertEquals( $output, wptexturize( $input ) ); 465 } 466 467 function data_double_prime() { 468 return array( 469 array( 470 'word 99" word', 471 'word 99″ word', 472 ), 473 array( 474 'word 99"word', 475 'word 99″word', 476 ), 477 array( 478 'word99" word', 479 'word99″ word', 480 ), 481 array( 482 'word99"word', 483 'word99″word', 484 ), 485 ); 486 } 487 488 /** 489 * Apostrophe after a number becomes ′ (prime) 490 * 491 * Checks all baseline patterns. If anything ever changes in wptexturize(), these tests may fail. 492 * 493 * @ticket 22692 494 * @dataProvider data_single_prime 495 */ 496 function test_single_prime( $input, $output ) { 497 return $this->assertEquals( $output, wptexturize( $input ) ); 498 } 499 500 function data_single_prime() { 501 return array( 502 array( 503 "word 99' word", 504 "word 99′ word", 505 ), 506 array( 507 "word 99'word", 508 "word 99′word", 509 ), 510 array( 511 "word99' word", 512 "word99′ word", 513 ), 514 array( 515 "word99'word", 516 "word99′word", 517 ), 518 ); 519 } 520 521 /** 522 * Apostrophe "in a word" becomes ’ (apos) 523 * 524 * Checks all baseline patterns. If anything ever changes in wptexturize(), these tests may fail. 525 * 526 * @ticket 22692 527 * @dataProvider data_contractions 528 */ 529 function test_contractions( $input, $output ) { 530 return $this->assertEquals( $output, wptexturize( $input ) ); 531 } 532 533 function data_contractions() { 534 return array( 535 array( 536 "word word's word", 537 "word word’s word", 538 ), 539 array( 540 "word word'. word", // Quotes with outside punctuation could end with apostrophes instead of closing quotes (may affect i18n) 541 "word word’. word", 542 ), 543 array( 544 "word ]'. word", 545 "word ]’. word", 546 ), 547 array( 548 "word )'. word", 549 "word )’. 
word", 550 ), 551 array( 552 "word }'. word", 553 "word }’. word", 554 ), 555 array( 556 "word >'. word", // Not tested 557 "word >’. word", 558 ), 559 array( 560 "word >'. word", 561 "word >’. word", 562 ), 563 ); 564 } 565 566 /** 567 * Double quote after a space or ([{< becomes “ (opening_quote) if not followed by spaces 568 * 569 * Checks all baseline patterns. If anything ever changes in wptexturize(), these tests may fail. 570 * 571 * @ticket 22692 572 * @dataProvider data_opening_quote 573 */ 574 function test_opening_quote( $input, $output ) { 575 return $this->assertEquals( $output, wptexturize( $input ) ); 576 } 577 578 function data_opening_quote() { 579 return array( 580 array( 581 'word "word word', 582 'word “word word', 583 ), 584 array( 585 'word ("word word', 586 'word (“word word', 587 ), 588 array( 589 'word ["word word', 590 'word [“word word', 591 ), 592 array( 593 'word <"word word', // Invalid HTML input? 594 'word <“word word', 595 ), 596 array( 597 'word <"word word', // Valid HTML input triggers the closing quote pattern 598 'word <”word word', 599 ), 600 array( 601 'word {"word word', 602 'word {“word word', 603 ), 604 array( 605 '"word word', 606 '“word word', 607 ), 608 array( 609 'word("word word', 610 'word(“word word', 611 ), 612 array( 613 'word["word word', 614 'word[“word word', 615 ), 616 array( 617 'word<"word word', // Invalid HTML input? 618 'word<“word word', 619 ), 620 array( 621 'word<"word word', // Valid HTML input triggers the closing quote pattern 622 'word<”word word', 623 ), 624 array( 625 'word{"word word', 626 'word{“word word', 627 ), 628 array( 629 'word "99 word', 630 'word “99 word', 631 ), 632 ); 633 } 634 635 /** 636 * Double quote becomes ” (closing_quote) unless it is already converted to double_prime or opening_quote. 637 * 638 * Checks all baseline patterns. If anything ever changes in wptexturize(), these tests may fail. 
639 * 640 * @ticket 22692 641 * @dataProvider data_closing_quote 642 */ 643 function test_closing_quote( $input, $output ) { 644 return $this->assertEquals( $output, wptexturize( $input ) ); 645 } 646 647 function data_closing_quote() { 648 return array( 649 array( 650 'word word" word', 651 'word word” word', 652 ), 653 array( 654 'word word") word', 655 'word word”) word', 656 ), 657 array( 658 'word word"] word', 659 'word word”] word', 660 ), 661 array( 662 'word word"} word', 663 'word word”} word', 664 ), 665 array( 666 'word word"> word', // Invalid HTML input? 667 'word word”> word', 668 ), 669 array( 670 'word word"> word', // Valid HTML should work 671 'word word”> word', 672 ), 673 array( 674 'word word"', 675 'word word”', 676 ), 677 array( 678 'word word"word', 679 'word word”word', 680 ), 681 array( 682 'word"word"word', 683 'word”word”word', 684 ), 685 array( 686 'test sentence".', 687 'test sentence”.', 688 ), 689 array( 690 'test sentence."', 691 'test sentence.”', 692 ), 693 array( 694 'test sentence". word', 695 'test sentence”. word', 696 ), 697 array( 698 'test sentence." word', 699 'test sentence.” word', 700 ), 701 ); 702 } 703 704 /** 705 * Test that single quotes followed by a space or a period become ’ (closing_single_quote) 706 * 707 * Checks all baseline patterns. If anything ever changes in wptexturize(), these tests may fail. 708 * 709 * @ticket 22692 710 * @dataProvider data_closing_single_quote 711 */ 712 function test_closing_single_quote( $input, $output ) { 713 return $this->assertEquals( $output, wptexturize( $input ) ); 714 } 715 716 function data_closing_single_quote() { 717 return array( 718 array( 719 "word word' word", 720 "word word’ word", 721 ), 722 array( 723 "word word'. word", 724 "word word’. 
word", 725 ), 726 array( 727 "word word'.word", 728 "word word’.word", 729 ), 730 array( 731 "word word'", 732 "word word’", 733 ), 734 array( 735 "test sentence'.", 736 "test sentence’.", 737 ), 738 array( 739 "test sentence.'", 740 "test sentence.’", 741 ), 742 array( 743 "test sentence'. word", 744 "test sentence’. word", 745 ), 746 array( 747 "test sentence.' word", 748 "test sentence.’ word", 749 ), 750 ); 751 } 752 753 /** 754 * Tests multiplication. 755 * 756 * Checks all baseline patterns. If anything ever changes in wptexturize(), these tests may fail. 757 * 758 * @ticket 22692 759 * @dataProvider data_multiplication 760 */ 761 function test_multiplication( $input, $output ) { 762 return $this->assertEquals( $output, wptexturize( $input ) ); 763 } 764 765 function data_multiplication() { 766 return array( 767 array( 768 "9x9", 769 "9×9", 770 ), 771 array( 772 "12x34", 773 "12×34", 774 ), 775 array( 776 "9 x 9", 777 "9 x 9", 778 ), 779 ); 780 } 781 782 /** 783 * Test ampersands. & always becomes &#038; unless it is followed by # or ; 784 * 785 * Checks all baseline patterns. If anything ever changes in wptexturize(), these tests may fail. 786 * 787 * @ticket 22692 788 * @dataProvider data_ampersand 789 */ 790 function test_ampersand( $input, $output ) { 791 return $this->assertEquals( $output, wptexturize( $input ) ); 792 } 793 794 function data_ampersand() { 795 return array( 796 array( 797 "word & word", 798 "word &#038; word", 799 ), 800 array( 801 "word&word", 802 "word&#038;word", 803 ), 804 array( 805 "word &nbsp; word", 806 "word &nbsp; word", 807 ), 808 array( 809 "word &#038; word", 810 "word &#038; word", 811 ), 812 array( 813 "word &# word", 814 "word &# word", // invalid output? 
815 ), 816 array( 817 "word &44; word", 818 "word &44; word", 819 ), 820 array( 821 "word && word", 822 "word && word", 823 ), 824 array( 825 "word &!amp; word", 826 "word &!amp; word", 827 ), 828 ); 829 } 830 831 /** 832 * Test "cockney" phrases, which begin with an apostrophe instead of an opening single quote. 833 * 834 * Checks all baseline patterns. If anything ever changes in wptexturize(), these tests may fail. 835 * 836 * @ticket 22692 837 * @dataProvider data_cockney 838 */ 839 function test_cockney( $input, $output ) { 840 return $this->assertEquals( $output, wptexturize( $input ) ); 841 } 842 843 function data_cockney() { 844 return array( 845 array( 846 "word 'tain't word", 847 "word ’tain’t word", 848 ), 849 array( 850 "word 'twere word", 851 "word ’twere word", 852 ), 853 array( 854 "word 'twas word", 855 "word ’twas word", 856 ), 857 array( 858 "word 'tis word", 859 "word ’tis word", 860 ), 861 array( 862 "word 'twill word", 863 "word ’twill word", 864 ), 865 array( 866 "word 'til word", 867 "word ’til word", 868 ), 869 array( 870 "word 'bout word", 871 "word ’bout word", 872 ), 873 array( 874 "word 'nuff word", 875 "word ’nuff word", 876 ), 877 array( 878 "word 'round word", 879 "word ’round word", 880 ), 881 array( 882 "word 'cause word", 883 "word ’cause word", 884 ), 885 array( 886 "word 'em word", 887 "word ‘em word", 888 ), 889 ); 890 } 891 892 /** 893 * Test smart dashes. 894 * 895 * Checks all baseline patterns. If anything ever changes in wptexturize(), these tests may fail. 
896 * 897 * @ticket 22692 898 * @dataProvider data_smart_dashes 899 */ 900 function test_smart_dashes( $input, $output ) { 901 return $this->assertEquals( $output, wptexturize( $input ) ); 902 } 903 904 function data_smart_dashes() { 905 return array( 906 array( 907 "word --- word", 908 "word — word", 909 ), 910 array( 911 "word---word", 912 "word—word", 913 ), 914 array( 915 "word -- word", 916 "word — word", 917 ), 918 array( 919 "word--word", 920 "word–word", 921 ), 922 array( 923 "word - word", 924 "word – word", 925 ), 926 array( 927 "word-word", 928 "word-word", 929 ), 930 array( 931 "word xn– word", 932 "word xn-- word", 933 ), 934 array( 935 "wordxn–word", 936 "wordxn--word", 937 ), 938 ); 939 } 940 941 /** 942 * Test miscellaneous static replacements. 943 * 944 * Checks all baseline patterns. If anything ever changes in wptexturize(), these tests may fail. 945 * 946 * @ticket 22692 947 * @dataProvider data_misc_static_replacements 948 */ 949 function test_misc_static_replacements( $input, $output ) { 950 return $this->assertEquals( $output, wptexturize( $input ) ); 951 } 952 953 function data_misc_static_replacements() { 954 return array( 955 array( 956 "word ... word", 957 "word … word", 958 ), 959 array( 960 "word...word", 961 "word…word", 962 ), 963 array( 964 "word `` word", 965 "word “ word", 966 ), 967 array( 968 "word``word", 969 "word“word", 970 ), 971 array( 972 "word '' word", 973 "word ” word", 974 ), 975 array( 976 "word''word", 977 "word”word", 978 ), 979 array( 980 "word (tm) word", 981 "word ™ word", 982 ), 983 array( 984 "word (tm)word", 985 "word ™word", 986 ), 987 array( 988 "word(tm) word", 989 "word(tm) word", 990 ), 991 array( 992 "word(tm)word", 993 "word(tm)word", 994 ), 995 ); 996 } 197 997 }
Note: See TracChangeset
for help on using the changeset viewer.