Changeset 34761
- Timestamp:
- 10/02/2015 04:25:40 AM (9 years ago)
- Location:
- trunk
- Files:
-
- 1 added
- 5 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/wp-includes/formatting.php
r34747 r34761 220 220 $tagnames = array_intersect( array_keys( $shortcode_tags ), $matches[1] ); 221 221 $found_shortcodes = ! empty( $tagnames ); 222 if ( $found_shortcodes ) { 223 $tagregexp = join( '|', array_map( 'preg_quote', $tagnames ) ); 224 $tagregexp = "(?:$tagregexp)(?![\\w-])"; // Excerpt of get_shortcode_regex(). 225 $shortcode_regex = 226 '\[' // Find start of shortcode. 227 . '[\/\[]?' // Shortcodes may begin with [/ or [[ 228 . $tagregexp // Only match registered shortcodes, because performance. 229 . '(?:' 230 . '[^\[\]<>]+' // Shortcodes do not contain other shortcodes. Quantifier critical. 231 . '|' 232 . '<[^\[\]>]*>' // HTML elements permitted. Prevents matching ] before >. 233 . ')*+' // Possessive critical. 234 . '\]' // Find end of shortcode. 235 . '\]?'; // Shortcodes may end with ]] 236 } 237 238 $comment_regex = 239 '!' // Start of comment, after the <. 240 . '(?:' // Unroll the loop: Consume everything until --> is found. 241 . '-(?!->)' // Dash not followed by end of comment. 242 . '[^\-]*+' // Consume non-dashes. 243 . ')*+' // Loop possessively. 244 . '(?:-->)?'; // End of comment. If not found, match all input. 245 246 $html_regex = // Needs replaced with wp_html_split() per Shortcode API Roadmap. 247 '<' // Find start of element. 248 . '(?(?=!--)' // Is this a comment? 249 . $comment_regex // Find end of comment. 250 . '|' 251 . '[^>]*>?' // Find end of element. If not found, match all input. 252 . ')'; 253 254 if ( $found_shortcodes ) { 255 $regex = '/(' . $html_regex . '|' . $shortcode_regex . ')/s'; 256 } else { 257 $regex = '/(' . $html_regex . ')/s'; 258 } 222 $shortcode_regex = $found_shortcodes ? 
_get_wptexturize_shortcode_regex( $tagnames ) : ''; 223 $regex = _get_wptexturize_split_regex( $shortcode_regex ); 259 224 260 225 $textarr = preg_split( $regex, $text, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY ); … … 265 230 if ( '<' === $first ) { 266 231 if ( '<!--' === substr( $curl, 0, 4 ) ) { 267 // This is an HTML comment delim eter.232 // This is an HTML comment delimiter. 268 233 continue; 269 234 } else { … … 616 581 */ 617 582 function wp_html_split( $input ) { 583 return preg_split( get_html_split_regex(), $input, -1, PREG_SPLIT_DELIM_CAPTURE ); 584 } 585 586 /** 587 * Retrieve the regular expression for an HTML element. 588 * 589 * @since 4.4.0 590 * 591 * @return string The regular expression 592 */ 593 function get_html_split_regex() { 618 594 static $regex; 619 595 … … 636 612 . '(?:]]>)?'; // End of comment. If not found, match all input. 637 613 614 $escaped = 615 '(?=' // Is the element escaped? 616 . '!--' 617 . '|' 618 . '!\[CDATA\[' 619 . ')' 620 . '(?(?=!-)' // If yes, which type? 621 . $comments 622 . '|' 623 . $cdata 624 . ')'; 625 638 626 $regex = 639 627 '/(' // Capture the entire match. 640 628 . '<' // Find start of element. 641 . '(?(?=!--)' // Is this a comment? 642 . $comments // Find end of comment. 643 . '|' 644 . '(?(?=!\[CDATA\[)' // Is this a comment? 645 . $cdata // Find end of comment. 646 . '|' 647 . '[^>]*>?' // Find end of element. If not found, match all input. 648 . ')' 629 . '(?' // Conditional expression follows. 630 . $escaped // Find end of escaped element. 631 . '|' // ... else ... 632 . '[^>]*>?' // Find end of normal element. 649 633 . ')' 650 . ')/s'; 651 } 652 653 return preg_split( $regex, $input, -1, PREG_SPLIT_DELIM_CAPTURE ); 634 . ')/'; 635 } 636 637 return $regex; 638 } 639 640 /** 641 * Retrieve the combined regular expression for HTML and shortcodes. 642 * 643 * @access private 644 * @ignore 645 * @internal This function will be removed in 4.5.0 per Shortcode API Roadmap. 
646 * @since 4.4.0 647 * 648 * @param string $shortcode_regex The result from _get_wptexturize_shortcode_regex(). Optional. 649 * @return string The regular expression 650 */ 651 function _get_wptexturize_split_regex( $shortcode_regex = '' ) { 652 static $html_regex; 653 654 if ( ! isset( $html_regex ) ) { 655 $comment_regex = 656 '!' // Start of comment, after the <. 657 . '(?:' // Unroll the loop: Consume everything until --> is found. 658 . '-(?!->)' // Dash not followed by end of comment. 659 . '[^\-]*+' // Consume non-dashes. 660 . ')*+' // Loop possessively. 661 . '(?:-->)?'; // End of comment. If not found, match all input. 662 663 $html_regex = // Needs replaced with wp_html_split() per Shortcode API Roadmap. 664 '<' // Find start of element. 665 . '(?(?=!--)' // Is this a comment? 666 . $comment_regex // Find end of comment. 667 . '|' 668 . '[^>]*>?' // Find end of element. If not found, match all input. 669 . ')'; 670 } 671 672 if ( empty( $shortcode_regex ) ) { 673 $regex = '/(' . $html_regex . ')/'; 674 } else { 675 $regex = '/(' . $html_regex . '|' . $shortcode_regex . ')/'; 676 } 677 678 return $regex; 679 } 680 681 /** 682 * Retrieve the regular expression for shortcodes. 683 * 684 * @access private 685 * @ignore 686 * @internal This function will be removed in 4.5.0 per Shortcode API Roadmap. 687 * @since 4.4.0 688 * 689 * @param array $tagnames List of shortcodes to find. 690 * @return string The regular expression 691 */ 692 function _get_wptexturize_shortcode_regex( $tagnames ) { 693 $tagregexp = join( '|', array_map( 'preg_quote', $tagnames ) ); 694 $tagregexp = "(?:$tagregexp)(?=[\\s\\]\\/])"; // Excerpt of get_shortcode_regex(). 695 $regex = 696 '\[' // Find start of shortcode. 697 . '[\/\[]?' // Shortcodes may begin with [/ or [[ 698 . $tagregexp // Only match registered shortcodes, because performance. 699 . '(?:' 700 . '[^\[\]<>]+' // Shortcodes do not contain other shortcodes. Quantifier critical. 701 . '|' 702 . 
'<[^\[\]>]*>' // HTML elements permitted. Prevents matching ] before >. 703 . ')*+' // Possessive critical. 704 . '\]' // Find end of shortcode. 705 . '\]?'; // Shortcodes may end with ]] 706 707 return $regex; 654 708 } 655 709 … … 769 823 . '(?:' . $spaces . ')*+' // optional trailing whitespace 770 824 . '<\\/p>' // closing paragraph 771 . '/ s';825 . '/'; 772 826 773 827 return preg_replace( $pattern, '$1', $pee ); -
trunk/src/wp-includes/shortcodes.php
r34747 r34761 169 169 170 170 if ( shortcode_exists( $tag ) ) { 171 preg_match_all( '/' . get_shortcode_regex() . '/ s', $content, $matches, PREG_SET_ORDER );171 preg_match_all( '/' . get_shortcode_regex() . '/', $content, $matches, PREG_SET_ORDER ); 172 172 if ( empty( $matches ) ) 173 173 return false; … … 220 220 221 221 $pattern = get_shortcode_regex( $tagnames ); 222 $content = preg_replace_callback( "/$pattern/ s", 'do_shortcode_tag', $content );222 $content = preg_replace_callback( "/$pattern/", 'do_shortcode_tag', $content ); 223 223 224 224 // Always restore square braces so we don't break things like <!--[if IE ]> … … 379 379 // Some plugins are doing things like [name] <[email]>. 380 380 if ( 1 === preg_match( '%^<\s*\[\[?[^\[\]]+\]%', $element ) ) { 381 $element = preg_replace_callback( "/$pattern/ s", 'do_shortcode_tag', $element );381 $element = preg_replace_callback( "/$pattern/", 'do_shortcode_tag', $element ); 382 382 } 383 383 … … 408 408 // was written by an administrator, so we should avoid changing the output 409 409 // and we do not need to run KSES here. 410 $attr = preg_replace_callback( "/$pattern/ s", 'do_shortcode_tag', $attr );410 $attr = preg_replace_callback( "/$pattern/", 'do_shortcode_tag', $attr ); 411 411 } else { 412 412 // $attr like 'name = "[shortcode]"' or "name = '[shortcode]'" 413 413 // We do not know if $content was unfiltered. Assume KSES ran before shortcodes. 414 414 $count = 0; 415 $new_attr = preg_replace_callback( "/$pattern/ s", 'do_shortcode_tag', $attr, -1, $count );415 $new_attr = preg_replace_callback( "/$pattern/", 'do_shortcode_tag', $attr, -1, $count ); 416 416 if ( $count > 0 ) { 417 417 // Sanitize the shortcode output using KSES. 
… … 573 573 574 574 $pattern = get_shortcode_regex( $tagnames ); 575 $content = preg_replace_callback( "/$pattern/ s", 'strip_shortcode_tag', $content );575 $content = preg_replace_callback( "/$pattern/", 'strip_shortcode_tag', $content ); 576 576 577 577 // Always restore square braces so we don't break things like <!--[if IE ]> -
trunk/tests/phpunit/includes/utils.php
/**
 * Determine approximate backtrack count when running PCRE.
 *
 * Runs the requested preg operation repeatedly, doubling `pcre.backtrack_limit`
 * each time (starting at 4), until the operation finishes without a PCRE error.
 * The limit that was in effect for that first clean run is the estimate.
 *
 * The previous `pcre.backtrack_limit` value is restored after every attempt.
 *
 * @param string $pattern  Regular expression, including delimiters.
 * @param string $subject  Text to run the expression against.
 * @param string $strategy Which preg function to exercise: 'split', 'match' or 'match_all'.
 * @return int The backtrack count. When no attempt succeeds, this is the first
 *             doubled value above the upper bound (1,000,000 on PHP newer than
 *             5.4.8, otherwise 20,000).
 */
function benchmark_pcre_backtracking( $pattern, $subject, $strategy ) {
	$saved_config = ini_get( 'pcre.backtrack_limit' );

	// Without a recognised strategy no preg function runs at all, and the result
	// would be derived from a stale preg_last_error(). Make that visible.
	if ( ! in_array( $strategy, array( 'split', 'match', 'match_all' ), true ) ) {
		trigger_error( 'Unknown strategy passed to PCRE benchmark.' );
	}

	// Attempt to prevent PHP crashes.  Adjust these lower when needed.
	if ( version_compare( phpversion(), '5.4.8', '>' ) ) {
		$limit = 1000000;
	} else {
		$limit = 20000; // 20,000 is a reasonable upper limit, but see also https://core.trac.wordpress.org/ticket/29557#comment:10
	}

	// Start with small numbers, so if a crash is encountered at higher numbers we can still debug the problem.
	for ( $i = 4; $i <= $limit; $i *= 2 ) {

		ini_set( 'pcre.backtrack_limit', $i );

		switch ( $strategy ) {
			case 'split':
				preg_split( $pattern, $subject );
				break;
			case 'match':
				preg_match( $pattern, $subject );
				break;
			case 'match_all':
				preg_match_all( $pattern, $subject );
				break;
		}

		// Restore the setting before inspecting the outcome so that an early
		// return never leaves the lowered limit in place.
		ini_set( 'pcre.backtrack_limit', $saved_config );

		switch ( preg_last_error() ) {
			case PREG_NO_ERROR:
				return $i;
			case PREG_BACKTRACK_LIMIT_ERROR:
				// A bare `continue` inside `switch` acts like `break` and raises a
				// warning on PHP 7.3+; target the enclosing loop explicitly.
				continue 2;
			case PREG_RECURSION_LIMIT_ERROR:
				trigger_error( 'PCRE recursion limit encountered before backtrack limit.' );
				break;
			case PREG_BAD_UTF8_ERROR:
				trigger_error( 'UTF-8 error during PCRE benchmark.' );
				break;
			case PREG_INTERNAL_ERROR:
				trigger_error( 'Internal error during PCRE benchmark.' );
				break;
			default:
				trigger_error( 'Unexpected error during PCRE benchmark.' );
		}
	}

	// Every attempt failed: $i is now the first doubled value above $limit.
	return $i;
}
trunk/tests/phpunit/tests/formatting/WPTexturize.php
/**
 * Automated performance testing of the main regex.
 *
 * Benchmarks the wptexturize split expression twice against one whole post:
 * first without a shortcode expression, then with an expression built from
 * the keys of the global $shortcode_tags. Each estimate must be below 200.
 *
 * @dataProvider data_whole_posts
 */
function test_pcre_performance( $input ) {
	global $shortcode_tags;

	// HTML only: no shortcode expression is supplied.
	$html_only_regex = _get_wptexturize_split_regex( );
	$html_only_cost  = benchmark_pcre_backtracking( $html_only_regex, $input, 'split' );
	$this->assertLessThan( 200, $html_only_cost );

	// HTML combined with the shortcode expression.
	$tagnames       = array_keys( $shortcode_tags );
	$combined_regex = _get_wptexturize_split_regex( _get_wptexturize_shortcode_regex( $tagnames ) );
	$combined_cost  = benchmark_pcre_backtracking( $combined_regex, $input, 'split' );

	return $this->assertLessThan( 200, $combined_cost );
}

/**
 * Data provider for test_pcre_performance().
 *
 * Loads the whole-posts fixture file once and hands back whatever the global
 * data_whole_posts() function returns (presumably defined by that fixture
 * file; confirm against the test data).
 */
function data_whole_posts() {
	require_once( DIR_TESTDATA . '/formatting/whole-posts.php' );

	$datasets = data_whole_posts();

	return $datasets;
}
trunk/tests/phpunit/tests/shortcode.php
/**
 * Automated performance testing of the main regex.
 *
 * Wraps get_shortcode_regex() in '/' delimiters, benchmarks it with the
 * 'match_all' strategy against one whole post, and requires the estimate
 * to be below 200.
 *
 * @dataProvider data_whole_posts
 */
function test_pcre_performance( $input ) {
	$delimited_regex = '/' . get_shortcode_regex() . '/';
	$backtrack_cost  = benchmark_pcre_backtracking( $delimited_regex, $input, 'match_all' );

	return $this->assertLessThan( 200, $backtrack_cost );
}

/**
 * Data provider for test_pcre_performance().
 *
 * Loads the whole-posts fixture file once and hands back whatever the global
 * data_whole_posts() function returns (presumably defined by that fixture
 * file; confirm against the test data).
 */
function data_whole_posts() {
	require_once( DIR_TESTDATA . '/formatting/whole-posts.php' );

	$datasets = data_whole_posts();

	return $datasets;
}
Note: See TracChangeset
for help on using the changeset viewer.