Ticket #33517: 33517.3.patch
| File 33517.3.patch, 9.5 KB (added by , 10 years ago) |
|---|
-
src/wp-includes/formatting.php
216 216 217 217 // Look for shortcodes and HTML elements. 218 218 219 $tagnames = array_keys( $shortcode_tags ); 220 $tagregexp = join( '|', array_map( 'preg_quote', $tagnames ) ); 221 $tagregexp = "(?:$tagregexp)(?![\\w-])"; // Excerpt of get_shortcode_regex(). 219 preg_match_all( '@\[/?([^<>&/\[\]\x00-\x20]++)@', $text, $matches ); 220 $tagnames = array_intersect( array_keys( $shortcode_tags ), $matches[1] ); 221 $found_shortcodes = ! empty( $tagnames ); 222 if ( $found_shortcodes ) { 223 $tagregexp = join( '|', array_map( 'preg_quote', $tagnames ) ); 224 $tagregexp = "(?:$tagregexp)(?![\\w-])"; // Excerpt of get_shortcode_regex(). 225 $shortcode_regex = 226 '\[' // Find start of shortcode. 227 . '[\/\[]?' // Shortcodes may begin with [/ or [[ 228 . $tagregexp // Only match registered shortcodes, because performance. 229 . '(?:' 230 . '[^\[\]<>]+' // Shortcodes do not contain other shortcodes. Quantifier critical. 231 . '|' 232 . '<[^\[\]>]*>' // HTML elements permitted. Prevents matching ] before >. 233 . ')*+' // Possessive critical. 234 . '\]' // Find end of shortcode. 235 . '\]?'; // Shortcodes may end with ]] 236 } 222 237 223 238 $comment_regex = 224 239 '!' // Start of comment, after the <. … … 228 243 . ')*+' // Loop possessively. 229 244 . '(?:-->)?'; // End of comment. If not found, match all input. 230 245 231 $shortcode_regex = 232 '\[' // Find start of shortcode. 233 . '[\/\[]?' // Shortcodes may begin with [/ or [[ 234 . $tagregexp // Only match registered shortcodes, because performance. 235 . '(?:' 236 . '[^\[\]<>]+' // Shortcodes do not contain other shortcodes. Quantifier critical. 246 $html_regex = // Needs replaced with wp_html_split() per Shortcode API Roadmap. 247 '<' // Find start of element. 248 . '(?(?=!--)' // Is this a comment? 249 . $comment_regex // Find end of comment. 237 250 . '|' 238 . '<[^\[\]>]*>' // HTML elements permitted. Prevents matching ] before >. 239 . ')*+' // Possessive critical. 240 . '\]' // Find end of shortcode. 
241 . '\]?'; // Shortcodes may end with ]] 251 . '[^>]*>?' // Find end of element. If not found, match all input. 252 . ')'; 242 253 243 $regex = 244 '/(' // Capture the entire match. 245 . '<' // Find start of element. 246 . '(?(?=!--)' // Is this a comment? 247 . $comment_regex // Find end of comment. 248 . '|' 249 . '[^>]*>' // Find end of element. 250 . ')' 251 . '|' 252 . $shortcode_regex // Find shortcodes. 253 . ')/s'; 254 if ( $found_shortcodes ) { 255 $regex = '/(' . $html_regex . '|' . $shortcode_regex . ')/s'; 256 } else { 257 $regex = '/(' . $html_regex . ')/s'; 258 } 254 259 255 260 $textarr = preg_split( $regex, $text, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY ); 256 261 … … 257 262 foreach ( $textarr as &$curl ) { 258 263 // Only call _wptexturize_pushpop_element if $curl is a delimiter. 259 264 $first = $curl[0]; 260 if ( '<' === $first && '<!--' === substr( $curl, 0, 4 ) ) { 261 // This is an HTML comment delimiter. 265 if ( '<' === $first ) { 266 if ( '<!--' === substr( $curl, 0, 4 ) ) { 267 // This is an HTML comment delimiter. 268 continue; 269 } else { 270 // This is an HTML element delimiter. 271 _wptexturize_pushpop_element( $curl, $no_texturize_tags_stack, $no_texturize_tags ); 272 } 262 273 263 continue;264 265 } elseif ( '<' === $first && '>' === substr( $curl, -1 ) ) {266 // This is an HTML element delimiter.267 268 _wptexturize_pushpop_element( $curl, $no_texturize_tags_stack, $no_texturize_tags );269 270 274 } elseif ( '' === trim( $curl ) ) { 271 275 // This is a newline between delimiters. Performance improves when we check this. 272 273 276 continue; 274 277 275 } elseif ( '[' === $first && 1 === preg_match( '/^' . $shortcode_regex . '$/', $curl ) ) {278 } elseif ( '[' === $first && $found_shortcodes && 1 === preg_match( '/^' . $shortcode_regex . '$/', $curl ) ) { 276 279 // This is a shortcode delimiter. 277 280 278 281 if ( '[[' !== substr( $curl, 0, 2 ) && ']]' !== substr( $curl, -2 ) ) { -
src/wp-includes/shortcodes.php
195 195 if (empty($shortcode_tags) || !is_array($shortcode_tags)) 196 196 return $content; 197 197 198 $tagnames = array_keys($shortcode_tags);199 $tagregexp = join( '|', array_map('preg_quote', $tagnames));200 $ pattern = "/\\[($tagregexp)/s";198 // Find all registered tag names in $content. 199 preg_match_all( '@\[([^<>&/\[\]\x00-\x20]++)@', $content, $matches ); 200 $tagnames = array_intersect( array_keys( $shortcode_tags ), $matches[1] ); 201 201 202 if ( 1 !== preg_match( $pattern, $content ) ) { 203 // Avoids parsing HTML when there are no shortcodes or embeds anyway. 202 if ( empty( $tagnames ) ) { 204 203 return $content; 205 204 } 206 205 207 $content = do_shortcodes_in_html_tags( $content, $ignore_html );206 $content = do_shortcodes_in_html_tags( $content, $ignore_html, $tagnames ); 208 207 209 $pattern = get_shortcode_regex( );208 $pattern = get_shortcode_regex( $tagnames ); 210 209 $content = preg_replace_callback( "/$pattern/s", 'do_shortcode_tag', $content ); 211 210 212 211 // Always restore square braces so we don't break things like <!--[if IE ]> … … 234 233 * 235 234 * @global array $shortcode_tags 236 235 * 236 * @param array $tagnames List of shortcodes to find. Optional. Defaults to all registered shortcodes. 237 237 * @return string The shortcode search regular expression 238 238 */ 239 function get_shortcode_regex( ) {239 function get_shortcode_regex( $tagnames = null ) { 240 240 global $shortcode_tags; 241 $tagnames = array_keys($shortcode_tags); 241 242 if ( empty( $tagnames ) ) { 243 $tagnames = array_keys( $shortcode_tags ); 244 } 242 245 $tagregexp = join( '|', array_map('preg_quote', $tagnames) ); 243 246 244 247 // WARNING! Do not change this regex without changing do_shortcode_tag() and strip_shortcode_tag() … … 324 327 * 325 328 * @param string $content Content to search for shortcodes 326 329 * @param bool $ignore_html When true, all square braces inside elements will be encoded. 
330 * @param array $tagnames List of shortcodes to find. 327 331 * @return string Content with shortcodes filtered out. 328 332 */ 329 function do_shortcodes_in_html_tags( $content, $ignore_html ) {333 function do_shortcodes_in_html_tags( $content, $ignore_html, $tagnames ) { 330 334 // Normalize entities in unfiltered HTML before adding placeholders. 331 335 $trans = array( '[' => '[', ']' => ']' ); 332 336 $content = strtr( $content, $trans ); 333 337 $trans = array( '[' => '[', ']' => ']' ); 334 338 335 $pattern = get_shortcode_regex( );339 $pattern = get_shortcode_regex( $tagnames ); 336 340 $textarr = wp_html_split( $content ); 337 341 338 342 foreach ( $textarr as &$element ) { … … 541 545 if (empty($shortcode_tags) || !is_array($shortcode_tags)) 542 546 return $content; 543 547 544 $content = do_shortcodes_in_html_tags( $content, true ); 548 // Find all registered tag names in $content. 549 preg_match_all( '@\[([^<>&/\[\]\x00-\x20]++)@', $content, $matches ); 550 $tagnames = array_intersect( array_keys( $shortcode_tags ), $matches[1] ); 545 551 546 $pattern = get_shortcode_regex(); 552 if ( empty( $tagnames ) ) { 553 return $content; 554 } 555 556 $content = do_shortcodes_in_html_tags( $content, true, $tagnames ); 557 558 $pattern = get_shortcode_regex( $tagnames ); 547 559 $content = preg_replace_callback( "/$pattern/s", 'strip_shortcode_tag', $content ); 548 560 549 561 // Always restore square braces so we don't break things like <!--[if IE ]> -
tests/phpunit/tests/formatting/WPTexturize.php
374 374 "word [‘word word", 375 375 ), 376 376 array( 377 "word <'word word", // Invalid HTML input triggers the apos in a word pattern.378 "word < ’word word",377 "word <'word word", // Invalid HTML 378 "word <'word word", 379 379 ), 380 380 array( 381 381 "word <'word word", // Valid HTML input makes curly quotes. … … 403 403 ), 404 404 array( 405 405 "word<'word word", 406 "word< ’word word",406 "word<'word word", 407 407 ), 408 408 array( 409 409 "word<'word word", … … 431 431 ), 432 432 array( 433 433 "word <' word word", 434 "word < ’word word",434 "word <' word word", 435 435 ), 436 436 array( 437 437 "word <' word word", … … 459 459 ), 460 460 array( 461 461 "word<' word word", 462 "word< ’word word",462 "word<' word word", 463 463 ), 464 464 array( 465 465 "word<' word word", … … 610 610 'word [“word word', 611 611 ), 612 612 array( 613 'word <"word word', // Invalid HTML input triggers the closing quote pattern.614 'word < ”word word',613 'word <"word word', // Invalid HTML 614 'word <"word word', 615 615 ), 616 616 array( 617 617 'word <"word word', … … 643 643 ), 644 644 array( 645 645 'word<"word word', 646 'word< ”word word',646 'word<"word word', 647 647 ), 648 648 array( 649 649 'word<"word word', … … 1312 1312 ), 1313 1313 array( 1314 1314 '<br [gallery ...] ... /', 1315 '<br [gallery ...] …/',1315 '<br [gallery ...] ... /', 1316 1316 ), 1317 1317 array( 1318 1318 '<br ... />', … … 1352 1352 ), 1353 1353 array( 1354 1354 '<br [[gallery ...]] ... /', 1355 '<br [[gallery ...]] …/',1355 '<br [[gallery ...]] ... /', 1356 1356 ), 1357 1357 array( 1358 1358 '[[gallery ...]]...[[gallery ...]]',