Ticket #18549: 18549_wptexturize.4.diff
File 18549_wptexturize.4.diff, 34.1 KB (added 8 years ago)
-
src/wp-includes/formatting.php
54 54 $closing_quote = null, 55 55 $opening_single_quote = null, 56 56 $closing_single_quote = null, 57 $ open_q_flag = '<!--oq-->',58 $ open_sq_flag = '<!--osq-->',59 $ apos_flag = '<!--apos-->';57 $apos_flag, $open_sq_flag, $open_q_flag, $close_sq_flag, $close_q_flag, $prime_sq_flag, $prime_q_flag, $sq_flag, $q_flag, $primes_flag, 58 $flags_sq, $flags_q, $reals_sq, $reals_q, 59 $spaces; 60 60 61 61 // If there's nothing to do, just stop. 62 62 if ( empty( $text ) || false === $run_texturize ) { … … 107 107 /* translators: em dash */ 108 108 $em_dash = _x( '—', 'em dash' ); 109 109 110 // Standardize size of flags to max of primes/quotes manipulated by wptexturize_primes(). 111 // This will allow wptexturize_primes() to do its replacements without worrying about offsets changing. 112 $flag_len = max( 5, strlen( $closing_quote ), strlen( $prime ), strlen( $double_prime ), strlen( $closing_single_quote ) ); 113 114 $apos_flag = str_pad( '<i a>', $flag_len, '>' ); 115 $open_sq_flag = str_pad( '<i o>', $flag_len, '>' ); 116 $close_sq_flag = str_pad( '<i c>', $flag_len, '>' ); 117 $prime_sq_flag = str_pad( '<i p>', $flag_len, '>' ); 118 $prime_q_flag = str_pad( '<i P>', $flag_len, '>' ); 119 $open_q_flag = str_pad( '<i O>', $flag_len, '>' ); 120 $close_q_flag = str_pad( '<i C>', $flag_len, '>' ); 121 $sq_flag = str_repeat( "'", $flag_len ); 122 $q_flag = str_repeat( '"', $flag_len ); 123 $primes_flag = str_pad( '<i f>', $flag_len, '>' ); 124 125 // Flags & reals arrays - used to reinstate the real values. 
126 $flags_sq = array( $sq_flag, $prime_sq_flag, $open_sq_flag, $close_sq_flag, $apos_flag ); 127 $reals_sq = array( "'", $prime, $opening_single_quote, $closing_single_quote, $apos ); 128 $flags_q = array( $q_flag, $prime_q_flag, $open_q_flag, $close_q_flag ); 129 $reals_q = array( '"', $double_prime, $opening_quote, $closing_quote ); 130 110 131 $default_no_texturize_tags = array('pre', 'code', 'kbd', 'style', 'script', 'tt'); 111 132 $default_no_texturize_shortcodes = array('code'); 112 133 … … 139 160 140 161 // '99' and '99" are ambiguous among other patterns; assume it's an abbreviated year at the end of a quotation. 141 162 if ( "'" !== $apos || "'" !== $closing_single_quote ) { 142 $dynamic[ '/\'(\d\d)\'(?=\Z|[.,:;!?)}\-\]]|>|' . $spaces . ')/' ] = $apos_flag . '$1' . $clos ing_single_quote;163 $dynamic[ '/\'(\d\d)\'(?=\Z|[.,:;!?)}\-\]]|>|' . $spaces . ')/' ] = $apos_flag . '$1' . $close_sq_flag; 143 164 } 144 165 if ( "'" !== $apos || '"' !== $closing_quote ) { 145 $dynamic[ '/\'(\d\d)"(?=\Z|[.,:;!?)}\-\]]|>|' . $spaces . ')/' ] = $apos_flag . '$1' . $clos ing_quote;166 $dynamic[ '/\'(\d\d)"(?=\Z|[.,:;!?)}\-\]]|>|' . $spaces . ')/' ] = $apos_flag . '$1' . $close_q_flag; 146 167 } 147 168 148 169 // '99 '99s '99's (apostrophe) But never '9 or '99% or '999 or '99.0. … … 151 172 } 152 173 153 174 // Quoted Numbers like '0.42' 154 if ( "'" !== $opening_single_quote &&"'" !== $closing_single_quote ) {155 $dynamic[ '/(?<=\A|' . $spaces . ')\'(\d[.,\d]*)\'/' ] = $open_sq_flag . '$1' . $clos ing_single_quote;175 if ( "'" !== $opening_single_quote || "'" !== $closing_single_quote ) { 176 $dynamic[ '/(?<=\A|' . $spaces . ')\'(\d[.,\d]*)\'/' ] = $open_sq_flag . '$1' . $close_sq_flag; 156 177 } 157 178 158 179 // Single quote at start, or preceded by (, {, <, [, ", -, or spaces. … … 170 191 $dynamic = array(); 171 192 172 193 // Quoted Numbers like "42" 173 if ( '"' !== $opening_quote &&'"' !== $closing_quote ) {174 $dynamic[ '/(?<=\A|' . $spaces . 
')"(\d[.,\d]*)"/' ] = $open_q_flag . '$1' . $clos ing_quote;194 if ( '"' !== $opening_quote || '"' !== $closing_quote ) { 195 $dynamic[ '/(?<=\A|' . $spaces . ')"(\d[.,\d]*)"/' ] = $open_q_flag . '$1' . $close_q_flag; 175 196 } 176 197 177 198 // Double quote at start, or preceded by (, {, <, [, -, or spaces, and not followed by spaces. … … 214 235 $no_texturize_tags_stack = array(); 215 236 $no_texturize_shortcodes_stack = array(); 216 237 217 // Look for shortcodes and HTML elements.218 219 238 preg_match_all( '@\[/?([^<>&/\[\]\x00-\x20=]++)@', $text, $matches ); 220 239 $tagnames = array_intersect( array_keys( $shortcode_tags ), $matches[1] ); 221 $found_shortcodes = ! empty( $tagnames );222 $shortcode_regex = $found_shortcodes ? _get_wptexturize_shortcode_regex( $tagnames ) : '';223 $regex = _get_wptexturize_split_regex( $shortcode_regex );224 240 225 $textarr = preg_split( $regex, $text, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY ); 241 if ( $tagnames ) { 242 // Set up shortcodes regular expression (used to strip within each split text part). 243 $shortcode_regex = '|' . _get_wptexturize_shortcode_regex( $tagnames ); 226 244 227 foreach ( $textarr as &$curl ) { 228 // Only call _wptexturize_pushpop_element if $curl is a delimiter. 229 $first = $curl[0]; 230 if ( '<' === $first ) { 231 if ( '<!--' === substr( $curl, 0, 4 ) ) { 232 // This is an HTML comment delimiter. 233 continue; 234 } else { 235 // This is an HTML element delimiter. 245 // Set up no texturize shortcodes regular expression (used to split text input). 246 // No texturize shortcodes must also be registered to be ignored, so intersect with tagnames array. 247 $no_texturize_shortcodes = array_intersect( $no_texturize_shortcodes, $tagnames ); 248 $no_texturize_shortcode_regex = $no_texturize_shortcodes ? 
_get_wptexturize_shortcode_regex( $no_texturize_shortcodes ) : ''; 249 } else { 250 $shortcode_regex = $no_texturize_shortcode_regex = ''; 251 } 236 252 237 // Replace each & with & unless it already looks like an entity. 238 $curl = preg_replace( '/&(?!#(?:\d+|x[a-f0-9]+);|[a-z1-4]{1,8};)/i', '&', $curl ); 253 // Look for comments, non-inline (non-split) HTML elements and no texturize shortcodes. 239 254 240 _wptexturize_pushpop_element( $curl, $no_texturize_tags_stack, $no_texturize_tags ); 255 $regex = _get_wptexturize_split_regex( $no_texturize_shortcode_regex ); 256 257 $textarr = preg_split( $regex, $text, -1, PREG_SPLIT_DELIM_CAPTURE ); 258 259 foreach ( $textarr as $curl_idx => &$curl ) { 260 if ( 1 === $curl_idx % 2 ) { 261 // Delimiter. 262 $first = $curl[0]; 263 if ( '<' === $first ) { 264 // If not a comment. 265 if ( '<!--' !== substr( $curl, 0, 4 ) ) { 266 // This is an HTML element delimiter. 267 268 // Replace each & with & unless it already looks like an entity. 269 $curl = preg_replace( '/&(?!#(?:\d+|x[a-f0-9]+);|[a-z1-4]{1,8};)/i', '&', $curl ); 270 271 _wptexturize_pushpop_element( $curl, $no_texturize_tags_stack, $no_texturize_tags ); 272 } 273 } elseif ( '[' === $first ) { 274 // This is a shortcode delimiter. 275 276 if ( '[[' !== substr( $curl, 0, 2 ) && ']]' !== substr( $curl, -2 ) ) { 277 // Looks like a normal shortcode. 278 _wptexturize_pushpop_element( $curl, $no_texturize_shortcodes_stack, $no_texturize_shortcodes ); 279 } else { 280 // Looks like an escaped shortcode. 281 } 241 282 } 283 } elseif ( empty( $no_texturize_shortcodes_stack ) && empty( $no_texturize_tags_stack ) && '' !== trim( $curl ) ) { 284 // This is neither a delimiter, nor is this content inside of no_texturize pairs. Do texturize. 242 285 243 } elseif ( '' === trim( $curl ) ) { 244 // This is a newline between delimiters. Performance improves when we check this. 245 continue; 286 // Add a space to any <br>s so that when stripped will be recognized as whitespace. 
287 if ( $have_br = ( false !== stripos( $curl, '<br' ) ) ) { 288 $curl = preg_replace( '/<br[^>]*>/i', '$0 ', $curl ); 289 } 246 290 247 } elseif ( '[' === $first && $found_shortcodes && 1 === preg_match( '/^' . $shortcode_regex . '$/', $curl ) ) { 248 // This is a shortcode delimiter. 291 if ( wptexturize_replace_init( $curl, '/<[^>]*>' . $shortcode_regex . '/S' ) ) { // The study option here makes a big difference. 249 292 250 if ( '[[' !== substr( $curl, 0, 2 ) && ']]' !== substr( $curl, -2 ) ) { 251 // Looks like a normal shortcode. 252 _wptexturize_pushpop_element( $curl, $no_texturize_shortcodes_stack, $no_texturize_shortcodes ); 293 wptexturize_replace_str( $curl, $static_characters, $static_replacements ); 294 295 if ( false !== strpos( $curl, "'" ) ) { 296 wptexturize_replace_regex( $curl, $dynamic_characters['apos'], $dynamic_replacements['apos'] ); 297 wptexturize_replace_str( $curl, "'", $sq_flag ); // Substitute single quotes with same-sized dummy so that wptexturize_primes() doesn't alter size of string. 298 $curl = wptexturize_primes( $curl, $sq_flag, $prime_sq_flag, $open_sq_flag, $close_sq_flag, $primes_flag, $spaces ); 299 wptexturize_replace_str( $curl, $flags_sq, $reals_sq ); // Reinstate real values. 300 } 301 if ( false !== strpos( $curl, '"' ) ) { 302 wptexturize_replace_regex( $curl, $dynamic_characters['quote'], $dynamic_replacements['quote'] ); 303 wptexturize_replace_str( $curl, '"', $q_flag ); // Substitute double quotes with same-sized dummy so that wptexturize_primes() doesn't alter size of string. 304 $curl = wptexturize_primes( $curl, $q_flag, $prime_q_flag, $open_q_flag, $close_q_flag, $primes_flag, $spaces ); 305 wptexturize_replace_str( $curl, $flags_q, $reals_q ); // Reinstate real values. 
306 } 307 if ( false !== strpos( $curl, '-' ) ) { 308 wptexturize_replace_regex( $curl, $dynamic_characters['dash'], $dynamic_replacements['dash'] ); 309 } 310 311 // 9x9 (times), but never 0x9999 312 if ( 1 === preg_match( '/(?<=\d)x\d/', $curl ) ) { 313 // Searching for a digit is 10 times more expensive than for the x, so we avoid doing this one! 314 wptexturize_replace_regex( $curl, '/\b(\d(?(?<=0)[\d\.,]+|[\d\.,]*))x(?=\d[\d\.,]*\b)/', '$1×' ); // Changed to use look ahead as can only deal with a single sub-replacement. 315 } 316 317 wptexturize_replace_final( $curl ); 318 253 319 } else { 254 // Looks like an escaped shortcode.255 continue;256 }257 320 258 } elseif ( empty( $no_texturize_shortcodes_stack ) && empty( $no_texturize_tags_stack ) ) { 259 // This is neither a delimiter, nor is this content inside of no_texturize pairs. Do texturize. 321 $curl = str_replace( $static_characters, $static_replacements, $curl ); 260 322 261 $curl = str_replace( $static_characters, $static_replacements, $curl ); 323 if ( false !== strpos( $curl, "'" ) ) { 324 $curl = preg_replace( $dynamic_characters['apos'], $dynamic_replacements['apos'], $curl ); 325 $curl = wptexturize_primes( $curl, "'", $prime, $open_sq_flag, $close_sq_flag, $primes_flag, $spaces ); 326 $curl = str_replace( array( $apos_flag, $open_sq_flag, $close_sq_flag ), array( $apos, $opening_single_quote, $closing_single_quote ), $curl ); 327 } 328 if ( false !== strpos( $curl, '"' ) ) { 329 $curl = preg_replace( $dynamic_characters['quote'], $dynamic_replacements['quote'], $curl ); 330 $curl = wptexturize_primes( $curl, '"', $double_prime, $open_q_flag, $close_q_flag, $primes_flag, $spaces ); 331 $curl = str_replace( array( $open_q_flag, $close_q_flag ), array( $opening_quote, $closing_quote ), $curl ); 332 } 333 if ( false !== strpos( $curl, '-' ) ) { 334 $curl = preg_replace( $dynamic_characters['dash'], $dynamic_replacements['dash'], $curl ); 335 } 262 336 263 if ( false !== strpos( $curl, "'" ) ) {264 
$curl = preg_replace( $dynamic_characters['apos'], $dynamic_replacements['apos'], $curl );265 $curl = wptexturize_primes( $curl, "'", $prime, $open_sq_flag, $closing_single_quote );266 $curl = str_replace( $apos_flag, $apos, $curl );267 $curl = str_replace( $open_sq_flag, $opening_single_quote, $curl );337 // 9x9 (times), but never 0x9999 338 if ( 1 === preg_match( '/(?<=\d)x\d/', $curl ) ) { 339 // Searching for a digit is 10 times more expensive than for the x, so we avoid doing this one! 340 $curl = preg_replace( '/\b(\d(?(?<=0)[\d\.,]+|[\d\.,]*))x(\d[\d\.,]*)\b/', '$1×$2', $curl ); 341 } 268 342 } 269 if ( false !== strpos( $curl, '"' ) ) {270 $curl = preg_replace( $dynamic_characters['quote'], $dynamic_replacements['quote'], $curl );271 $curl = wptexturize_primes( $curl, '"', $double_prime, $open_q_flag, $closing_quote );272 $curl = str_replace( $open_q_flag, $opening_quote, $curl );273 }274 if ( false !== strpos( $curl, '-' ) ) {275 $curl = preg_replace( $dynamic_characters['dash'], $dynamic_replacements['dash'], $curl );276 }277 343 278 // 9x9 (times), but never 0x9999 279 if ( 1 === preg_match( '/(?<=\d)x\d/', $curl ) ) { 280 // Searching for a digit is 10 times more expensive than for the x, so we avoid doing this one! 281 $curl = preg_replace( '/\b(\d(?(?<=0)[\d\.,]+|[\d\.,]*))x(\d[\d\.,]*)\b/', '$1×$2', $curl ); 344 // Remove any spaces added to <br>s at the start. 345 if ( $have_br ) { 346 $curl = preg_replace( '/(<br[^>]*>) /i', '$1', $curl ); 282 347 } 283 348 284 349 // Replace each & with & unless it already looks like an entity. … … 285 350 $curl = preg_replace( '/&(?!#(?:\d+|x[a-f0-9]+);|[a-z1-4]{1,8};)/i', '&', $curl ); 286 351 } 287 352 } 288 289 353 return implode( '', $textarr ); 290 354 } 291 355 … … 303 367 * @param string $close_quote The closing quote char to use for replacement. 304 368 * @return string The $haystack value after primes and quotes replacements. 
305 369 */ 306 function wptexturize_primes( $haystack, $needle, $prime, $open_quote, $close_quote ) { 307 $spaces = wp_spaces_regexp(); 308 $flag = '<!--wp-prime-or-quote-->'; 370 function wptexturize_primes( $haystack, $needle, $prime, $open_quote, $close_quote, $flag, $spaces ) { 371 $flag_len = strlen( $flag ); 309 372 $quote_pattern = "/$needle(?=\\Z|[.,:;!?)}\\-\\]]|>|" . $spaces . ")/"; 310 373 $prime_pattern = "/(?<=\\d)$needle/"; 311 374 $flag_after_digit = "/(?<=\\d)$flag/"; … … 332 395 // This is most likely to be problematic in the context of bug #18549. 333 396 $pos = strrpos( $sentence, $flag ); 334 397 } 335 $sentence = substr_replace( $sentence, $close_quote, $pos, strlen( $flag ));398 $sentence = substr_replace( $sentence, $close_quote, $pos, $flag_len ); 336 399 } 337 400 // Use conventional replacement on any remaining primes and quotes. 338 $sentence = preg_replace( $prime_pattern, $prime, $sentence ); 339 $sentence = preg_replace( $flag_after_digit, $prime, $sentence ); 401 $sentence = preg_replace( array( $prime_pattern, $flag_after_digit ), $prime, $sentence ); 340 402 $sentence = str_replace( $flag, $close_quote, $sentence ); 341 } elseif ( 1 == $count ) {403 } elseif ( 1 === $count ) { 342 404 // Found only one closing quote candidate, so give it priority over primes. 
343 405 $sentence = str_replace( $flag, $close_quote, $sentence ); 344 406 $sentence = preg_replace( $prime_pattern, $prime, $sentence ); … … 347 409 $sentence = preg_replace( $prime_pattern, $prime, $sentence ); 348 410 } 349 411 } else { 350 $sentence = preg_replace( $prime_pattern, $prime, $sentence ); 351 $sentence = preg_replace( $quote_pattern, $close_quote, $sentence ); 412 $sentence = preg_replace( array( $prime_pattern, $quote_pattern ), array( $prime, $close_quote ), $sentence ); 352 413 } 353 if ( '"' == $needle && false !== strpos( $sentence, '"') ) {354 $sentence = str_replace( '"', $close_quote, $sentence );414 if ( '"' === $needle[0] && false !== strpos( $sentence, $needle ) ) { 415 $sentence = str_replace( $needle, $close_quote, $sentence ); 355 416 } 356 417 } 357 418 … … 375 436 function _wptexturize_pushpop_element( $text, &$stack, $disabled_elements ) { 376 437 // Is it an opening tag or closing tag? 377 438 if ( isset( $text[1] ) && '/' !== $text[1] ) { 378 $opening_tag = true; 379 $name_offset = 1; 380 } elseif ( 0 == count( $stack ) ) { 381 // Stack is empty. Just stop. 382 return; 383 } else { 384 $opening_tag = false; 385 $name_offset = 2; 386 } 387 388 // Parse out the tag name. 389 $space = strpos( $text, ' ' ); 390 if ( false === $space ) { 391 $space = -1; 392 } else { 393 $space -= $name_offset; 394 } 395 $tag = substr( $text, $name_offset, $space ); 396 397 // Handle disabled tags. 398 if ( in_array( $tag, $disabled_elements ) ) { 399 if ( $opening_tag ) { 439 $space = strpos( $text, ' ' ); 440 if ( false === $space ) { 441 $tag = substr( $text, 1, -1 ); 442 } else { 443 $tag = substr( $text, 1, $space - 1 ); 444 } 445 if ( in_array( $tag, $disabled_elements ) ) { // If $disabled_elements was array_flipped then could use hash lookup isset( $disabled_elements[ $tag ] ) here instead of linear lookup. 400 446 /* 401 447 * This disables texturize until we find a closing tag of our type 402 448 * (e.g. 
<pre>) even if there was invalid nesting before that … … 404 450 * Example: in the case <pre>sadsadasd</code>"baba"</pre> 405 451 * "baba" won't be texturize 406 452 */ 407 408 array_push( $stack, $tag ); 409 } elseif ( end( $stack ) == $tag ) { 453 $stack[] = $tag; 454 } 455 } elseif ( $stack ) { 456 $space = strpos( $text, ' ' ); 457 if ( false === $space ) { 458 $tag = substr( $text, 2, -1 ); 459 } else { 460 $tag = substr( $text, 2, $space - 2 ); 461 } 462 if ( in_array( $tag, $disabled_elements ) && end( $stack ) === $tag ) { // Sim. could use isset( $disabled_elements[ $tag ] ) if above. 410 463 array_pop( $stack ); 411 464 } 412 465 } … … 413 466 } 414 467 415 468 /** 469 * Initialize the stripped string routines wptexturize_replace_XXX, setting the globals used. 470 * $str will be stripped of any strings that match the regular expression $search. 471 */ 472 function wptexturize_replace_init( &$str, $search ) { 473 global $wptexturize_strip_cnt, $wptexturize_strips, $wptexturize_adjusts; 474 475 $wptexturize_strip_cnt = 0; 476 477 if ( preg_match_all( $search, $str, $matches, PREG_OFFSET_CAPTURE ) ) { 478 $wptexturize_strips = $wptexturize_adjusts = $strs = array(); 479 $diff = 0; 480 foreach ( $matches[0] as $entry ) { 481 list( $match, $offset ) = $entry; 482 $len = strlen( $match ); 483 // Save details of stripped string. 484 $wptexturize_strips[] = array( $match, $offset - $diff /*, $len /* Store len if not using byte array in wptexturize_replace_final(). */ ); 485 $diff += $len; 486 $strs[] = $match; // If using str_replace rather than (safer) preg_replace. 487 } 488 if ( $wptexturize_strip_cnt = count( $wptexturize_strips ) ) { 489 $str = str_replace( $strs, '', $str ); // Assuming simple matches replaceable in whole string (otherwise need to do preg_replace( $search, '', $str )). 
490 } 491 } 492 return $wptexturize_strip_cnt; 493 } 494 495 /** 496 * Do a straight (non-regexp) string substitution, keeping tabs on the offset adjustments if have a stripped string. 497 */ 498 function wptexturize_replace_str( &$str, $search, $repl ) { 499 global $wptexturize_strip_cnt, $wptexturize_adjusts; 500 501 if ( $wptexturize_strip_cnt ) { 502 // Process simple string search, given replacement string $repl. 503 $searches = is_array( $search ) ? $search : array( $search ); 504 $repls = is_array( $repl ) ? $repl : array( $repl ); 505 506 // As replacements could interfere with later ones, treat each separately. 507 foreach ( $searches as $idx => $search_str ) { 508 if ( false !== ( $offset = strpos( $str, $search_str ) ) ) { 509 $repl_str = $repls[$idx]; 510 $repl_len = strlen( $repl_str ); 511 $len = strlen( $search_str ); 512 $diff_len = $repl_len - $len; 513 if ( $diff_len ) { 514 $diff = 0; 515 do { 516 // Store adjustment details. 517 $wptexturize_adjusts[] = array( $offset + $diff, $repl_len, $len ); 518 $diff += $diff_len; 519 } while ( false !== ( $offset = strpos( $str, $search_str, $offset + $len ) ) ); 520 } 521 $str = str_replace( $search_str, $repl_str, $str ); 522 } 523 } 524 } else { 525 $str = str_replace( $search, $repl, $str ); 526 } 527 } 528 529 /** 530 * Do a regexp string substitution, keeping tabs on the offset adjustments if have a stripped string. 531 */ 532 function wptexturize_replace_regex( &$str, $search, $repl ) { 533 global $wptexturize_strip_cnt, $wptexturize_adjusts; 534 535 if ( $wptexturize_strip_cnt ) { 536 // Process regex, given replacement string $repl. 537 $searches = is_array( $search ) ? $search : array( $search ); 538 $repls = is_array( $repl ) ? $repl : array( $repl ); 539 540 // As replacements could interfere with later ones, treat each separately. 
541 foreach ( $searches as $idx => $re ) { 542 if ( preg_match_all( $re, $str, $matches, PREG_OFFSET_CAPTURE ) ) { 543 $repl_str = $repls[$idx]; 544 $repl_len = strlen( $repl_str ); 545 $diff = 0; 546 // Allow for a single captured replacement. 547 if ( false !== ( $pos1 = strpos( $repl_str, '$1' ) ) ) { 548 foreach ( $matches[0] as $i => $entry ) { 549 list( $match, $offset ) = $entry; 550 // For a 'pre$1post' replacement, need to track pre-submatch replace and then post-submatch replace. 551 $pre_repl_len = $pos1; 552 $pre_len = $matches[1][$i][1] - $offset; // Submatch offset less full match offset. 553 if ( $pre_repl_len !== $pre_len ) { 554 // Store adjustment details. 555 $wptexturize_adjusts[] = array( $offset + $diff, $pre_repl_len, $pre_len ); 556 $diff += $pre_repl_len - $pre_len; 557 } 558 $len1 = strlen( $matches[1][$i][0] ); // Length of submatch string. 559 $post_repl_len = $repl_len - ( $pre_repl_len + 2 ); 560 $post_len = strlen( $match ) - ( $pre_len + $len1 ); 561 if ( $post_repl_len !== $post_len ) { 562 // Store adjustment details. 563 $offset += $pre_len + $len1; // Jump over substituted pre-string & submatch. 564 $wptexturize_adjusts[] = array( $offset + $diff, $post_repl_len, $post_len ); 565 $diff += $post_repl_len - $post_len; 566 } 567 } 568 } else { 569 foreach ( $matches[0] as $entry ) { 570 list( $match, $offset ) = $entry; 571 $len = strlen( $match ); 572 if ( $repl_len !== $len ) { 573 // Store adjustment details. 574 $wptexturize_adjusts[] = array( $offset + $diff, $repl_len, $len ); 575 $diff += $repl_len - $len; 576 } 577 } 578 } 579 $str = preg_replace( $re, $repl_str, $str ); 580 } 581 } 582 } else { 583 $str = preg_replace( $search, $repl, $str ); 584 } 585 } 586 587 /** 588 * Restore stripped strings to $str. 589 */ 590 function wptexturize_replace_final( &$str ) { 591 global $wptexturize_strip_cnt, $wptexturize_strips, $wptexturize_adjusts; 592 593 // Finalize - restore stripped strings. 
594 if ( $wptexturize_strip_cnt ) { 595 // Calculate offset adjustments. 596 foreach ( $wptexturize_adjusts as $entry ) { 597 list( $offset, $repl_len, $len ) = $entry; 598 for ( $i = $wptexturize_strip_cnt - 1; $i >= 0 && $offset < ( $strip_offset = &$wptexturize_strips[$i][1]); $i-- ) { 599 if ( $len > 1 && $offset + 1 < $strip_offset ) { 600 $strip_offset += $repl_len - $len; 601 } else { 602 $strip_offset += $repl_len - 1; 603 } 604 } 605 } 606 607 // Restore stripped strings. 608 $str_arr = str_split( $str ); // Using byte array (seems to be a bit quicker than substr_replace()). 609 array_unshift( $str_arr, '' ); 610 foreach ( $wptexturize_strips as $entry ) { 611 list( $strip, $offset ) = $entry; 612 $str_arr[$offset] .= $strip; 613 } 614 $str = implode( '', $str_arr ); 615 unset( $str_arr ); 616 /* If not using byte array. (Note need to store $len in wptexturize_replace_init()). 617 $diff = 0; 618 foreach ( $wptexturize_strips as $entry ) { 619 list( $strip, $offset, $len ) = $entry; 620 $str = substr_replace( $str, $strip, $offset + $diff, 0 ); 621 $diff += $len; 622 } 623 /**/ 624 $wptexturize_strip_cnt = 0; 625 } 626 } 627 628 /** 416 629 * Replaces double line-breaks with paragraph elements. 417 630 * 418 631 * A group of regex replaces used to identify text formatted with newlines and … … 665 878 . ')*+' // Loop possessively. 666 879 . '(?:-->)?'; // End of comment. If not found, match all input. 667 880 881 $nonsplit_regex = '\/?(?:a|abbr|b|big|br|cite|dfn|em|i|mark|q|s|samp|small|span|strong|sub|sup|u|var)(?![0-9A-Za-z])[^>]*>'; 882 668 883 $html_regex = // Needs replaced with wp_html_split() per Shortcode API Roadmap. 669 884 '<' // Find start of element. 670 885 . '(?(?=!--)' // Is this a comment? 671 886 . $comment_regex // Find end of comment. 672 887 . '|' 888 . '(?!' . $nonsplit_regex . ')' // Exclude inline html elements. 673 889 . '[^>]*>?' // Find end of element. If not found, match all input. 674 890 . ')'; 675 891 } -
tests/phpunit/tests/formatting/WPTexturize.php
80 80 */ 81 81 function test_quotes() { 82 82 $this->assertEquals('“Quoted String”', wptexturize('"Quoted String"')); 83 //$this->assertEquals('Here is “<a href="http://example.com">a test with a link</a>”', wptexturize('Here is "<a href="http://example.com">a test with a link</a>"'));84 //$this->assertEquals('Here is “<a href="http://example.com">a test with a link and a period</a>”.', wptexturize('Here is "<a href="http://example.com">a test with a link and a period</a>".'));83 $this->assertEquals('Here is “<a href="http://example.com">a test with a link</a>”', wptexturize('Here is "<a href="http://example.com">a test with a link</a>"')); 84 $this->assertEquals('Here is “<a href="http://example.com">a test with a link and a period</a>”.', wptexturize('Here is "<a href="http://example.com">a test with a link and a period</a>".')); 85 85 $this->assertEquals('Here is “<a href="http://example.com">a test with a link</a>” and a space.', wptexturize('Here is "<a href="http://example.com">a test with a link</a>" and a space.')); 86 86 $this->assertEquals('Here is “<a href="http://example.com">a test with a link</a> and some text quoted”', wptexturize('Here is "<a href="http://example.com">a test with a link</a> and some text quoted"')); 87 //$this->assertEquals('Here is “<a href="http://example.com">a test with a link</a>”, and a comma.', wptexturize('Here is "<a href="http://example.com">a test with a link</a>", and a comma.'));88 //$this->assertEquals('Here is “<a href="http://example.com">a test with a link</a>”; and a semi-colon.', wptexturize('Here is "<a href="http://example.com">a test with a link</a>"; and a semi-colon.'));89 //$this->assertEquals('Here is “<a href="http://example.com">a test with a link</a>”- and a dash.', wptexturize('Here is "<a href="http://example.com">a test with a link</a>"- and a dash.'));90 //$this->assertEquals('Here is “<a href="http://example.com">a test with a link</a>”… and ellipses.', wptexturize('Here is "<a 
href="http://example.com">a test with a link</a>"... and ellipses.'));91 //$this->assertEquals('Here is “a test <a href="http://example.com">with a link</a>”.', wptexturize('Here is "a test <a href="http://example.com">with a link</a>".'));92 //$this->assertEquals('Here is “<a href="http://example.com">a test with a link</a>”and a work stuck to the end.', wptexturize('Here is "<a href="http://example.com">a test with a link</a>"and a work stuck to the end.'));87 $this->assertEquals('Here is “<a href="http://example.com">a test with a link</a>”, and a comma.', wptexturize('Here is "<a href="http://example.com">a test with a link</a>", and a comma.')); 88 $this->assertEquals('Here is “<a href="http://example.com">a test with a link</a>”; and a semi-colon.', wptexturize('Here is "<a href="http://example.com">a test with a link</a>"; and a semi-colon.')); 89 $this->assertEquals('Here is “<a href="http://example.com">a test with a link</a>”- and a dash.', wptexturize('Here is "<a href="http://example.com">a test with a link</a>"- and a dash.')); 90 $this->assertEquals('Here is “<a href="http://example.com">a test with a link</a>”… and ellipses.', wptexturize('Here is "<a href="http://example.com">a test with a link</a>"... 
and ellipses.')); 91 $this->assertEquals('Here is “a test <a href="http://example.com">with a link</a>”.', wptexturize('Here is "a test <a href="http://example.com">with a link</a>".')); 92 $this->assertEquals('Here is “<a href="http://example.com">a test with a link</a>”and a work stuck to the end.', wptexturize('Here is "<a href="http://example.com">a test with a link</a>"and a work stuck to the end.')); 93 93 $this->assertEquals('A test with a finishing number, “like 23”.', wptexturize('A test with a finishing number, "like 23".')); 94 94 $this->assertEquals('A test with a number, “like 62”, is nice to have.', wptexturize('A test with a number, "like 62", is nice to have.')); 95 95 } … … 144 144 */ 145 145 function test_other_html() { 146 146 $this->assertEquals('‘<strong>', wptexturize("'<strong>")); 147 //$this->assertEquals('‘<strong>Quoted Text</strong>’,', wptexturize("'<strong>Quoted Text</strong>',"));148 //$this->assertEquals('“<strong>Quoted Text</strong>”,', wptexturize('"<strong>Quoted Text</strong>",'));147 $this->assertEquals('‘<strong>Quoted Text</strong>’,', wptexturize("'<strong>Quoted Text</strong>',")); 148 $this->assertEquals('“<strong>Quoted Text</strong>”,', wptexturize('"<strong>Quoted Text</strong>",')); 149 149 } 150 150 151 151 function test_x() { … … 1444 1444 ), 1445 1445 array( 1446 1446 '[Let\'s get crazy<input>[caption code="<a href=\'?a[]=100\'>hello</a>"]</input>world]', // caption shortcode is invalid here because it contains [] chars. 1447 '[Let’s get crazy<input>[caption code=”<a href=\'?a[]=100\'>hello</a>“]</input>world]', 1447 //'[Let’s get crazy<input>[caption code=”<a href=\'?a[]=100\'>hello</a>“]</input>world]', // Incompatibility... (was seeing "] which went to open quote) 1448 '[Let’s get crazy<input>[caption code=”<a href=\'?a[]=100\'>hello</a>”]</input>world]', // Now sees [caption code="hello"] which goes to 2 closing quotes. 1448 1449 ), 1449 1450 array( 1450 1451 '<> ... 
<>', … … 1766 1767 "word word', she said", 1767 1768 "word word!closeq1!, she said", 1768 1769 ), 1770 array( 1771 '\'<a href="#"><small>word</small></a> "<i>42.00</i>" <b>word</b> <big>\'42\'</big> <em>42"</em> <sup>\'2\'</sup><br>"<i>21</i>" <i><b>2</b>x</i>21 <abbr>w</abbr>ord 0.<small>5</small>x<span>8</span>4<strong> \'</strong>4<var>t2\'', 1772 '!openq1!<a href="#"><small>word</small></a> !openq2!<i>42.00</i>!closeq2! <b>word</b> <big>!apos!42!closeq1!</big> <em>42!prime2!</em> <sup>!openq1!2!closeq1!</sup><br>!openq2!<i>21</i>!closeq2! <i><b>2</b>×</i>21 <abbr>w</abbr>ord 0.<small>5</small>×<span>8</span>4<strong> !openq1!</strong>4<var>t2!closeq1!', 1773 ), 1769 1774 ); 1770 1775 } 1771 1776 … … 2086 2091 require_once( DIR_TESTDATA . '/formatting/whole-posts.php' ); 2087 2092 return data_whole_posts(); 2088 2093 } 2089 } 2090 No newline at end of file 2094 2095 /** 2096 * Test that double quotes and apostrophes close correctly in the presence of inline html tags. 2097 * 2098 * @ticket 18549 2099 * @dataProvider data_inline_end_tags 2100 */ 2101 function test_inline_end_tags( $input, $output ) { 2102 return $this->assertEquals( $output, wptexturize( $input ) ); 2103 } 2104 2105 function data_inline_end_tags() { 2106 return array( 2107 array( 2108 'The word is "<a href="http://example.com/">quoted</a>".', 2109 'The word is “<a href="http://example.com/">quoted</a>”.', 2110 ), 2111 array( 2112 'The word is \'<a href="http://example.com/">quoted</a>\'', 2113 'The word is ‘<a href="http://example.com/">quoted</a>’', 2114 ), 2115 array( 2116 'The word is \'<a href="http://example.com/">quoted.</a>\'', 2117 'The word is ‘<a href="http://example.com/">quoted.</a>’', 2118 ), 2119 array( 2120 'The word is \'<a href="http://example.com/">quoted</a>\'.', 2121 'The word is ‘<a href="http://example.com/">quoted</a>’.', 2122 ), 2123 array( 2124 'The word is not \'<a href="http://example.com/">quot</a>\'d', 2125 'The word is not ‘<a href="http://example.com/">quot</a>’d', 
2126 ), 2127 array( 2128 '<em>John</em>\'s', 2129 '<em>John</em>’s', 2130 ), 2131 array( 2132 '\'<em>John</em>\'s\'', 2133 '‘<em>John</em>’s’', 2134 ), 2135 array( 2136 '"<em>John</em>\'s"', 2137 '“<em>John</em>’s”', 2138 ), 2139 array( 2140 '<em>"John"</em>\'s', 2141 //'<em>“John”</em>’s', // Should be but... 2142 '<em>“John”</em>‘s', // Wrong: the ' in the stripped "John"'s seen as an opening single quote. 2143 ), 2144 array( 2145 '<em>\'John\'</em>\'s', 2146 //'<em>‘John’</em>’s', // Should be but... 2147 '<em>‘John”</em>s', // Wrong: the '' in the stripped 'John''s seen as a closing double tick quote. 2148 ), 2149 array( 2150 '<strong>Read more: </strong>"<a href="http://blah.com/test">Something (else)</a>"</p>', 2151 '<strong>Read more: </strong>“<a href="http://blah.com/test">Something (else)</a>”</p>', 2152 ), 2153 2154 ); 2155 } 2156 2157 /** 2158 * Test low-level wptexturize_replace_xxx routines. 2159 * @ticket wptexturize_replace_xxx 2160 */ 2161 function test_wptexturize_replace() { 2162 $orig_str = '<a href="#">0<abbr>1</abbr></a>2<b>2</b>345<big>5</big>6<br>77<dfn></em>8<i><samp>9</samp>A<small>B</small>CDE<span></span>F<strong>G</sub>[H<sup>]<var>I</var>'; 2163 2164 $str = $orig_str; 2165 wptexturize_replace_init( $str, '/<[^>]*>|\[H[^\]]*\]/' ); 2166 $this->assertEquals( '0122345567789ABCDEFGI', $str ); 2167 2168 wptexturize_replace_final( $str ); 2169 $this->assertEquals( $orig_str, $str ); 2170 2171 wptexturize_replace_init( $str, '/<[^>]*>|\[H[^\]]*\]/' ); 2172 $this->assertEquals( '0122345567789ABCDEFGI', $str ); 2173 2174 wptexturize_replace_str( $str, '5', 'EF' ); 2175 $this->assertEquals( '012234EFEF67789ABCDEFGI', $str ); 2176 2177 wptexturize_replace_regex( $str, '/2/', '2BB2' ); 2178 $this->assertEquals( '012BB22BB234EFEF67789ABCDEFGI', $str ); 2179 2180 wptexturize_replace_regex( $str, '/(3)4/', '$1FOUR' ); 2181 $this->assertEquals( '012BB22BB23FOUREFEF67789ABCDEFGI', $str ); 2182 2183 wptexturize_replace_str( $str, '8', 'B' ); 2184 
$this->assertEquals( '012BB22BB23FOUREFEF677B9ABCDEFGI', $str ); 2185 2186 wptexturize_replace_regex( $str, '/7/', '7EFEN' ); 2187 $this->assertEquals( '012BB22BB23FOUREFEF67EFEN7EFENB9ABCDEFGI', $str ); 2188 2189 wptexturize_replace_str( $str, '6', '666' ); 2190 $this->assertEquals( '012BB22BB23FOUREFEF6667EFEN7EFENB9ABCDEFGI', $str ); 2191 2192 wptexturize_replace_str( $str, '1', '12' ); 2193 $this->assertEquals( '0122BB22BB23FOUREFEF6667EFEN7EFENB9ABCDEFGI', $str ); 2194 2195 wptexturize_replace_str( $str, 'BC', 'BCBC' ); 2196 $this->assertEquals( '0122BB22BB23FOUREFEF6667EFEN7EFENB9ABCBCDEFGI', $str ); 2197 2198 wptexturize_replace_regex( $str, '/3(FO)U/', 'THREE$1EWE' ); 2199 $this->assertEquals( '0122BB22BB2THREEFOEWEREFEF6667EFEN7EFENB9ABCBCDEFGI', $str ); 2200 2201 wptexturize_replace_regex( $str, '/(E)F/', '$1' ); 2202 $this->assertEquals( '0122BB22BB2THREEOEWEREE6667EEN7EENB9ABCBCDEGI', $str ); 2203 2204 wptexturize_replace_regex( $str, '/G/', '' ); 2205 $this->assertEquals( '0122BB22BB2THREEOEWEREE6667EEN7EENB9ABCBCDEI', $str ); 2206 2207 wptexturize_replace_final( $str ); 2208 $this->assertEquals( '<a href="#">0<abbr>12</abbr></a>2BB2<b>2BB2</b>THREEOEWERE<big>E</big>666<br>7EEN7EEN<dfn></em>B<i><samp>9</samp>A<small>BCBC</small>DE<span></span><strong></sub>[H<sup>]<var>I</var>', $str ); 2209 } 2210 }