Ticket #18549: 18549_wptexturize.3.diff
File 18549_wptexturize.3.diff, 26.0 KB (added by , 9 years ago) |
---|
-
src/wp-includes/formatting.php
54 54 $closing_quote = null, 55 55 $opening_single_quote = null, 56 56 $closing_single_quote = null, 57 $open_q_flag = '<!--oq-->', 58 $open_sq_flag = '<!--osq-->', 59 $apos_flag = '<!--apos-->'; 57 $apos_flag, $open_sq_flag, $open_q_flag, $close_sq_flag, $close_q_flag, $prime_sq_flag, $prime_q_flag, $sq_flag, $q_flag, $primes_flag, 58 $flags_sq, $flags_q, $reals_sq, $reals_q, 59 $nonsplit_regex, $comment_regex, 60 $static_no_texturize_shortcodes = null, $no_texturize_shortcode_regex, 61 $static_shortcode_tags = null, $shortcode_regex, 62 $spaces; 60 63 61 64 // If there's nothing to do, just stop. 62 65 if ( empty( $text ) || false === $run_texturize ) { … … 107 110 /* translators: em dash */ 108 111 $em_dash = _x( '—', 'em dash' ); 109 112 113 // Standardize size of flags to max of primes/quotes manipulated by wptexturize_primes(). 114 // This will allow wptexturize_primes() to do its replacements without worrying about offsets changing. 115 $flag_len = max( 5, strlen( $closing_quote ), strlen( $prime ), strlen( $double_prime ), strlen( $closing_single_quote ) ); 116 117 $apos_flag = str_pad( '<i a>', $flag_len, '>' ); 118 $open_sq_flag = str_pad( '<i o>', $flag_len, '>' ); 119 $close_sq_flag = str_pad( '<i c>', $flag_len, '>' ); 120 $prime_sq_flag = str_pad( '<i p>', $flag_len, '>' ); 121 $prime_q_flag = str_pad( '<i P>', $flag_len, '>' ); 122 $open_q_flag = str_pad( '<i O>', $flag_len, '>' ); 123 $close_q_flag = str_pad( '<i C>', $flag_len, '>' ); 124 $sq_flag = str_repeat( "'", $flag_len ); 125 $q_flag = str_repeat( '"', $flag_len ); 126 $primes_flag = str_pad( '<i f>', $flag_len, '>' ); 127 128 // Flags & reals arrays - used to reinstate the real values. 
129 $flags_sq = array( $sq_flag, $prime_sq_flag, $open_sq_flag, $close_sq_flag, $apos_flag ); 130 $reals_sq = array( "'", $prime, $opening_single_quote, $closing_single_quote, $apos ); 131 $flags_q = array( $q_flag, $prime_q_flag, $open_q_flag, $close_q_flag ); 132 $reals_q = array( '"', $double_prime, $opening_quote, $closing_quote ); 133 110 134 $default_no_texturize_tags = array('pre', 'code', 'kbd', 'style', 'script', 'tt'); 111 135 $default_no_texturize_shortcodes = array('code'); 112 136 … … 139 163 140 164 // '99' and '99" are ambiguous among other patterns; assume it's an abbreviated year at the end of a quotation. 141 165 if ( "'" !== $apos || "'" !== $closing_single_quote ) { 142 $dynamic[ '/\'(\d\d)\'(?=\Z|[.,:;!?)}\-\]]|>|' . $spaces . ')/' ] = $apos_flag . '$1' . $clos ing_single_quote;166 $dynamic[ '/\'(\d\d)\'(?=\Z|[.,:;!?)}\-\]]|>|' . $spaces . ')/' ] = $apos_flag . '$1' . $close_sq_flag; 143 167 } 144 168 if ( "'" !== $apos || '"' !== $closing_quote ) { 145 $dynamic[ '/\'(\d\d)"(?=\Z|[.,:;!?)}\-\]]|>|' . $spaces . ')/' ] = $apos_flag . '$1' . $clos ing_quote;169 $dynamic[ '/\'(\d\d)"(?=\Z|[.,:;!?)}\-\]]|>|' . $spaces . ')/' ] = $apos_flag . '$1' . $close_q_flag; 146 170 } 147 171 148 172 // '99 '99s '99's (apostrophe) But never '9 or '99% or '999 or '99.0. … … 151 175 } 152 176 153 177 // Quoted Numbers like '0.42' 154 if ( "'" !== $opening_single_quote &&"'" !== $closing_single_quote ) {155 $dynamic[ '/(?<=\A|' . $spaces . ')\'(\d[.,\d]*)\'/' ] = $open_sq_flag . '$1' . $clos ing_single_quote;178 if ( "'" !== $opening_single_quote || "'" !== $closing_single_quote ) { 179 $dynamic[ '/(?<=\A|' . $spaces . ')\'(\d[.,\d]*)\'/' ] = $open_sq_flag . '$1' . $close_sq_flag; 156 180 } 157 181 158 182 // Single quote at start, or preceded by (, {, <, [, ", -, or spaces. … … 170 194 $dynamic = array(); 171 195 172 196 // Quoted Numbers like "42" 173 if ( '"' !== $opening_quote &&'"' !== $closing_quote ) {174 $dynamic[ '/(?<=\A|' . $spaces . 
')"(\d[.,\d]*)"/' ] = $open_q_flag . '$1' . $clos ing_quote;197 if ( '"' !== $opening_quote || '"' !== $closing_quote ) { 198 $dynamic[ '/(?<=\A|' . $spaces . ')"(\d[.,\d]*)"/' ] = $open_q_flag . '$1' . $close_q_flag; 175 199 } 176 200 177 201 // Double quote at start, or preceded by (, {, <, [, -, or spaces, and not followed by spaces. … … 191 215 192 216 $dynamic_characters['dash'] = array_keys( $dynamic ); 193 217 $dynamic_replacements['dash'] = array_values( $dynamic ); 218 219 $nonsplit_regex = '\/?(?:a\b|abbr|b\b|big|br|dfn|em|i\b|samp|small|span|strong|sub|sup|var)[^>]*>'; 220 221 // Might as well initialize the comment regex once seeing as it's invariant. 222 $comment_regex = 223 '!' // Start of comment, after the <. 224 . '(?:' // Unroll the loop: Consume everything until --> is found. 225 . '-(?!->)' // Dash not followed by end of comment. 226 . '[^\-]*+' // Consume non-dashes. 227 . ')*+' // Loop possessively. 228 . '(?:-->)?'; // End of comment. If not found, match all input. 194 229 } 195 230 196 231 // Must do this every time in case plugins use these filters in a context sensitive manner … … 214 249 $no_texturize_tags_stack = array(); 215 250 $no_texturize_shortcodes_stack = array(); 216 251 217 // Look for shortcodes and HTML elements. 252 // Set up shortcodes regular expression (used to strip within each split text part), if haven't already or if things changed. 253 if ( $static_shortcode_tags === null || $shortcode_tags !== $static_shortcode_tags ) { 254 $static_shortcode_tags = $shortcode_tags; 255 $static_no_texturize_shortcodes = null; // Force reset of no texturize shortcodes as they need to be registered to be ignored. 256 if ( $shortcode_tags ) { 257 $tagregexp = join( '|', array_map( 'preg_quote', array_keys( $shortcode_tags ) ) ); 258 $tagregexp = "(?:$tagregexp)(?![\\w-])"; // Excerpt of get_shortcode_regex() (which matches whole shortcode, including content, so can't use here). 259 $shortcode_regex = 260 '|' 261 . 
'\[' // Find start of shortcode. 262 . '[\/\[]?' // Shortcodes may begin with [/ or [[ 263 . $tagregexp // Only match registered shortcodes, because performance. 264 . '(?:' 265 . '[^\[\]<>]+' // Shortcodes do not contain other shortcodes. Quantifier critical. 266 . '|' 267 . '<[^\[\]>]*>' // HTML elements permitted. Prevents matching ] before >. 268 . ')*+' // Possessive critical. 269 . '\]' // Find end of shortcode. 270 . '\]?'; // Shortcodes may end with ]] 271 } else { 272 $shortcode_regex = ''; 273 } 274 } 218 275 219 $tagnames = array_keys( $shortcode_tags ); 220 $tagregexp = join( '|', array_map( 'preg_quote', $tagnames ) ); 221 $tagregexp = "(?:$tagregexp)(?![\\w-])"; // Excerpt of get_shortcode_regex(). 276 // Set up no texturize shortcodes regular expression (used to split text input), if haven't already or if things changed. 277 if ( $static_no_texturize_shortcodes === null || $no_texturize_shortcodes !== $static_no_texturize_shortcodes ) { 278 $static_no_texturize_shortcodes = $no_texturize_shortcodes; 279 // No texturize shortcodes must also be registered to be ignored, so intersect with registered shortcodes array. 280 $tagnames = array_intersect( $no_texturize_shortcodes, array_keys( $static_shortcode_tags ) ); 281 if ( $tagnames ) { 282 $tagregexp = join( '|', array_map( 'preg_quote', $tagnames ) ); 283 $tagregexp = "(?:$tagregexp)(?![\\w-])"; // Excerpt of get_shortcode_regex(). 284 $no_texturize_shortcode_regex = 285 '|' 286 . '\[' // Find start of shortcode. 287 . '[\/\[]?' // Shortcodes may begin with [/ or [[ 288 . $tagregexp // Only match no texturize shortcodes. 289 . '(?:' 290 . '[^\[\]<>]+' // Shortcodes do not contain other shortcodes. Quantifier critical. 291 . '|' 292 . '<[^\[\]>]*>' // HTML elements permitted. Prevents matching ] before >. 293 . ')*+' // Possessive critical. 294 . '\]' // Find end of shortcode. 295 . 
'\]?'; // Shortcodes may end with ]] 296 } else { 297 $no_texturize_shortcode_regex = ''; 298 } 299 } 222 300 223 $comment_regex = 224 '!' // Start of comment, after the <. 225 . '(?:' // Unroll the loop: Consume everything until --> is found. 226 . '-(?!->)' // Dash not followed by end of comment. 227 . '[^\-]*+' // Consume non-dashes. 228 . ')*+' // Loop possessively. 229 . '(?:-->)?'; // End of comment. If not found, match all input. 301 // Look for comments, non-inline (non-split) HTML elements and no texturize shortcodes. 230 302 231 $shortcode_regex =232 '\[' // Find start of shortcode.233 . '[\/\[]?' // Shortcodes may begin with [/ or [[234 . $tagregexp // Only match registered shortcodes, because performance.235 . '(?:'236 . '[^\[\]<>]+' // Shortcodes do not contain other shortcodes. Quantifier critical.237 . '|'238 . '<[^\[\]>]*>' // HTML elements permitted. Prevents matching ] before >.239 . ')*+' // Possessive critical.240 . '\]' // Find end of shortcode.241 . '\]?'; // Shortcodes may end with ]]242 243 303 $regex = 244 304 '/(' // Capture the entire match. 245 305 . '<' // Find start of element. … … 246 306 . '(?(?=!--)' // Is this a comment? 247 307 . $comment_regex // Find end of comment. 248 308 . '|' 309 . '(?!' . $nonsplit_regex . ')' // Exclude inline html elements. 249 310 . '[^>]*>' // Find end of element. 250 311 . ')' 251 . '|' 252 . $shortcode_regex // Find shortcodes. 312 . $no_texturize_shortcode_regex // Find no texturize shortcodes. 253 313 . ')/s'; 254 314 255 $textarr = preg_split( $regex, $text, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);315 $textarr = preg_split( $regex, $text, -1, PREG_SPLIT_DELIM_CAPTURE ); 256 316 257 foreach ( $textarr as &$curl ) { 258 // Only call _wptexturize_pushpop_element if $curl is a delimiter. 259 $first = $curl[0]; 260 if ( '<' === $first && '<!--' === substr( $curl, 0, 4 ) ) { 261 // This is an HTML comment delimeter. 
317 foreach ( $textarr as $curl_idx => &$curl ) { 318 if ( 1 === $curl_idx % 2 ) { 319 // Delimiter. 320 $first = $curl[0]; 321 if ( '<' === $first ) { 322 // If not a comment. 323 if ( '<!--' !== substr( $curl, 0, 4 ) ) { 324 // This is an HTML element delimiter. 262 325 263 continue; 326 _wptexturize_pushpop_element( $curl, $no_texturize_tags_stack, $no_texturize_tags ); 327 } 328 } elseif ( '[' === $first ) { 329 // This is a shortcode delimiter. 264 330 265 } elseif ( '<' === $first && '>' === substr( $curl, -1 ) ) { 266 // This is an HTML element delimiter. 331 if ( '[[' !== substr( $curl, 0, 2 ) && ']]' !== substr( $curl, -2 ) ) { 332 // Looks like a normal shortcode. 333 _wptexturize_pushpop_element( $curl, $no_texturize_shortcodes_stack, $no_texturize_shortcodes ); 334 } else { 335 // Looks like an escaped shortcode. 336 } 337 } 338 } elseif ( empty( $no_texturize_shortcodes_stack ) && empty( $no_texturize_tags_stack ) && '' !== trim( $curl ) ) { 339 // This is neither a delimiter, nor is this content inside of no_texturize pairs. Do texturize. 267 340 268 _wptexturize_pushpop_element( $curl, $no_texturize_tags_stack, $no_texturize_tags ); 341 // Add a space to any <br>s so that when stripped will be recognized as whitespace. 342 if ( $have_br = ( false !== stripos( $curl, '<br' ) ) ) { 343 $curl = preg_replace( '/<br[^>]*>/i', '$0 ', $curl ); 344 } 269 345 270 } elseif ( '' === trim( $curl ) ) { 271 // This is a newline between delimiters. Performance improves when we check this. 346 if ( wptexturize_replace_init( $curl, '/<[^>]*>' . $shortcode_regex . '/S' ) ) { // The study option here makes a big difference. 272 347 273 continue;348 wptexturize_replace_str( $curl, $static_characters, $static_replacements ); 274 349 275 } elseif ( '[' === $first && 1 === preg_match( '/^' . $shortcode_regex . '$/', $curl ) ) { 276 // This is a shortcode delimiter. 
350 if ( false !== strpos( $curl, "'" ) ) { 351 wptexturize_replace_regex( $curl, $dynamic_characters['apos'], $dynamic_replacements['apos'] ); 352 wptexturize_replace_str( $curl, "'", $sq_flag ); // Substitute single quotes with same-sized dummy so that wptexturize_primes() doesn't alter size of string. 353 $curl = wptexturize_primes( $curl, $sq_flag, $prime_sq_flag, $open_sq_flag, $close_sq_flag, $primes_flag, $spaces ); 354 wptexturize_replace_str( $curl, $flags_sq, $reals_sq ); // Reinstate real values. 355 } 356 if ( false !== strpos( $curl, '"' ) ) { 357 wptexturize_replace_regex( $curl, $dynamic_characters['quote'], $dynamic_replacements['quote'] ); 358 wptexturize_replace_str( $curl, '"', $q_flag ); // Substitute double quotes with same-sized dummy so that wptexturize_primes() doesn't alter size of string. 359 $curl = wptexturize_primes( $curl, $q_flag, $prime_q_flag, $open_q_flag, $close_q_flag, $primes_flag, $spaces ); 360 wptexturize_replace_str( $curl, $flags_q, $reals_q ); // Reinstate real values. 361 } 362 if ( false !== strpos( $curl, '-' ) ) { 363 wptexturize_replace_regex( $curl, $dynamic_characters['dash'], $dynamic_replacements['dash'] ); 364 } 277 365 278 if ( '[[' !== substr( $curl, 0, 2 ) && ']]' !== substr( $curl, -2 ) ) { 279 // Looks like a normal shortcode. 280 _wptexturize_pushpop_element( $curl, $no_texturize_shortcodes_stack, $no_texturize_shortcodes ); 366 // 9x9 (times), but never 0x9999 367 if ( 1 === preg_match( '/(?<=\d)x\d/', $curl ) ) { 368 // Searching for a digit is 10 times more expensive than for the x, so we avoid doing this one! 369 wptexturize_replace_regex( $curl, '/\b(\d(?(?<=0)[\d\.,]+|[\d\.,]*))x(?=\d[\d\.,]*\b)/', '$1×' ); // Changed to use look ahead as can only deal with a single sub-replacement. 
370 } 371 372 wptexturize_replace_final( $curl ); 373 281 374 } else { 282 // Looks like an escaped shortcode.283 continue;284 }285 375 286 } elseif ( empty( $no_texturize_shortcodes_stack ) && empty( $no_texturize_tags_stack ) ) { 287 // This is neither a delimiter, nor is this content inside of no_texturize pairs. Do texturize. 376 $curl = str_replace( $static_characters, $static_replacements, $curl ); 288 377 289 $curl = str_replace( $static_characters, $static_replacements, $curl ); 378 if ( false !== strpos( $curl, "'" ) ) { 379 $curl = preg_replace( $dynamic_characters['apos'], $dynamic_replacements['apos'], $curl ); 380 $curl = wptexturize_primes( $curl, "'", $prime, $open_sq_flag, $close_sq_flag, $primes_flag, $spaces ); 381 $curl = str_replace( array( $apos_flag, $open_sq_flag, $close_sq_flag ), array( $apos, $opening_single_quote, $closing_single_quote ), $curl ); 382 } 383 if ( false !== strpos( $curl, '"' ) ) { 384 $curl = preg_replace( $dynamic_characters['quote'], $dynamic_replacements['quote'], $curl ); 385 $curl = wptexturize_primes( $curl, '"', $double_prime, $open_q_flag, $close_q_flag, $primes_flag, $spaces ); 386 $curl = str_replace( array( $open_q_flag, $close_q_flag ), array( $opening_quote, $closing_quote ), $curl ); 387 } 388 if ( false !== strpos( $curl, '-' ) ) { 389 $curl = preg_replace( $dynamic_characters['dash'], $dynamic_replacements['dash'], $curl ); 390 } 290 391 291 if ( false !== strpos( $curl, "'" ) ) {292 $curl = preg_replace( $dynamic_characters['apos'], $dynamic_replacements['apos'], $curl );293 $curl = wptexturize_primes( $curl, "'", $prime, $open_sq_flag, $closing_single_quote );294 $curl = str_replace( $apos_flag, $apos, $curl );295 $curl = str_replace( $open_sq_flag, $opening_single_quote, $curl );392 // 9x9 (times), but never 0x9999 393 if ( 1 === preg_match( '/(?<=\d)x\d/', $curl ) ) { 394 // Searching for a digit is 10 times more expensive than for the x, so we avoid doing this one! 
395 $curl = preg_replace( '/\b(\d(?(?<=0)[\d\.,]+|[\d\.,]*))x(\d[\d\.,]*)\b/', '$1×$2', $curl ); 396 } 296 397 } 297 if ( false !== strpos( $curl, '"' ) ) {298 $curl = preg_replace( $dynamic_characters['quote'], $dynamic_replacements['quote'], $curl );299 $curl = wptexturize_primes( $curl, '"', $double_prime, $open_q_flag, $closing_quote );300 $curl = str_replace( $open_q_flag, $opening_quote, $curl );301 }302 if ( false !== strpos( $curl, '-' ) ) {303 $curl = preg_replace( $dynamic_characters['dash'], $dynamic_replacements['dash'], $curl );304 }305 398 306 // 9x9 (times), but never 0x9999 307 if ( 1 === preg_match( '/(?<=\d)x\d/', $curl ) ) { 308 // Searching for a digit is 10 times more expensive than for the x, so we avoid doing this one! 309 $curl = preg_replace( '/\b(\d(?(?<=0)[\d\.,]+|[\d\.,]*))x(\d[\d\.,]*)\b/', '$1×$2', $curl ); 399 // Remove any spaces added to <br>s at the start. 400 if ( $have_br ) { 401 $curl = preg_replace( '/(<br[^>]*>) /i', '$1', $curl ); 310 402 } 311 403 } 312 404 } … … 330 422 * @param string $close_quote The closing quote char to use for replacement. 331 423 * @return string The $haystack value after primes and quotes replacements. 332 424 */ 333 function wptexturize_primes( $haystack, $needle, $prime, $open_quote, $close_quote ) { 334 $spaces = wp_spaces_regexp(); 335 $flag = '<!--wp-prime-or-quote-->'; 425 function wptexturize_primes( $haystack, $needle, $prime, $open_quote, $close_quote, $flag, $spaces ) { 426 $flag_len = strlen( $flag ); 336 427 $quote_pattern = "/$needle(?=\\Z|[.,:;!?)}\\-\\]]|>|" . $spaces . ")/"; 337 428 $prime_pattern = "/(?<=\\d)$needle/"; 338 429 $flag_after_digit = "/(?<=\\d)$flag/"; … … 359 450 // This is most likely to be problematic in the context of bug #18549. 
360 451 $pos = strrpos( $sentence, $flag ); 361 452 } 362 $sentence = substr_replace( $sentence, $close_quote, $pos, strlen( $flag ));453 $sentence = substr_replace( $sentence, $close_quote, $pos, $flag_len ); 363 454 } 364 455 // Use conventional replacement on any remaining primes and quotes. 365 $sentence = preg_replace( $prime_pattern, $prime, $sentence ); 366 $sentence = preg_replace( $flag_after_digit, $prime, $sentence ); 456 $sentence = preg_replace( array( $prime_pattern, $flag_after_digit ), $prime, $sentence ); 367 457 $sentence = str_replace( $flag, $close_quote, $sentence ); 368 } elseif ( 1 == $count ) {458 } elseif ( 1 === $count ) { 369 459 // Found only one closing quote candidate, so give it priority over primes. 370 460 $sentence = str_replace( $flag, $close_quote, $sentence ); 371 461 $sentence = preg_replace( $prime_pattern, $prime, $sentence ); … … 374 464 $sentence = preg_replace( $prime_pattern, $prime, $sentence ); 375 465 } 376 466 } else { 377 $sentence = preg_replace( $prime_pattern, $prime, $sentence ); 378 $sentence = preg_replace( $quote_pattern, $close_quote, $sentence ); 467 $sentence = preg_replace( array( $prime_pattern, $quote_pattern ), array( $prime, $close_quote ), $sentence ); 379 468 } 380 if ( '"' == $needle && false !== strpos( $sentence, '"') ) {381 $sentence = str_replace( '"', $close_quote, $sentence );469 if ( '"' === $needle[0] && false !== strpos( $sentence, $needle ) ) { 470 $sentence = str_replace( $needle, $close_quote, $sentence ); 382 471 } 383 472 } 384 473 … … 402 491 function _wptexturize_pushpop_element( $text, &$stack, $disabled_elements ) { 403 492 // Is it an opening tag or closing tag? 404 493 if ( '/' !== $text[1] ) { 405 $opening_tag = true; 406 $name_offset = 1; 407 } elseif ( 0 == count( $stack ) ) { 408 // Stack is empty. Just stop. 409 return; 410 } else { 411 $opening_tag = false; 412 $name_offset = 2; 413 } 414 415 // Parse out the tag name. 
416 $space = strpos( $text, ' ' ); 417 if ( false === $space ) { 418 $space = -1; 419 } else { 420 $space -= $name_offset; 421 } 422 $tag = substr( $text, $name_offset, $space ); 423 424 // Handle disabled tags. 425 if ( in_array( $tag, $disabled_elements ) ) { 426 if ( $opening_tag ) { 494 $space = strpos( $text, ' ' ); 495 if ( $space === false ) { 496 $tag = substr( $text, 1, -1 ); 497 } else { 498 $tag = substr( $text, 1, $space - 1 ); 499 } 500 if ( in_array( $tag, $disabled_elements ) ) { // If $disabled_elements was array_flipped then could use hash lookup isset( $disabled_elemenets[$tag] ) here instead of linear lookup. 427 501 /* 428 502 * This disables texturize until we find a closing tag of our type 429 503 * (e.g. <pre>) even if there was invalid nesting before that … … 431 505 * Example: in the case <pre>sadsadasd</code>"baba"</pre> 432 506 * "baba" won't be texturize 433 507 */ 434 435 array_push( $stack, $tag ); 436 } elseif ( end( $stack ) == $tag ) { 508 $stack[] = $tag; 509 } 510 } elseif ( $stack ) { 511 $space = strpos( $text, ' ' ); 512 if ( $space === false ) { 513 $tag = substr( $text, 2, -1 ); 514 } else { 515 $tag = substr( $text, 2, $space - 2 ); 516 } 517 if ( in_array( $tag, $disabled_elements ) && end( $stack ) === $tag ) { // Sim. could use isset( $disabled_elemenets[$tag] ) if above. 437 518 array_pop( $stack ); 438 519 } 439 520 } … … 440 521 } 441 522 442 523 /** 524 * Initialize the stripped string routines wptexturize_replace_XXX, setting the globals used. 525 * $str will be stripped of any strings that match the regular expression $search. 
/**
 * Initialize the stripped string routines wptexturize_replace_XXX, setting the globals used.
 *
 * Every substring of $str that matches $search is removed from $str. For each
 * removed substring the text and the byte offset at which it has to be put back
 * (expressed in the coordinates of the stripped string) are remembered in the
 * global $wptexturize_strips, so that wptexturize_replace_final() can restore them.
 *
 * The stripping is done with preg_replace() using the very same pattern, so exactly
 * the matches that were recorded are removed. (Removing the matched texts with
 * str_replace() would also delete identical text that was not an actual match,
 * leaving the recorded offsets out of step with the stripped string.)
 *
 * @param string $str    Text to strip. Modified in place.
 * @param string $search Regular expression, including delimiters, matching the substrings to strip.
 * @return int Number of substrings stripped. 0 means nothing was stripped and $str is unchanged.
 */
function wptexturize_replace_init( &$str, $search ) {
	global $wptexturize_strip_cnt, $wptexturize_strips, $wptexturize_adjusts;

	// Always start from a clean state so stale data from an earlier text part can never leak in.
	$wptexturize_strip_cnt = 0;
	$wptexturize_strips    = array();
	$wptexturize_adjusts   = array();

	if ( ! preg_match_all( $search, $str, $matches, PREG_OFFSET_CAPTURE ) ) {
		return 0;
	}

	$stripped = preg_replace( $search, '', $str );
	if ( null === $stripped ) {
		// PCRE error: leave $str untouched and report that nothing was stripped.
		return 0;
	}

	$removed = 0; // Total length of the matches stripped so far.
	foreach ( $matches[0] as $found ) {
		list( $match, $offset ) = $found;
		// Offset of the re-insertion point within the stripped string.
		$wptexturize_strips[] = array( $match, $offset - $removed );
		$removed             += strlen( $match );
	}

	$str                   = $stripped;
	$wptexturize_strip_cnt = count( $wptexturize_strips );

	return $wptexturize_strip_cnt;
}

/**
 * Do a straight (non-regexp) string substitution, keeping tabs on the offset adjustments if have a stripped string.
 *
 * Without an active stripped string (see wptexturize_replace_init()) this is a plain str_replace().
 * Otherwise each search string is processed separately, and every replacement that changes the
 * length of the text is recorded in the global $wptexturize_adjusts as
 * array( offset, replacement length, replaced length ).
 *
 * Search/replacement pairing follows str_replace(): arrays are paired by position, a missing
 * replacement is the empty string, and a scalar $repl is used for every search string.
 *
 * @param string          $str    Text to operate on. Modified in place.
 * @param string|string[] $search String(s) to look for.
 * @param string|string[] $repl   Replacement(s).
 */
function wptexturize_replace_str( &$str, $search, $repl ) {
	global $wptexturize_strip_cnt, $wptexturize_adjusts;

	if ( ! $wptexturize_strip_cnt ) {
		$str = str_replace( $search, $repl, $str );
		return;
	}

	$searches = is_array( $search ) ? array_values( $search ) : array( $search );
	$repls    = is_array( $repl ) ? array_values( $repl ) : null;

	// As replacements could interfere with later ones, treat each separately.
	foreach ( $searches as $idx => $search_str ) {
		$search_str = (string) $search_str;
		if ( '' === $search_str ) {
			// str_replace() ignores empty needles; scanning for one would never advance.
			continue;
		}

		$offset = strpos( $str, $search_str );
		if ( false === $offset ) {
			continue;
		}

		if ( null === $repls ) {
			$repl_str = (string) $repl;
		} else {
			$repl_str = isset( $repls[ $idx ] ) ? (string) $repls[ $idx ] : '';
		}

		$repl_len = strlen( $repl_str );
		$len      = strlen( $search_str );
		$diff_len = $repl_len - $len;

		if ( $diff_len ) {
			// Same left-to-right, non-overlapping scan that str_replace() performs.
			$diff = 0;
			do {
				// Offsets are stored in the coordinates of the string as it is being rewritten.
				$wptexturize_adjusts[] = array( $offset + $diff, $repl_len, $len );
				$diff                 += $diff_len;
				$offset                = strpos( $str, $search_str, $offset + $len );
			} while ( false !== $offset );
		}

		$str = str_replace( $search_str, $repl_str, $str );
	}
}

/**
 * Do a regexp string substitution, keeping tabs on the offset adjustments if have a stripped string.
 *
 * Without an active stripped string this is a plain preg_replace(). Otherwise each pattern is
 * processed separately and the length changes it causes are recorded in $wptexturize_adjusts.
 *
 * Limitation: the replacement may contain at most one back reference and it must be '$1'. A
 * replacement of the form 'pre$1post' is tracked as two adjustments, one for the text in front
 * of the sub-match and one for the text after it. Any other replacement is treated as a literal.
 *
 * Pattern/replacement pairing follows preg_replace(): arrays are paired by position, a missing
 * replacement is the empty string, and a scalar $repl is used for every pattern.
 *
 * @param string          $str    Text to operate on. Modified in place.
 * @param string|string[] $search Regular expression(s), including delimiters.
 * @param string|string[] $repl   Replacement(s).
 */
function wptexturize_replace_regex( &$str, $search, $repl ) {
	global $wptexturize_strip_cnt, $wptexturize_adjusts;

	if ( ! $wptexturize_strip_cnt ) {
		$replaced = preg_replace( $search, $repl, $str );
		if ( null !== $replaced ) {
			$str = $replaced;
		}
		return;
	}

	$searches = is_array( $search ) ? array_values( $search ) : array( $search );
	$repls    = is_array( $repl ) ? array_values( $repl ) : null;

	// As replacements could interfere with later ones, treat each separately.
	foreach ( $searches as $idx => $re ) {
		if ( ! preg_match_all( $re, $str, $matches, PREG_OFFSET_CAPTURE ) ) {
			continue;
		}

		if ( null === $repls ) {
			$repl_str = (string) $repl;
		} else {
			$repl_str = isset( $repls[ $idx ] ) ? (string) $repls[ $idx ] : '';
		}

		$replaced = preg_replace( $re, $repl_str, $str );
		if ( null === $replaced ) {
			// PCRE error: keep the text (and the bookkeeping) as it was.
			continue;
		}

		$repl_len = strlen( $repl_str );
		$pos1     = strpos( $repl_str, '$1' );
		$diff     = 0; // Net length change caused by the matches handled so far.

		foreach ( $matches[0] as $i => $found ) {
			list( $match, $offset ) = $found;
			$match_len = strlen( $match );

			$has_sub = false !== $pos1 && isset( $matches[1][ $i ] ) && $matches[1][ $i ][1] >= 0;

			if ( ! $has_sub ) {
				// Whole match replaced by a literal ('$1', if present, expands to nothing).
				$literal_len = false === $pos1 ? $repl_len : $repl_len - 2;
				if ( $literal_len !== $match_len ) {
					$wptexturize_adjusts[] = array( $offset + $diff, $literal_len, $match_len );
					$diff                 += $literal_len - $match_len;
				}
				continue;
			}

			// For a 'pre$1post' replacement, track the pre-submatch replace and then the post-submatch replace.
			$pre_repl_len = $pos1;
			$pre_len      = $matches[1][ $i ][1] - $offset; // Submatch offset less full match offset.
			if ( $pre_repl_len !== $pre_len ) {
				$wptexturize_adjusts[] = array( $offset + $diff, $pre_repl_len, $pre_len );
				$diff                 += $pre_repl_len - $pre_len;
			}

			$sub_len       = strlen( $matches[1][ $i ][0] ); // Length of submatch string.
			$post_repl_len = $repl_len - ( $pre_repl_len + 2 ); // 2 is the length of '$1'.
			$post_len      = $match_len - ( $pre_len + $sub_len );
			if ( $post_repl_len !== $post_len ) {
				// Jump over the (original) pre-string and the submatch to reach the post-string.
				$post_offset           = $offset + $pre_len + $sub_len;
				$wptexturize_adjusts[] = array( $post_offset + $diff, $post_repl_len, $post_len );
				$diff                 += $post_repl_len - $post_len;
			}
		}

		$str = $replaced;
	}
}

/**
 * Restore stripped strings to $str.
 *
 * Replays the adjustments recorded by wptexturize_replace_str() and wptexturize_replace_regex()
 * against the re-insertion offsets recorded by wptexturize_replace_init(), then splices the
 * stripped substrings back into $str and ends the stripped-string session.
 *
 * For each adjustment, a strip located at or before the start of the replaced span stays put,
 * a strip located at or after its end is shifted by the change in length, and a strip that was
 * inside the replaced span is moved to just after the replacement, so that it can never end up
 * in the middle of the replacement text (for example inside an HTML entity).
 *
 * @param string $str Text to restore. Modified in place.
 */
function wptexturize_replace_final( &$str ) {
	global $wptexturize_strip_cnt, $wptexturize_strips, $wptexturize_adjusts;

	if ( ! $wptexturize_strip_cnt ) {
		return;
	}

	// Calculate offset adjustments. Strips are ordered by offset, so walk from the right and
	// stop at the first one that is not to the right of the adjustment.
	foreach ( $wptexturize_adjusts as $adjust ) {
		list( $offset, $repl_len, $len ) = $adjust;
		for ( $i = $wptexturize_strip_cnt - 1; $i >= 0 && $wptexturize_strips[ $i ][1] > $offset; $i-- ) {
			if ( $wptexturize_strips[ $i ][1] >= $offset + $len ) {
				$wptexturize_strips[ $i ][1] += $repl_len - $len;
			} else {
				$wptexturize_strips[ $i ][1] = $offset + $repl_len;
			}
		}
	}

	// Restore stripped strings in a single left-to-right pass.
	$length   = strlen( $str );
	$restored = '';
	$pos      = 0;
	foreach ( $wptexturize_strips as $strip_entry ) {
		list( $strip, $offset ) = $strip_entry;
		// Keep the insertion point inside the string and never move backwards.
		$offset = max( $pos, min( $length, $offset ) );
		if ( $offset > $pos ) {
			$restored .= substr( $str, $pos, $offset - $pos );
		}
		$restored .= $strip;
		$pos       = $offset;
	}
	if ( $pos < $length ) {
		$restored .= substr( $str, $pos );
	}
	$str = $restored;

	$wptexturize_strip_cnt = 0;
	$wptexturize_strips    = array();
	$wptexturize_adjusts   = array();
}