Ticket #18549: 18549_wptexturize.diff
File 18549_wptexturize.diff, 23.2 KB (added by , 9 years ago) |
---|
-
src/wp-includes/formatting.php
54 54 $closing_quote = null, 55 55 $opening_single_quote = null, 56 56 $closing_single_quote = null, 57 $open_q_flag = '<!--oq-->', 58 $open_sq_flag = '<!--osq-->', 59 $apos_flag = '<!--apos-->'; 57 $apos_flag = null, 58 $open_sq_flag = null, 59 $open_q_flag = null, 60 $close_sq_flag = null, 61 $close_q_flag = null, 62 $prime_sq_flag = null, 63 $prime_q_flag = null, 64 $sq_flag = null, 65 $q_flag = null, 66 $flags = null, 67 $reals = null, 68 $regexs = null, 69 $static_no_texturize_shortcodes = null, 70 $static_shortcode_tags = null; 60 71 61 72 // If there's nothing to do, just stop. 62 73 if ( empty( $text ) || false === $run_texturize ) { … … 63 74 return $text; 64 75 } 65 76 77 $spaces = wp_spaces_regexp(); 78 66 79 // Set up static variables. Run once only. 67 80 if ( $reset || ! isset( $static_characters ) ) { 68 81 /** … … 107 120 /* translators: em dash */ 108 121 $em_dash = _x( '—', 'em dash' ); 109 122 123 // Standardize size of flags to max of primes/quotes manipulated by wptexturize_primes(). 124 // This will allow wptexturize_primes() to do its replacements without worrying about offsets changing. 125 $dummy_len = max( 5, strlen( $closing_quote ), strlen( $prime ), strlen( $double_prime ), strlen( $closing_single_quote ) ); 126 127 $apos_flag = str_pad( '<i a>', $dummy_len, '>' ); 128 $open_sq_flag = str_pad( '<i o>', $dummy_len, '>' ); 129 $open_q_flag = str_pad( '<i O>', $dummy_len, '>' ); 130 $close_sq_flag = str_pad( '<i c>', $dummy_len, '>' ); 131 $close_q_flag = str_pad( '<i C>', $dummy_len, '>' ); 132 $prime_sq_flag = str_pad( '<i p>', $dummy_len, '>' ); 133 $prime_q_flag = str_pad( '<i P>', $dummy_len, '>' ); 134 $sq_flag = str_repeat( "'", $dummy_len ); 135 $q_flag = str_repeat( '"', $dummy_len ); 136 137 // Flags & reals arrays - used to reinstate the real values. 138 $flags = array( 139 'sq' => array( $sq_flag, $prime_sq_flag, $open_sq_flag, $close_sq_flag, $apos_flag ), 140 'q' => array( $q_flag, $prime_q_flag, $open_q_flag, $close_q_flag ), 141 ); 142 $reals = array( 143 'sq' => array( "'", $prime, $opening_single_quote, $closing_single_quote, $apos ), 144 'q' => array( '"', $double_prime, $opening_quote, $closing_quote ), 145 ); 146 110 147 $default_no_texturize_tags = array('pre', 'code', 'kbd', 'style', 'script', 'tt'); 111 148 $default_no_texturize_shortcodes = array('code'); 112 149 … … 135 172 $dynamic_characters = array( 'apos' => array(), 'quote' => array(), 'dash' => array() ); 136 173 $dynamic_replacements = array( 'apos' => array(), 'quote' => array(), 'dash' => array() ); 137 174 $dynamic = array(); 138 $spaces = wp_spaces_regexp();139 175 140 176 // '99' and '99" are ambiguous among other patterns; assume it's an abbreviated year at the end of a quotation. 141 177 if ( "'" !== $apos || "'" !== $closing_single_quote ) { 142 $dynamic[ '/\'(\d\d)\'(?=\Z|[.,:;!?)}\-\]]|>|' . $spaces . ')/' ] = $apos_flag . '$1' . $clos ing_single_quote;178 $dynamic[ '/\'(\d\d)\'(?=\Z|[.,:;!?)}\-\]]|>|' . $spaces . ')/' ] = $apos_flag . '$1' . $close_sq_flag; 143 179 } 144 180 if ( "'" !== $apos || '"' !== $closing_quote ) { 145 $dynamic[ '/\'(\d\d)"(?=\Z|[.,:;!?)}\-\]]|>|' . $spaces . ')/' ] = $apos_flag . '$1' . $clos ing_quote;181 $dynamic[ '/\'(\d\d)"(?=\Z|[.,:;!?)}\-\]]|>|' . $spaces . ')/' ] = $apos_flag . '$1' . $close_q_flag; 146 182 } 147 183 148 184 // '99 '99s '99's (apostrophe) But never '9 or '99% or '999 or '99.0. … … 152 188 153 189 // Quoted Numbers like '0.42' 154 190 if ( "'" !== $opening_single_quote && "'" !== $closing_single_quote ) { 155 $dynamic[ '/(?<=\A|' . $spaces . ')\'(\d[.,\d]*)\'/' ] = $open_sq_flag . '$1' . $clos ing_single_quote;191 $dynamic[ '/(?<=\A|' . $spaces . ')\'(\d[.,\d]*)\'/' ] = $open_sq_flag . '$1' . $close_sq_flag; 156 192 } 157 193 158 194 // Single quote at start, or preceded by (, {, <, [, ", -, or spaces. … … 171 207 172 208 // Quoted Numbers like "42" 173 209 if ( '"' !== $opening_quote && '"' !== $closing_quote ) { 174 $dynamic[ '/(?<=\A|' . $spaces . ')"(\d[.,\d]*)"/' ] = $open_q_flag . '$1' . $clos ing_quote;210 $dynamic[ '/(?<=\A|' . $spaces . ')"(\d[.,\d]*)"/' ] = $open_q_flag . '$1' . $close_q_flag; 175 211 } 176 212 177 213 // Double quote at start, or preceded by (, {, <, [, -, or spaces, and not followed by spaces. … … 191 227 192 228 $dynamic_characters['dash'] = array_keys( $dynamic ); 193 229 $dynamic_replacements['dash'] = array_values( $dynamic ); 230 231 $regexs['nonsplit'] = '\/?(?:a\b|abbr|b\b|big|br|dfn|em|i\b|samp|small|span|strong|sub|sup|var)[^>]*>'; 232 233 // Might as well initialize the comment regex once seeing as it's invariant. 234 $regexs['comment'] = 235 '!' // Start of comment, after the <. 236 . '(?:' // Unroll the loop: Consume everything until --> is found. 237 . '-(?!->)' // Dash not followed by end of comment. 238 . '[^\-]*+' // Consume non-dashes. 239 . ')*+' // Loop possessively. 240 . '(?:-->)?'; // End of comment. If not found, match all input. 194 241 } 195 242 196 243 // Must do this every time in case plugins use these filters in a context sensitive manner … … 214 261 $no_texturize_tags_stack = array(); 215 262 $no_texturize_shortcodes_stack = array(); 216 263 217 // Look for shortcodes and HTML elements. 264 // Set up shortcodes regular expression (used to strip within each split text part), if haven't already or if things changed. 265 if ( $static_shortcode_tags === null || $shortcode_tags !== $static_shortcode_tags ) { 266 $static_shortcode_tags = $shortcode_tags; 267 $static_no_texturize_shortcodes = null; // Force reset of no texturize shortcodes as they need to be registered to be ignored. 268 $tagnames = array_keys( $shortcode_tags ); 269 if ( $tagnames ) { 270 $tagregexp = join( '|', array_map( 'preg_quote', $tagnames ) ); 271 $tagregexp = "(?:$tagregexp)(?![\\w-])"; // Excerpt of get_shortcode_regex(). 272 $regexs['shortcode'] = 273 '|' 274 . '\[' // Find start of shortcode. 275 . '[\/\[]?' // Shortcodes may begin with [/ or [[ 276 . $tagregexp // Only match registered shortcodes, because performance. 277 . '(?:' 278 . '[^\[\]<>]+' // Shortcodes do not contain other shortcodes. Quantifier critical. 279 . '|' 280 . '<[^\[\]>]*>' // HTML elements permitted. Prevents matching ] before >. 281 . ')*+' // Possessive critical. 282 . '\]' // Find end of shortcode. 283 . '\]?'; // Shortcodes may end with ]] 284 } else { 285 $regexs['shortcode'] = ''; 286 } 287 } 218 288 219 $tagnames = array_keys( $shortcode_tags ); 220 $tagregexp = join( '|', array_map( 'preg_quote', $tagnames ) ); 221 $tagregexp = "(?:$tagregexp)(?![\\w-])"; // Excerpt of get_shortcode_regex(). 289 // Set up no texturize shortcodes regular expression (used to split text input), if haven't already or if things changed. 290 if ( $static_no_texturize_shortcodes === null || $no_texturize_shortcodes !== $static_no_texturize_shortcodes ) { 291 $static_no_texturize_shortcodes = $no_texturize_shortcodes; 292 // No texturize shortcodes must also be registered to be ignored, so intersect with registered shortcodes array. 293 $tagnames = array_intersect( $no_texturize_shortcodes, array_keys( $static_shortcode_tags ) ); 294 if ( $tagnames ) { 295 $tagregexp = join( '|', array_map( 'preg_quote', $tagnames ) ); 296 $tagregexp = "(?:$tagregexp)(?![\\w-])"; // Excerpt of get_shortcode_regex(). 297 $regexs['no_texturize_shortcode'] = 298 '|' 299 . '\[' // Find start of shortcode. 300 . '[\/\[]?' // Shortcodes may begin with [/ or [[ 301 . $tagregexp // Only match no texturize shortcodes. 302 . '(?:' 303 . '[^\[\]<>]+' // Shortcodes do not contain other shortcodes. Quantifier critical. 304 . '|' 305 . '<[^\[\]>]*>' // HTML elements permitted. Prevents matching ] before >. 306 . ')*+' // Possessive critical. 307 . '\]' // Find end of shortcode. 308 . '\]?'; // Shortcodes may end with ]] 309 } else { 310 $regexs['no_texturize_shortcode'] = ''; 311 } 312 } 222 313 223 $comment_regex = 224 '!' // Start of comment, after the <. 225 . '(?:' // Unroll the loop: Consume everything until --> is found. 226 . '-(?!->)' // Dash not followed by end of comment. 227 . '[^\-]*+' // Consume non-dashes. 228 . ')*+' // Loop possessively. 229 . '(?:-->)?'; // End of comment. If not found, match all input. 314 // Look for comments, non-inline (non-split) HTML elements and no texturize shortcodes. 230 315 231 $shortcode_regex =232 '\[' // Find start of shortcode.233 . '[\/\[]?' // Shortcodes may begin with [/ or [[234 . $tagregexp // Only match registered shortcodes, because performance.235 . '(?:'236 . '[^\[\]<>]+' // Shortcodes do not contain other shortcodes. Quantifier critical.237 . '|'238 . '<[^\[\]>]*>' // HTML elements permitted. Prevents matching ] before >.239 . ')*+' // Possessive critical.240 . '\]' // Find end of shortcode.241 . '\]?'; // Shortcodes may end with ]]242 243 316 $regex = 244 317 '/(' // Capture the entire match. 245 318 . '<' // Find start of element. 246 319 . '(?(?=!--)' // Is this a comment? 247 . $ comment_regex// Find end of comment.320 . $regexs['comment'] // Find end of comment. 248 321 . '|' 322 . '(?!' . $regexs['nonsplit'] . ')' // Exclude inline html elements. 249 323 . '[^>]*>' // Find end of element. 250 324 . ')' 251 . '|' 252 . $shortcode_regex // Find shortcodes. 325 . $regexs['no_texturize_shortcode'] // Find no texturize shortcodes. 253 326 . ')/s'; 254 327 255 $textarr = preg_split( $regex, $text, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);328 $textarr = preg_split( $regex, $text, -1, PREG_SPLIT_DELIM_CAPTURE ); 256 329 257 foreach ( $textarr as &$curl ) { 258 // Only call _wptexturize_pushpop_element if $curl is a delimiter. 259 $first = $curl[0]; 260 if ( '<' === $first && '<!--' === substr( $curl, 0, 4 ) ) { 261 // This is an HTML comment delimeter. 330 foreach ( $textarr as $curl_idx => &$curl ) { 331 if ( 1 === $curl_idx % 2 ) { 332 // Delimiter. 333 $first = $curl[0]; 334 if ( '<' === $first ) { 335 // If not a comment. 336 if ( '<!--' !== substr( $curl, 0, 4 ) ) { 337 // This is an HTML element delimiter. 262 338 263 continue; 339 _wptexturize_pushpop_element( $curl, $no_texturize_tags_stack, $no_texturize_tags ); 340 } 341 } elseif ( '[' === $first ) { 342 // This is a shortcode delimiter. 264 343 265 } elseif ( '<' === $first && '>' === substr( $curl, -1 ) ) { 266 // This is an HTML element delimiter. 344 if ( '[[' !== substr( $curl, 0, 2 ) && ']]' !== substr( $curl, -2 ) ) { 345 // Looks like a normal shortcode. 346 _wptexturize_pushpop_element( $curl, $no_texturize_shortcodes_stack, $no_texturize_shortcodes ); 347 } else { 348 // Looks like an escaped shortcode. 349 } 350 } 351 } elseif ( empty( $no_texturize_shortcodes_stack ) && empty( $no_texturize_tags_stack ) && '' !== trim( $curl ) ) { 352 // This is neither a delimiter, nor is this content inside of no_texturize pairs. Do texturize. 267 353 268 _wptexturize_pushpop_element( $curl, $no_texturize_tags_stack, $no_texturize_tags ); 269 270 } elseif ( '' === trim( $curl ) ) { 271 // This is a newline between delimiters. Performance improves when we check this. 272 273 continue; 274 275 } elseif ( '[' === $first && 1 === preg_match( '/^' . $shortcode_regex . '$/', $curl ) ) { 276 // This is a shortcode delimiter. 277 278 if ( '[[' !== substr( $curl, 0, 2 ) && ']]' !== substr( $curl, -2 ) ) { 279 // Looks like a normal shortcode. 280 _wptexturize_pushpop_element( $curl, $no_texturize_shortcodes_stack, $no_texturize_shortcodes ); 281 } else { 282 // Looks like an escaped shortcode. 283 continue; 354 // Add a space to any <br>s so that when stripped will be recognized as whitespace. 355 if ( $have_br = ( false !== stripos( $curl, '<br' ) ) ) { 356 $curl = preg_replace( '/<br[^>]*>/i', '$0 ', $curl ); 284 357 } 285 358 286 } elseif ( empty( $no_texturize_shortcodes_stack ) && empty( $no_texturize_tags_stack ) ) { 287 // This is neither a delimiter, nor is this content inside of no_texturize pairs. Do texturize. 359 wptexturize_replace_init( $curl, '/<[^>]*>' . $regexs['shortcode'] . '/' ); 288 360 289 $curl = str_replace( $static_characters, $static_replacements, $curl);361 wptexturize_replace_str( $curl, $static_characters, $static_replacements ); 290 362 291 363 if ( false !== strpos( $curl, "'" ) ) { 292 $curl = preg_replace( $dynamic_characters['apos'], $dynamic_replacements['apos'], $curl ); 293 $curl = wptexturize_primes( $curl, "'", $prime, $open_sq_flag, $closing_single_quote ); 294 $curl = str_replace( $apos_flag, $apos, $curl ); 295 $curl = str_replace( $open_sq_flag, $opening_single_quote, $curl ); 364 wptexturize_replace_regex( $curl, $dynamic_characters['apos'], $dynamic_replacements['apos'] ); 365 // Substitute single quotes with same-sized dummy so that wptexturize_primes() doesn't alter size of string. 366 wptexturize_replace_str( $curl, "'", $sq_flag ); 367 $curl = wptexturize_primes( $curl, $sq_flag, $prime_sq_flag, $open_sq_flag, $close_sq_flag, $spaces ); 368 // Reinstate real values. 369 wptexturize_replace_str( $curl, $flags['sq'], $reals['sq'] ); 296 370 } 297 371 if ( false !== strpos( $curl, '"' ) ) { 298 $curl = preg_replace( $dynamic_characters['quote'], $dynamic_replacements['quote'], $curl ); 299 $curl = wptexturize_primes( $curl, '"', $double_prime, $open_q_flag, $closing_quote ); 300 $curl = str_replace( $open_q_flag, $opening_quote, $curl ); 372 wptexturize_replace_regex( $curl, $dynamic_characters['quote'], $dynamic_replacements['quote'] ); 373 // Substitute double quotes with same-sized dummy so that wptexturize_primes() doesn't alter size of string. 374 wptexturize_replace_str( $curl, '"', $q_flag ); 375 $curl = wptexturize_primes( $curl, $q_flag, $prime_q_flag, $open_q_flag, $close_q_flag, $spaces ); 376 // Reinstate real values. 377 wptexturize_replace_str( $curl, $flags['q'], $reals['q'] ); 301 378 } 302 379 if ( false !== strpos( $curl, '-' ) ) { 303 $curl = preg_replace( $dynamic_characters['dash'], $dynamic_replacements['dash'], $curl);380 wptexturize_replace_regex( $curl, $dynamic_characters['dash'], $dynamic_replacements['dash'] ); 304 381 } 305 382 306 383 // 9x9 (times), but never 0x9999 307 384 if ( 1 === preg_match( '/(?<=\d)x\d/', $curl ) ) { 308 385 // Searching for a digit is 10 times more expensive than for the x, so we avoid doing this one! 309 $curl = preg_replace( '/\b(\d(?(?<=0)[\d\.,]+|[\d\.,]*))x(\d[\d\.,]*)\b/', '$1×$2', $curl );386 wptexturize_replace_regex( $curl, '/\b(\d(?(?<=0)[\d\.,]+|[\d\.,]*))x(?=\d[\d\.,]*\b)/', '$1×' ); // Changed to use look ahead as can only deal with a single sub-replacement. 310 387 } 388 389 wptexturize_replace_final( $curl ); 390 391 // Remove any spaces added to <br>s at the start. 392 if ( $have_br ) { 393 $curl = preg_replace( '/(<br[^>]*>) /i', '$1', $curl ); 394 } 311 395 } 312 396 } 313 397 $text = implode( '', $textarr ); … … 330 414 * @param string $close_quote The closing quote char to use for replacement. 331 415 * @return string The $haystack value after primes and quotes replacements. 332 416 */ 333 function wptexturize_primes( $haystack, $needle, $prime, $open_quote, $close_quote ) {334 $ spaces = wp_spaces_regexp();335 $flag = '<!--wp-prime-or-quote-->';417 function wptexturize_primes( $haystack, $needle, $prime, $open_quote, $close_quote, $spaces ) { 418 $flag = str_pad( '<i f>', strlen( $needle ), '>' ); // Making flag same size as the passed-in dummy. 419 $flag_len = strlen( $flag ); 336 420 $quote_pattern = "/$needle(?=\\Z|[.,:;!?)}\\-\\]]|>|" . $spaces . ")/"; 337 421 $prime_pattern = "/(?<=\\d)$needle/"; 338 422 $flag_after_digit = "/(?<=\\d)$flag/"; … … 359 443 // This is most likely to be problematic in the context of bug #18549. 360 444 $pos = strrpos( $sentence, $flag ); 361 445 } 362 $sentence = substr_replace( $sentence, $close_quote, $pos, strlen( $flag ));446 $sentence = substr_replace( $sentence, $close_quote, $pos, $flag_len ); 363 447 } 364 448 // Use conventional replacement on any remaining primes and quotes. 365 449 $sentence = preg_replace( $prime_pattern, $prime, $sentence ); 366 450 $sentence = preg_replace( $flag_after_digit, $prime, $sentence ); 367 451 $sentence = str_replace( $flag, $close_quote, $sentence ); 368 } elseif ( 1 == $count ) {452 } elseif ( 1 === $count ) { 369 453 // Found only one closing quote candidate, so give it priority over primes. 370 454 $sentence = str_replace( $flag, $close_quote, $sentence ); 371 455 $sentence = preg_replace( $prime_pattern, $prime, $sentence ); … … 377 461 $sentence = preg_replace( $prime_pattern, $prime, $sentence ); 378 462 $sentence = preg_replace( $quote_pattern, $close_quote, $sentence ); 379 463 } 380 if ( '"' == $needle && false !== strpos( $sentence, '"') ) {381 $sentence = str_replace( '"', $close_quote, $sentence );464 if ( '"' === $needle[0] && false !== strpos( $sentence, $needle ) ) { 465 $sentence = str_replace( $needle, $close_quote, $sentence ); 382 466 } 383 467 } 384 468 … … 440 524 } 441 525 442 526 /** 527 * Initialize the stripped string routines wptexturize_replace_XXX, setting the globals used. 528 * $str will be stripped of any strings that match the regular expression $search. 529 */ 530 function wptexturize_replace_init( &$str, $search ) { 531 global $wptexturize_strip_cnt, $wptexturize_strips, $wptexturize_adjusts; 532 533 $wptexturize_strip_cnt = 0; 534 535 if ( preg_match_all( $search, $str, $matches, PREG_OFFSET_CAPTURE ) ) { 536 $wptexturize_strips = $wptexturize_adjusts = $strs = array(); 537 $diff = 0; 538 foreach ( $matches[0] as list( $match, $offset ) ) { 539 $len = strlen( $match ); 540 // Save details of stripped string. 541 $wptexturize_strips[] = array( $match, $offset - $diff /*, $len /* Store len if not using byte array in wptexturize_replace_final(). */ ); 542 $diff += $len; 543 $strs[] = $match; // If using str_replace rather than (safer) preg_replace. 544 } 545 $wptexturize_strip_cnt = count( $wptexturize_strips ); 546 $str = str_replace( $strs, '', $str ); // Assuming simple matches replaceable in whole string (otherwise need to do preg_replace( $search, '', $str )). 547 } 548 return $wptexturize_strip_cnt; 549 } 550 551 /** 552 * Do a straight (non-regexp) string substitution, keeping tabs on the offset adjustments if have a stripped string. 553 */ 554 function wptexturize_replace_str( &$str, $search, $repl ) { 555 global $wptexturize_strip_cnt, $wptexturize_adjusts; 556 557 if ( $wptexturize_strip_cnt ) { 558 // Process simple string search, given replacement string $repl. 559 $searches = is_array( $search ) ? $search : array( $search ); 560 $repls = is_array( $repl ) ? $repl : array( $repl ); 561 562 // As replacements could interfere with later ones, treat each separately. 563 foreach ( $searches as $idx => $search_str ) { 564 if ( false !== ( $offset = strpos( $str, $search_str ) ) ) { 565 $repl_str = $repls[$idx]; 566 $repl_len = strlen( $repl_str ); 567 $len = strlen( $search_str ); 568 $diff_len = $repl_len - $len; 569 if ( $diff_len ) { 570 $diff = 0; 571 do { 572 // Store adjustment details. 573 $wptexturize_adjusts[] = array( $offset + $diff, $repl_len - 1 ); 574 if ( $len > 1 ) { // Do it this way (rather than one adjust of $repl_len - $len) to keep adjustments "atomic", ie to keep stripped elements outside replacement. 575 $wptexturize_adjusts[] = array( $offset + $diff + $repl_len, 1 - $len ); 576 } 577 $diff += $diff_len; 578 } while ( false !== ( $offset = strpos( $str, $search_str, $offset + $len ) ) ); 579 } 580 $str = str_replace( $search_str, $repl_str, $str ); 581 } 582 } 583 } else { 584 $str = str_replace( $search, $repl, $str ); 585 } 586 } 587 588 /** 589 * Do a regexp string substitution, keeping tabs on the offset adjustments if have a stripped string. 590 */ 591 function wptexturize_replace_regex( &$str, $search, $repl ) { 592 global $wptexturize_strip_cnt, $wptexturize_adjusts; 593 594 if ( $wptexturize_strip_cnt ) { 595 // Process regex, given replacement string $repl. 596 $searches = is_array( $search ) ? $search : array( $search ); 597 $repls = is_array( $repl ) ? $repl : array( $repl ); 598 599 // As replacements could interfere with later ones, treat each separately. 600 foreach ( $searches as $idx => $re ) { 601 if ( preg_match_all( $re, $str, $matches, PREG_OFFSET_CAPTURE ) ) { 602 $repl_str = $repls[$idx]; 603 $repl_len = strlen( $repl_str ); 604 $diff = 0; 605 // Allow for a single captured replacement. 606 if ( false !== ( $pos1 = strpos( $repl_str, '$1' ) ) ) { 607 foreach ( $matches[0] as $i => list( $match, $offset ) ) { 608 // For a 'pre$1post' replacement, need to track pre-submatch replace and then post-submatch replace. 609 $pre_repl_len = $pos1; 610 $pre_len = $matches[1][$i][1] - $offset; // Submatch offset less full match offset. 611 if ( $pre_repl_len !== $pre_len ) { 612 // Store adjustment details. 613 $wptexturize_adjusts[] = array( $offset + $diff, $pre_repl_len - 1 ); 614 if ( $pre_len > 1 ) { // Keep adjustments atomic. 615 $wptexturize_adjusts[] = array( $offset + $diff + $pre_repl_len, 1 - $pre_len ); 616 } 617 $diff += $pre_repl_len - $pre_len; 618 } 619 $len1 = strlen( $matches[1][$i][0] ); // Length of submatch string. 620 $post_repl_len = $repl_len - ( $pre_repl_len + 2 ); 621 $post_len = strlen( $match ) - ( $pre_len + $len1 ); 622 if ( $post_repl_len !== $post_len ) { 623 // Store adjustment details. 624 $offset += $pre_len + $len1; // Jump over substituted pre-string & submatch. 625 $wptexturize_adjusts[] = array( $offset + $diff, $post_repl_len - 1 ); 626 if ( $post_len > 1 ) { // Keep adjustments atomic. 627 $wptexturize_adjusts[] = array( $offset + $diff + $post_repl_len, 1 - $post_len ); 628 } 629 $diff += $post_repl_len - $post_len; 630 } 631 } 632 } else { 633 foreach ( $matches[0] as list( $match, $offset ) ) { 634 $len = strlen( $match ); 635 if ( $repl_len !== $len ) { 636 // Store adjustment details. 637 $wptexturize_adjusts[] = array( $offset + $diff, $repl_len - 1 ); 638 if ( $len > 1 ) { // Keep adjustments atomic. 639 $wptexturize_adjusts[] = array( $offset + $diff + $repl_len, 1 - $len ); 640 } 641 $diff += $repl_len - $len; 642 } 643 } 644 } 645 $str = preg_replace( $re, $repl_str, $str ); 646 } 647 } 648 } else { 649 $str = preg_replace( $search, $repl, $str ); 650 } 651 } 652 653 /** 654 * Restore stripped strings to $str. 655 */ 656 function wptexturize_replace_final( &$str ) { 657 global $wptexturize_strip_cnt, $wptexturize_strips, $wptexturize_adjusts; 658 659 // Finalize - restore stripped strings. 660 if ( $wptexturize_strip_cnt ) { 661 // Calculate offset adjustments. 662 foreach ( $wptexturize_adjusts as list( $offset, $diff_len ) ) { 663 for ( $i = $wptexturize_strip_cnt - 1; $i >= 0 && $offset < $wptexturize_strips[$i][1]; $i-- ) { 664 $wptexturize_strips[$i][1] += $diff_len; 665 } 666 } 667 668 // Restore stripped strings. 669 $str_arr = str_split( $str ); // Using byte array (seems to be a bit quicker than substr_replace()). 670 array_unshift( $str_arr, '' ); 671 foreach ( $wptexturize_strips as list( $strip, $offset ) ) { 672 $str_arr[$offset] .= $strip; 673 } 674 $str = implode( '', $str_arr ); 675 unset( $str_arr ); 676 /* If not using byte array. (Note need to store $len in wptexturize_replace_init()). 677 $diff = 0; 678 foreach ( $wptexturize_strips as list( $strip, $offset, $len ) ) { 679 $str = substr_replace( $str, $strip, $offset + $diff, 0 ); 680 $diff += $len; 681 } 682 /**/ 683 $wptexturize_strip_cnt = 0; 684 } 685 } 686 687 /** 443 688 * Replaces double line-breaks with paragraph elements. 444 689 * 445 690 * A group of regex replaces used to identify text formatted with newlines and