Make WordPress Core


Ignore:
Timestamp:
06/19/2015 08:05:52 PM (10 years ago)
Author:
wonderboymusic
Message:

wptexturize() improvements:

  • Make sure that strings ending with a number followed by a quotation mark get the proper smart quotes.
  • Introduce wptexturize_primes(), a logic tree that determines whether or not "7'." represents seven feet and then converts the special char into either a prime char or a closing quote char.

Adds unit tests.

Props miqrogroove.
Fixes #29256.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/wp-includes/formatting.php

    r32851 r32863  
    4747        $default_no_texturize_tags = null,
    4848        $default_no_texturize_shortcodes = null,
    49         $run_texturize = true;
     49        $run_texturize = true,
     50        $apos = null,
     51        $prime = null,
     52        $double_prime = null,
     53        $opening_quote = null,
     54        $closing_quote = null,
     55        $opening_single_quote = null,
     56        $closing_single_quote = null,
     57        $open_q_flag = '<!--oq-->',
     58        $open_sq_flag = '<!--osq-->',
     59        $apos_flag = '<!--apos-->';
    5060
    5161    // If there's nothing to do, just stop.
     
    130140        // '99' and '99" are ambiguous among other patterns; assume it's an abbreviated year at the end of a quotation.
    131141        if ( "'" !== $apos || "'" !== $closing_single_quote ) {
    132             $dynamic[ '/\'(\d\d)\'(?=\Z|[.,:;!?)}\-\]]|&gt;|' . $spaces . ')/' ] = $apos . '$1' . $closing_single_quote;
     142            $dynamic[ '/\'(\d\d)\'(?=\Z|[.,:;!?)}\-\]]|&gt;|' . $spaces . ')/' ] = $apos_flag . '$1' . $closing_single_quote;
    133143        }
    134144        if ( "'" !== $apos || '"' !== $closing_quote ) {
    135             $dynamic[ '/\'(\d\d)"(?=\Z|[.,:;!?)}\-\]]|&gt;|' . $spaces . ')/' ] = $apos . '$1' . $closing_quote;
     145            $dynamic[ '/\'(\d\d)"(?=\Z|[.,:;!?)}\-\]]|&gt;|' . $spaces . ')/' ] = $apos_flag . '$1' . $closing_quote;
    136146        }
    137147
    138148        // '99 '99s '99's (apostrophe)  But never '9 or '99% or '999 or '99.0.
    139149        if ( "'" !== $apos ) {
    140             $dynamic[ '/\'(?=\d\d(?:\Z|(?![%\d]|[.,]\d)))/' ] = $apos;
     150            $dynamic[ '/\'(?=\d\d(?:\Z|(?![%\d]|[.,]\d)))/' ] = $apos_flag;
    141151        }
    142152
    143153        // Quoted Numbers like '0.42'
    144154        if ( "'" !== $opening_single_quote && "'" !== $closing_single_quote ) {
    145             $dynamic[ '/(?<=\A|' . $spaces . ')\'(\d[.,\d]*)\'/' ] = $opening_single_quote . '$1' . $closing_single_quote;
     155            $dynamic[ '/(?<=\A|' . $spaces . ')\'(\d[.,\d]*)\'/' ] = $open_sq_flag . '$1' . $closing_single_quote;
    146156        }
    147157
    148158        // Single quote at start, or preceded by (, {, <, [, ", -, or spaces.
    149159        if ( "'" !== $opening_single_quote ) {
    150             $dynamic[ '/(?<=\A|[([{"\-]|&lt;|' . $spaces . ')\'/' ] = $opening_single_quote;
     160            $dynamic[ '/(?<=\A|[([{"\-]|&lt;|' . $spaces . ')\'/' ] = $open_sq_flag;
    151161        }
    152162
    153163        // Apostrophe in a word.  No spaces, double apostrophes, or other punctuation.
    154164        if ( "'" !== $apos ) {
    155             $dynamic[ '/(?<!' . $spaces . ')\'(?!\Z|[.,:;!?"\'(){}[\]\-]|&[lg]t;|' . $spaces . ')/' ] = $apos;
    156         }
    157 
    158         // 9' (prime)
    159         if ( "'" !== $prime ) {
    160             $dynamic[ '/(?<=\d)\'/' ] = $prime;
    161         }
    162 
    163         // Single quotes followed by spaces or ending punctuation.
    164         if ( "'" !== $closing_single_quote ) {
    165             $dynamic[ '/\'(?=\Z|[.,:;!?)}\-\]]|&gt;|' . $spaces . ')/' ] = $closing_single_quote;
     165            $dynamic[ '/(?<!' . $spaces . ')\'(?!\Z|[.,:;!?"\'(){}[\]\-]|&[lg]t;|' . $spaces . ')/' ] = $apos_flag;
    166166        }
    167167
     
    172172        // Quoted Numbers like "42"
    173173        if ( '"' !== $opening_quote && '"' !== $closing_quote ) {
    174             $dynamic[ '/(?<=\A|' . $spaces . ')"(\d[.,\d]*)"/' ] = $opening_quote . '$1' . $closing_quote;
    175         }
    176 
    177         // 9" (double prime)
    178         if ( '"' !== $double_prime ) {
    179             $dynamic[ '/(?<=\d)"/' ] = $double_prime;
     174            $dynamic[ '/(?<=\A|' . $spaces . ')"(\d[.,\d]*)"/' ] = $open_q_flag . '$1' . $closing_quote;
    180175        }
    181176
    182177        // Double quote at start, or preceded by (, {, <, [, -, or spaces, and not followed by spaces.
    183178        if ( '"' !== $opening_quote ) {
    184             $dynamic[ '/(?<=\A|[([{\-]|&lt;|' . $spaces . ')"(?!' . $spaces . ')/' ] = $opening_quote;
    185         }
    186 
    187         // Any remaining double quotes.
    188         if ( '"' !== $closing_quote ) {
    189             $dynamic[ '/"/' ] = $closing_quote;
     179            $dynamic[ '/(?<=\A|[([{\-]|&lt;|' . $spaces . ')"(?!' . $spaces . ')/' ] = $open_q_flag;
    190180        }
    191181
     
    301291            if ( false !== strpos( $curl, "'" ) ) {
    302292                $curl = preg_replace( $dynamic_characters['apos'], $dynamic_replacements['apos'], $curl );
     293                $curl = wptexturize_primes( $curl, "'", $prime, $open_sq_flag, $closing_single_quote );
     294                $curl = str_replace( $apos_flag, $apos, $curl );
     295                $curl = str_replace( $open_sq_flag, $opening_single_quote, $curl );
    303296            }
    304297            if ( false !== strpos( $curl, '"' ) ) {
    305298                $curl = preg_replace( $dynamic_characters['quote'], $dynamic_replacements['quote'], $curl );
     299                $curl = wptexturize_primes( $curl, '"', $double_prime, $open_q_flag, $closing_quote );
     300                $curl = str_replace( $open_q_flag, $opening_quote, $curl );
    306301            }
    307302            if ( false !== strpos( $curl, '-' ) ) {
     
    320315    // Replace each & with &#038; unless it already looks like an entity.
    321316    return preg_replace( '/&(?!#(?:\d+|x[a-f0-9]+);|[a-z1-4]{1,8};)/i', '&#038;', $text );
     317}
     318
     319/**
     320 * Implements a logic tree to determine whether or not "7'." represents seven feet,
     321 * then converts the special char into either a prime char or a closing quote char.
     322 *
     323 * @since 4.3.0
     324 *
     325 * @param string $haystack The plain text to be searched.
     326 * @param string $needle The character to search for such as ' or ".
     327 * @param string $prime The prime char to use for replacement.
     328 * @param string $open_quote The opening quote char. Opening quote replacement must be accomplished already.
     329 * @param string $close_quote The closing quote char to use for replacement.
     330 * @return string The $haystack value after primes and quotes replacements.
     331 */
     332function wptexturize_primes( $haystack, $needle, $prime, $open_quote, $close_quote ) {
     333    $spaces = wp_spaces_regexp();
     334    $flag = '<!--wp-prime-or-quote-->';
     335    $quote_pattern = "/$needle(?=\\Z|[.,:;!?)}\\-\\]]|&gt;|" . $spaces . ")/";
     336    $prime_pattern    = "/(?<=\\d)$needle/";
     337    $flag_after_digit = "/(?<=\\d)$flag/";
     338    $flag_no_digit    = "/(?<!\\d)$flag/";
     339
     340    $sentences = explode( $open_quote, $haystack );
     341
     342    foreach( $sentences as $key => &$sentence ) {
     343        if ( false === strpos( $sentence, $needle ) ) {
     344            continue;
     345        } elseif ( 0 !== $key && 0 === substr_count( $sentence, $close_quote ) ) {
     346            $sentence = preg_replace( $quote_pattern, $flag, $sentence, -1, $count );
     347            if ( $count > 1 ) {
     348                // This sentence appears to have multiple closing quotes.  Attempt Vulcan logic.
     349                $sentence = preg_replace( $flag_no_digit, $close_quote, $sentence, -1, $count2 );
     350                if ( 0 === $count2 ) {
     351                    // Try looking for a quote followed by a period.
     352                    $count2 = substr_count( $sentence, "$flag." );
     353                    if ( $count2 > 0 ) {
     354                        // Assume the rightmost quote-period match is the end of quotation.
     355                        $pos = strrpos( $sentence, "$flag." );
     356                    } else {
     357                        // When all else fails, make the rightmost candidate a closing quote.
     358                        // This is most likely to be problematic in the context of bug #18549.
     359                        $pos = strrpos( $sentence, $flag );
     360                    }
     361                    $sentence = substr_replace( $sentence, $close_quote, $pos, strlen( $flag ) );
     362                }
     363                // Use conventional replacement on any remaining primes and quotes.
     364                $sentence = preg_replace( $prime_pattern, $prime, $sentence );
     365                $sentence = preg_replace( $flag_after_digit, $prime, $sentence );
     366                $sentence = str_replace( $flag, $close_quote, $sentence );
     367            } elseif ( 1 == $count ) {
     368                // Found only one closing quote candidate, so give it priority over primes.
     369                $sentence = str_replace( $flag, $close_quote, $sentence );
     370                $sentence = preg_replace( $prime_pattern, $prime, $sentence );
     371            } else {
     372                // No closing quotes found.  Just run primes pattern.
     373                $sentence = preg_replace( $prime_pattern, $prime, $sentence );
     374            }
     375        } else {
     376            $sentence = preg_replace( $prime_pattern, $prime, $sentence );
     377            $sentence = preg_replace( $quote_pattern, $close_quote, $sentence );
     378        }
     379        if ( '"' == $needle && false !== strpos( $sentence, '"' ) ) {
     380            $sentence = str_replace( '"', $close_quote, $sentence );
     381        }
     382    }
     383
     384    return implode( $open_quote, $sentences );
    322385}
    323386
Note: See TracChangeset for help on using the changeset viewer.