Make WordPress Core

Changeset 32863


Ignore:
Timestamp:
06/19/2015 08:05:52 PM (9 years ago)
Author:
wonderboymusic
Message:

wptexturize() improvements:

  • Make sure that strings ending with a number and a quotation mark get the proper smart quotes.
  • Introduce wptexturize_primes(), a logic tree that determines whether or not "7'." represents seven feet, and then converts the special char into either a prime char or a closing quote char.

Adds unit tests.

Props miqrogroove.
Fixes #29256.

Location:
trunk
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/wp-includes/formatting.php

    r32851 r32863  
    4747        $default_no_texturize_tags = null,
    4848        $default_no_texturize_shortcodes = null,
    49         $run_texturize = true;
     49        $run_texturize = true,
     50        $apos = null,
     51        $prime = null,
     52        $double_prime = null,
     53        $opening_quote = null,
     54        $closing_quote = null,
     55        $opening_single_quote = null,
     56        $closing_single_quote = null,
     57        $open_q_flag = '<!--oq-->',
     58        $open_sq_flag = '<!--osq-->',
     59        $apos_flag = '<!--apos-->';
    5060
    5161    // If there's nothing to do, just stop.
     
    130140        // '99' and '99" are ambiguous among other patterns; assume it's an abbreviated year at the end of a quotation.
    131141        if ( "'" !== $apos || "'" !== $closing_single_quote ) {
    132             $dynamic[ '/\'(\d\d)\'(?=\Z|[.,:;!?)}\-\]]|&gt;|' . $spaces . ')/' ] = $apos . '$1' . $closing_single_quote;
     142            $dynamic[ '/\'(\d\d)\'(?=\Z|[.,:;!?)}\-\]]|&gt;|' . $spaces . ')/' ] = $apos_flag . '$1' . $closing_single_quote;
    133143        }
    134144        if ( "'" !== $apos || '"' !== $closing_quote ) {
    135             $dynamic[ '/\'(\d\d)"(?=\Z|[.,:;!?)}\-\]]|&gt;|' . $spaces . ')/' ] = $apos . '$1' . $closing_quote;
     145            $dynamic[ '/\'(\d\d)"(?=\Z|[.,:;!?)}\-\]]|&gt;|' . $spaces . ')/' ] = $apos_flag . '$1' . $closing_quote;
    136146        }
    137147
    138148        // '99 '99s '99's (apostrophe)  But never '9 or '99% or '999 or '99.0.
    139149        if ( "'" !== $apos ) {
    140             $dynamic[ '/\'(?=\d\d(?:\Z|(?![%\d]|[.,]\d)))/' ] = $apos;
     150            $dynamic[ '/\'(?=\d\d(?:\Z|(?![%\d]|[.,]\d)))/' ] = $apos_flag;
    141151        }
    142152
    143153        // Quoted Numbers like '0.42'
    144154        if ( "'" !== $opening_single_quote && "'" !== $closing_single_quote ) {
    145             $dynamic[ '/(?<=\A|' . $spaces . ')\'(\d[.,\d]*)\'/' ] = $opening_single_quote . '$1' . $closing_single_quote;
     155            $dynamic[ '/(?<=\A|' . $spaces . ')\'(\d[.,\d]*)\'/' ] = $open_sq_flag . '$1' . $closing_single_quote;
    146156        }
    147157
    148158        // Single quote at start, or preceded by (, {, <, [, ", -, or spaces.
    149159        if ( "'" !== $opening_single_quote ) {
    150             $dynamic[ '/(?<=\A|[([{"\-]|&lt;|' . $spaces . ')\'/' ] = $opening_single_quote;
     160            $dynamic[ '/(?<=\A|[([{"\-]|&lt;|' . $spaces . ')\'/' ] = $open_sq_flag;
    151161        }
    152162
    153163        // Apostrophe in a word.  No spaces, double apostrophes, or other punctuation.
    154164        if ( "'" !== $apos ) {
    155             $dynamic[ '/(?<!' . $spaces . ')\'(?!\Z|[.,:;!?"\'(){}[\]\-]|&[lg]t;|' . $spaces . ')/' ] = $apos;
    156         }
    157 
    158         // 9' (prime)
    159         if ( "'" !== $prime ) {
    160             $dynamic[ '/(?<=\d)\'/' ] = $prime;
    161         }
    162 
    163         // Single quotes followed by spaces or ending punctuation.
    164         if ( "'" !== $closing_single_quote ) {
    165             $dynamic[ '/\'(?=\Z|[.,:;!?)}\-\]]|&gt;|' . $spaces . ')/' ] = $closing_single_quote;
     165            $dynamic[ '/(?<!' . $spaces . ')\'(?!\Z|[.,:;!?"\'(){}[\]\-]|&[lg]t;|' . $spaces . ')/' ] = $apos_flag;
    166166        }
    167167
     
    172172        // Quoted Numbers like "42"
    173173        if ( '"' !== $opening_quote && '"' !== $closing_quote ) {
    174             $dynamic[ '/(?<=\A|' . $spaces . ')"(\d[.,\d]*)"/' ] = $opening_quote . '$1' . $closing_quote;
    175         }
    176 
    177         // 9" (double prime)
    178         if ( '"' !== $double_prime ) {
    179             $dynamic[ '/(?<=\d)"/' ] = $double_prime;
     174            $dynamic[ '/(?<=\A|' . $spaces . ')"(\d[.,\d]*)"/' ] = $open_q_flag . '$1' . $closing_quote;
    180175        }
    181176
    182177        // Double quote at start, or preceded by (, {, <, [, -, or spaces, and not followed by spaces.
    183178        if ( '"' !== $opening_quote ) {
    184             $dynamic[ '/(?<=\A|[([{\-]|&lt;|' . $spaces . ')"(?!' . $spaces . ')/' ] = $opening_quote;
    185         }
    186 
    187         // Any remaining double quotes.
    188         if ( '"' !== $closing_quote ) {
    189             $dynamic[ '/"/' ] = $closing_quote;
     179            $dynamic[ '/(?<=\A|[([{\-]|&lt;|' . $spaces . ')"(?!' . $spaces . ')/' ] = $open_q_flag;
    190180        }
    191181
     
    301291            if ( false !== strpos( $curl, "'" ) ) {
    302292                $curl = preg_replace( $dynamic_characters['apos'], $dynamic_replacements['apos'], $curl );
     293                $curl = wptexturize_primes( $curl, "'", $prime, $open_sq_flag, $closing_single_quote );
     294                $curl = str_replace( $apos_flag, $apos, $curl );
     295                $curl = str_replace( $open_sq_flag, $opening_single_quote, $curl );
    303296            }
    304297            if ( false !== strpos( $curl, '"' ) ) {
    305298                $curl = preg_replace( $dynamic_characters['quote'], $dynamic_replacements['quote'], $curl );
     299                $curl = wptexturize_primes( $curl, '"', $double_prime, $open_q_flag, $closing_quote );
     300                $curl = str_replace( $open_q_flag, $opening_quote, $curl );
    306301            }
    307302            if ( false !== strpos( $curl, '-' ) ) {
     
    320315    // Replace each & with &#038; unless it already looks like an entity.
    321316    return preg_replace( '/&(?!#(?:\d+|x[a-f0-9]+);|[a-z1-4]{1,8};)/i', '&#038;', $text );
     317}
     318
     319/**
     320 * Implements a logic tree to determine whether or not "7'." represents seven feet,
     321 * then converts the special char into either a prime char or a closing quote char.
     322 *
     323 * @since 4.3.0
     324 *
     325 * @param string $haystack The plain text to be searched.
     326 * @param string $needle The character to search for such as ' or ".
     327 * @param string $prime The prime char to use for replacement.
     328 * @param string $open_quote The opening quote char. Opening quote replacement must be accomplished already.
     329 * @param string $close_quote The closing quote char to use for replacement.
     330 * @return string The $haystack value after primes and quotes replacements.
     331 */
     332function wptexturize_primes( $haystack, $needle, $prime, $open_quote, $close_quote ) {
     333    $spaces = wp_spaces_regexp();
     334    $flag = '<!--wp-prime-or-quote-->';
     335    $quote_pattern = "/$needle(?=\\Z|[.,:;!?)}\\-\\]]|&gt;|" . $spaces . ")/";
     336    $prime_pattern    = "/(?<=\\d)$needle/";
     337    $flag_after_digit = "/(?<=\\d)$flag/";
     338    $flag_no_digit    = "/(?<!\\d)$flag/";
     339
     340    $sentences = explode( $open_quote, $haystack );
     341
     342    foreach( $sentences as $key => &$sentence ) {
     343        if ( false === strpos( $sentence, $needle ) ) {
     344            continue;
     345        } elseif ( 0 !== $key && 0 === substr_count( $sentence, $close_quote ) ) {
     346            $sentence = preg_replace( $quote_pattern, $flag, $sentence, -1, $count );
     347            if ( $count > 1 ) {
     348                // This sentence appears to have multiple closing quotes.  Attempt Vulcan logic.
     349                $sentence = preg_replace( $flag_no_digit, $close_quote, $sentence, -1, $count2 );
     350                if ( 0 === $count2 ) {
     351                    // Try looking for a quote followed by a period.
     352                    $count2 = substr_count( $sentence, "$flag." );
     353                    if ( $count2 > 0 ) {
     354                        // Assume the rightmost quote-period match is the end of quotation.
     355                        $pos = strrpos( $sentence, "$flag." );
     356                    } else {
     357                        // When all else fails, make the rightmost candidate a closing quote.
     358                        // This is most likely to be problematic in the context of bug #18549.
     359                        $pos = strrpos( $sentence, $flag );
     360                    }
     361                    $sentence = substr_replace( $sentence, $close_quote, $pos, strlen( $flag ) );
     362                }
     363                // Use conventional replacement on any remaining primes and quotes.
     364                $sentence = preg_replace( $prime_pattern, $prime, $sentence );
     365                $sentence = preg_replace( $flag_after_digit, $prime, $sentence );
     366                $sentence = str_replace( $flag, $close_quote, $sentence );
     367            } elseif ( 1 == $count ) {
     368                // Found only one closing quote candidate, so give it priority over primes.
     369                $sentence = str_replace( $flag, $close_quote, $sentence );
     370                $sentence = preg_replace( $prime_pattern, $prime, $sentence );
     371            } else {
     372                // No closing quotes found.  Just run primes pattern.
     373                $sentence = preg_replace( $prime_pattern, $prime, $sentence );
     374            }
     375        } else {
     376            $sentence = preg_replace( $prime_pattern, $prime, $sentence );
     377            $sentence = preg_replace( $quote_pattern, $close_quote, $sentence );
     378        }
     379        if ( '"' == $needle && false !== strpos( $sentence, '"' ) ) {
     380            $sentence = str_replace( '"', $close_quote, $sentence );
     381        }
     382    }
     383
     384    return implode( $open_quote, $sentences );
    322385}
    323386
  • trunk/tests/phpunit/tests/formatting/WPTexturize.php

    r32789 r32863  
    9191        //$this->assertEquals('Here is &#8220;a test <a href="http://example.com">with a link</a>&#8221;.', wptexturize('Here is "a test <a href="http://example.com">with a link</a>".'));
    9292        //$this->assertEquals('Here is &#8220;<a href="http://example.com">a test with a link</a>&#8221;and a work stuck to the end.', wptexturize('Here is "<a href="http://example.com">a test with a link</a>"and a work stuck to the end.'));
    93         //$this->assertEquals('A test with a finishing number, &#8220;like 23&#8221;.', wptexturize('A test with a finishing number, "like 23".'));
    94         //$this->assertEquals('A test with a number, &#8220;like 62&#8221;, is nice to have.', wptexturize('A test with a number, "like 62", is nice to have.'));
     93        $this->assertEquals('A test with a finishing number, &#8220;like 23&#8221;.', wptexturize('A test with a finishing number, "like 23".'));
     94        $this->assertEquals('A test with a number, &#8220;like 62&#8221;, is nice to have.', wptexturize('A test with a number, "like 62", is nice to have.'));
    9595    }
    9696
     
    122122        $this->assertEquals('&#8216;Class of &#8217;99&#8217;s&#8217;', wptexturize("'Class of '99's'"));
    123123        $this->assertEquals('&#8216;Class of &#8217;99&#8217;s&#8217;', wptexturize("'Class of '99&#8217;s'"));
    124         //$this->assertEquals('&#8220;Class of 99&#8221;', wptexturize("\"Class of 99\""));
     124        $this->assertEquals('&#8220;Class of 99&#8221;', wptexturize("\"Class of 99\""));
    125125        $this->assertEquals('&#8220;Class of &#8217;99&#8221;', wptexturize("\"Class of '99\""));
    126126        $this->assertEquals('{&#8220;Class of &#8217;99&#8221;}', wptexturize("{\"Class of '99\"}"));
     
    19011901        );
    19021902    }
     1903
     1904    /**
     1905     * Ensure primes logic is not too greedy at the end of a quotation.
     1906     *
     1907     * @ticket 29256
     1908     * @dataProvider data_primes_vs_quotes
     1909     */
     1910    function test_primes_vs_quotes( $input, $output ) {
     1911        return $this->assertEquals( $output, wptexturize( $input ) );
     1912    }
     1913
     1914    function data_primes_vs_quotes() {
     1915        return array(
     1916            array(
     1917                "George's porch is 99' long.",
     1918                "George&#8217;s porch is 99&#8242; long.",
     1919            ),
     1920            array(
     1921                'The best year "was that time in 2012" when everyone partied, he said.',
     1922                'The best year &#8220;was that time in 2012&#8221; when everyone partied, he said.',
     1923            ),
     1924            array(
     1925                "I need 4 x 20' = 80' of trim.", // Works only with a space before the = char.
     1926                "I need 4 x 20&#8242; = 80&#8242; of trim.",
     1927            ),
     1928            array(
     1929                '"Lorem ipsum dolor sit amet 1234"',
     1930                '&#8220;Lorem ipsum dolor sit amet 1234&#8221;',
     1931            ),
     1932            array(
     1933                "'Etiam eu egestas dui 1234'",
     1934                "&#8216;Etiam eu egestas dui 1234&#8217;",
     1935            ),
     1936            array(
     1937                'according to our source, "33% of all students scored less than 50" on the test.',
     1938                'according to our source, &#8220;33% of all students scored less than 50&#8221; on the test.',
     1939            ),
     1940            array(
     1941                "The doctor said, 'An average height is between 5' and 6' in study group 7'.  He then produced a 6' chart of averages.  A man of 7', incredibly, is very possible.",
     1942                "The doctor said, &#8216;An average height is between 5&#8242; and 6&#8242; in study group 7&#8217;.  He then produced a 6&#8242; chart of averages.  A man of 7&#8242;, incredibly, is very possible.",
     1943            ),
     1944            array(
     1945                'Pirates have voted on "The Expendables 3" with their clicks -- and it turns out the Sylvester Stallone-starrer hasn\'t been astoundingly popular among digital thieves, relatively speaking.
     1946
     1947As of Sunday, 5.12 million people worldwide had pirated "Expendables 3" since a high-quality copy hit torrent-sharing sites July 23, according to piracy-tracking firm Excipio.
     1948
     1949That likely contributed to the action movie\'s dismal box-office debut this weekend. But over the same July 23-Aug. 18 time period, the movie was No. 4 in downloads, after "Captain America: The Winter Soldier" (7.31 million), "Divergent" (6.29 million) and "The Amazing Spider-Man 2" (5.88 million). Moreover, that\'s despite "Expendables 3" becoming available more than three weeks prior to the film\'s U.S. theatrical debut.
     1950
     1951String with a number followed by a single quote \'Expendables 3\' vestibulum in arcu mi.',
     1952
     1953                'Pirates have voted on &#8220;The Expendables 3&#8221; with their clicks &#8212; and it turns out the Sylvester Stallone-starrer hasn&#8217;t been astoundingly popular among digital thieves, relatively speaking.
     1954
     1955As of Sunday, 5.12 million people worldwide had pirated &#8220;Expendables 3&#8221; since a high-quality copy hit torrent-sharing sites July 23, according to piracy-tracking firm Excipio.
     1956
     1957That likely contributed to the action movie&#8217;s dismal box-office debut this weekend. But over the same July 23-Aug. 18 time period, the movie was No. 4 in downloads, after &#8220;Captain America: The Winter Soldier&#8221; (7.31 million), &#8220;Divergent&#8221; (6.29 million) and &#8220;The Amazing Spider-Man 2&#8221; (5.88 million). Moreover, that&#8217;s despite &#8220;Expendables 3&#8221; becoming available more than three weeks prior to the film&#8217;s U.S. theatrical debut.
     1958
     1959String with a number followed by a single quote &#8216;Expendables 3&#8217; vestibulum in arcu mi.',
     1960            ),
     1961        );
     1962    }
     1963
     1964    /**
     1965     * Make sure translation actually works.
     1966     *
     1967     * Also make sure opening and closing quotes are allowed to be identical.
     1968     *
     1969     * @ticket 29256
     1970     * @dataProvider data_primes_quotes_translation
     1971     */
     1972    function test_primes_quotes_translation( $input, $output ) {
     1973        add_filter( 'gettext_with_context', array( $this, 'filter_translate2' ), 10, 4 );
     1974
     1975        $result = wptexturize( $input, true );
     1976
     1977        remove_filter( 'gettext_with_context', array( $this, 'filter_translate2' ), 10, 4 );
     1978        wptexturize( 'reset', true );
     1979
     1980        return $this->assertEquals( $output, $result );
     1981    }
     1982
     1983    function filter_translate2( $translations, $text, $context, $domain ) {
     1984        switch ($text) {
     1985            case '&#8211;' : return '!endash!';
     1986            case '&#8212;' : return '!emdash!';
     1987            case '&#8216;' : return '!q1!';
     1988            case '&#8217;' :
     1989                if ( 'apostrophe' == $context ) {
     1990                    return '!apos!';
     1991                } else {
     1992                    return '!q1!';
     1993                }
     1994            case '&#8220;' : return '!q2!';
     1995            case '&#8221;' : return '!q2!';
     1996            case '&#8242;' : return '!prime1!';
     1997            case '&#8243;' : return '!prime2!';
     1998            default : return $translations;
     1999        }
     2000    }
     2001
     2002    function data_primes_quotes_translation() {
     2003        return array(
     2004            array(
     2005                "George's porch is 99' long.",
     2006                "George!apos!s porch is 99!prime1! long.",
     2007            ),
     2008            array(
     2009                'The best year "was that time in 2012" when everyone partied, he said.',
     2010                'The best year !q2!was that time in 2012!q2! when everyone partied, he said.',
     2011            ),
     2012            array(
     2013                "I need 4 x 20' = 80' of trim.", // Works only with a space before the = char.
     2014                "I need 4 x 20!prime1! = 80!prime1! of trim.",
     2015            ),
     2016            array(
     2017                '"Lorem ipsum dolor sit amet 1234"',
     2018                '!q2!Lorem ipsum dolor sit amet 1234!q2!',
     2019            ),
     2020            array(
     2021                "'Etiam eu egestas dui 1234'",
     2022                "!q1!Etiam eu egestas dui 1234!q1!",
     2023            ),
     2024            array(
     2025                'according to our source, "33% of all students scored less than 50" on the test.',
     2026                'according to our source, !q2!33% of all students scored less than 50!q2! on the test.',
     2027            ),
     2028            array(
     2029                "The doctor said, 'An average height is between 5' and 6' in study group 7'.  He then produced a 6' chart of averages.  A man of 7', incredibly, is very possible.",
     2030                "The doctor said, !q1!An average height is between 5!prime1! and 6!prime1! in study group 7!q1!.  He then produced a 6!prime1! chart of averages.  A man of 7!prime1!, incredibly, is very possible.",
     2031            ),
     2032            array(
     2033                'Pirates have voted on "The Expendables 3" with their clicks -- and it turns out the Sylvester Stallone-starrer hasn\'t been astoundingly popular among digital thieves, relatively speaking.
     2034
     2035As of Sunday, 5.12 million people worldwide had pirated "Expendables 3" since a high-quality copy hit torrent-sharing sites July 23, according to piracy-tracking firm Excipio.
     2036
     2037That likely contributed to the action movie\'s dismal box-office debut this weekend. But over the same July 23-Aug. 18 time period, the movie was No. 4 in downloads, after "Captain America: The Winter Soldier" (7.31 million), "Divergent" (6.29 million) and "The Amazing Spider-Man 2" (5.88 million). Moreover, that\'s despite "Expendables 3" becoming available more than three weeks prior to the film\'s U.S. theatrical debut.
     2038
     2039String with a number followed by a single quote \'Expendables 3\' vestibulum in arcu mi.',
     2040
     2041                'Pirates have voted on !q2!The Expendables 3!q2! with their clicks !emdash! and it turns out the Sylvester Stallone-starrer hasn!apos!t been astoundingly popular among digital thieves, relatively speaking.
     2042
     2043As of Sunday, 5.12 million people worldwide had pirated !q2!Expendables 3!q2! since a high-quality copy hit torrent-sharing sites July 23, according to piracy-tracking firm Excipio.
     2044
     2045That likely contributed to the action movie!apos!s dismal box-office debut this weekend. But over the same July 23-Aug. 18 time period, the movie was No. 4 in downloads, after !q2!Captain America: The Winter Soldier!q2! (7.31 million), !q2!Divergent!q2! (6.29 million) and !q2!The Amazing Spider-Man 2!q2! (5.88 million). Moreover, that!apos!s despite !q2!Expendables 3!q2! becoming available more than three weeks prior to the film!apos!s U.S. theatrical debut.
     2046
     2047String with a number followed by a single quote !q1!Expendables 3!q1! vestibulum in arcu mi.',
     2048            ),
     2049        );
     2050    }
    19032051}
Note: See TracChangeset for help on using the changeset viewer.