Make WordPress Core

Ticket #29256: miqro-29256.4.patch

File miqro-29256.4.patch, 18.3 KB (added by miqrogroove, 11 years ago)

Handles situations where open and close quotes are identical after translation.

  • src/wp-includes/formatting.php

     
    3030function wptexturize($text, $reset = false) {
    3131        global $wp_cockneyreplace;
    3232        static $static_characters, $static_replacements, $dynamic_characters, $dynamic_replacements,
    33                 $default_no_texturize_tags, $default_no_texturize_shortcodes, $run_texturize = true;
     33                $default_no_texturize_tags, $default_no_texturize_shortcodes, $run_texturize = true,
     34                $apos_flag, $apos, $prime, $double_prime, $opening_quote, $closing_quote, $opening_single_quote,
     35                $closing_single_quote, $open_q_flag, $open_sq_flag;
    3436
    3537        // If there's nothing to do, just stop.
    3638        if ( empty( $text ) || false === $run_texturize ) {
     
    105107                $dynamic_replacements = array( 'apos' => array(), 'quote' => array(), 'dash' => array() );
    106108                $dynamic = array();
    107109                $spaces = wp_spaces_regexp();
     110                $apos_flag = '<!--apos-->';  // We need a semantic representation that is not identical to another quote.
     111                $open_q_flag = '<!--oq-->';
     112                $open_sq_flag = '<!--osq-->';
    108113
    109114                // '99' and '99" are ambiguous among other patterns; assume it's an abbreviated year at the end of a quotation.
    110115                if ( "'" !== $apos || "'" !== $closing_single_quote ) {
    111                         $dynamic[ '/\'(\d\d)\'(?=\Z|[.,)}\-\]]|&gt;|' . $spaces . ')/' ] = $apos . '$1' . $closing_single_quote;
     116                        $dynamic[ '/\'(\d\d)\'(?=\Z|[.,)}\-\]]|&gt;|' . $spaces . ')/' ] = $apos_flag . '$1' . $closing_single_quote;
    112117                }
    113118                if ( "'" !== $apos || '"' !== $closing_quote ) {
    114                         $dynamic[ '/\'(\d\d)"(?=\Z|[.,)}\-\]]|&gt;|' . $spaces . ')/' ] = $apos . '$1' . $closing_quote;
     119                        $dynamic[ '/\'(\d\d)"(?=\Z|[.,)}\-\]]|&gt;|' . $spaces . ')/' ] = $apos_flag . '$1' . $closing_quote;
    115120                }
    116121
    117122                // '99 '99s '99's (apostrophe)  But never '9 or '99% or '999 or '99.0.
    118123                if ( "'" !== $apos ) {
    119                         $dynamic[ '/\'(?=\d\d(?:\Z|(?![%\d]|[.,]\d)))/' ] = $apos;
     124                        $dynamic[ '/\'(?=\d\d(?:\Z|(?![%\d]|[.,]\d)))/' ] = $apos_flag;
    120125                }
    121126
    122127                // Quoted Numbers like '0.42'
    123128                if ( "'" !== $opening_single_quote && "'" !== $closing_single_quote ) {
    124                         $dynamic[ '/(?<=\A|' . $spaces . ')\'(\d[.,\d]*)\'/' ] = $opening_single_quote . '$1' . $closing_single_quote;
     129                        $dynamic[ '/(?<=\A|' . $spaces . ')\'(\d[.,\d]*)\'/' ] = $open_sq_flag . '$1' . $closing_single_quote;
    125130                }
    126131
    127132                // Single quote at start, or preceded by (, {, <, [, ", -, or spaces.
    128133                if ( "'" !== $opening_single_quote ) {
    129                         $dynamic[ '/(?<=\A|[([{"\-]|&lt;|' . $spaces . ')\'/' ] = $opening_single_quote;
     134                        $dynamic[ '/(?<=\A|[([{"\-]|&lt;|' . $spaces . ')\'/' ] = $open_sq_flag;
    130135                }
    131136
    132137                // Apostrophe in a word.  No spaces, double apostrophes, or other punctuation.
    133138                if ( "'" !== $apos ) {
    134                         $dynamic[ '/(?<!' . $spaces . ')\'(?!\Z|[.,:;"\'(){}[\]\-]|&[lg]t;|' . $spaces . ')/' ] = $apos;
     139                        $dynamic[ '/(?<!' . $spaces . ')\'(?!\Z|[.,:;"\'(){}[\]\-]|&[lg]t;|' . $spaces . ')/' ] = $apos_flag;
    135140                }
    136141
    137                 // 9' (prime)
    138                 if ( "'" !== $prime ) {
    139                         $dynamic[ '/(?<=\d)\'/' ] = $prime;
    140                 }
    141 
    142                 // Single quotes followed by spaces or ending punctuation.
    143                 if ( "'" !== $closing_single_quote ) {
    144                         $dynamic[ '/\'(?=\Z|[.,)}\-\]]|&gt;|' . $spaces . ')/' ] = $closing_single_quote;
    145                 }
    146 
    147142                $dynamic_characters['apos'] = array_keys( $dynamic );
    148143                $dynamic_replacements['apos'] = array_values( $dynamic );
    149144                $dynamic = array();
     
    150145
    151146                // Quoted Numbers like "42"
    152147                if ( '"' !== $opening_quote && '"' !== $closing_quote ) {
    153                         $dynamic[ '/(?<=\A|' . $spaces . ')"(\d[.,\d]*)"/' ] = $opening_quote . '$1' . $closing_quote;
     148                        $dynamic[ '/(?<=\A|' . $spaces . ')"(\d[.,\d]*)"/' ] = $open_q_flag . '$1' . $closing_quote;
    154149                }
    155150
    156                 // 9" (double prime)
    157                 if ( '"' !== $double_prime ) {
    158                         $dynamic[ '/(?<=\d)"/' ] = $double_prime;
    159                 }
    160 
    161151                // Double quote at start, or preceded by (, {, <, [, -, or spaces, and not followed by spaces.
    162152                if ( '"' !== $opening_quote ) {
    163                         $dynamic[ '/(?<=\A|[([{\-]|&lt;|' . $spaces . ')"(?!' . $spaces . ')/' ] = $opening_quote;
     153                        $dynamic[ '/(?<=\A|[([{\-]|&lt;|' . $spaces . ')"(?!' . $spaces . ')/' ] = $open_q_flag;
    164154                }
    165155
    166                 // Any remaining double quotes.
    167                 if ( '"' !== $closing_quote ) {
    168                         $dynamic[ '/"/' ] = $closing_quote;
    169                 }
    170 
    171156                $dynamic_characters['quote'] = array_keys( $dynamic );
    172157                $dynamic_replacements['quote'] = array_values( $dynamic );
    173158                $dynamic = array();
     
    271256
    272257                        if ( false !== strpos( $curl, "'" ) ) {
    273258                                $curl = preg_replace( $dynamic_characters['apos'], $dynamic_replacements['apos'], $curl );
     259                                $curl = wptexturize_primes( $curl, "'", $prime, $open_sq_flag, $closing_single_quote );
     260                                $curl = str_replace( $apos_flag, $apos, $curl );
     261                                $curl = str_replace( $open_sq_flag, $opening_single_quote, $curl );
    274262                        }
    275263                        if ( false !== strpos( $curl, '"' ) ) {
    276264                                $curl = preg_replace( $dynamic_characters['quote'], $dynamic_replacements['quote'], $curl );
     265                                $curl = wptexturize_primes( $curl, '"', $double_prime, $open_q_flag, $closing_quote );
     266                                $curl = str_replace( $open_q_flag, $opening_quote, $curl );
    277267                        }
    278268                        if ( false !== strpos( $curl, '-' ) ) {
    279269                                $curl = preg_replace( $dynamic_characters['dash'], $dynamic_replacements['dash'], $curl );
     
    295285}
    296286
    297287/**
     288 * Implements a logic tree to determine whether or not "7'." represents seven feet,
     289 * then converts the special char into either a prime char or a closing quote char.
     290 *
     291 * @since 4.1.0
     292 *
     293 * @param string $haystack The plain text to be searched.
     294 * @param string $needle The character to search for such as ' or ".
     295 * @param string $prime The prime char to use for replacement.
     296 * @param string $open_quote The opening quote char. Opening quote replacement must be accomplished already.
     297 * @param string $close_quote The closing quote char to use for replacement.
     298 * @return string The $haystack value after primes and quotes replacements.
     299 */
     300function wptexturize_primes( $haystack, $needle, $prime, $open_quote, $close_quote ) {
     301
     302        $spaces = wp_spaces_regexp();
     303        $flag = '<!--wp-prime-or-quote-->';
     304        $quote_pattern = "/$needle(?=\\Z|[.,)}\\-\\]]|&gt;|" . $spaces . ")/";
     305        $prime_pattern    = "/(?<=\\d)$needle/";
     306        $flag_after_digit = "/(?<=\\d)$flag/";
     307        $flag_no_digit    = "/(?<!\\d)$flag/";
     308
     309        $sentences = explode( $open_quote, $haystack );
     310
     311        foreach( $sentences as $key => &$sentence ) {
     312                if ( false === strpos( $sentence, $needle ) ) {
     313                        continue;
     314                } elseif ( 0 !== $key && substr_count( $sentence, $close_quote ) === 0 ) {
     315                        $sentence = preg_replace( $quote_pattern, $flag, $sentence, -1, $count );
     316                        if ( $count > 1) {
     317                                // This sentence appears to have multiple closing quotes.  Attempt Vulcan logic.
     318                                $sentence = preg_replace( $flag_no_digit, $close_quote, $sentence, -1, $count2 );
     319                                if ( 0 === $count2 ) {
     320                                        // Closing quote still ambiguous.  Look for a quote followed by a period.
     321                                        $count2 = substr_count( $sentence, "$flag." );
     322                                        if ( $count2 > 0 ) {
     323                                                // Assume the rightmost quote-period match is the end of quotation.
     324                                                $pos = strrpos( $sentence, "$flag." );
     325                                        } else {
     326                                                // When all else fails, make the rightmost candidate a closing quote.
     327                                                // This is most likely to be problematic in the context of bug #18549.
     328                                                $pos = strrpos( $sentence, $flag );
     329                                        }
     330                                        $sentence = substr_replace( $sentence, $close_quote, $pos, strlen( $flag ) );
     331                                }
     332                                // Use conventional replacement on any remaining primes and quotes.
     333                                $sentence = preg_replace( $prime_pattern, $prime, $sentence );
     334                                $sentence = preg_replace( $flag_after_digit, $prime, $sentence );
     335                                $sentence = str_replace( $flag, $close_quote, $sentence );
     336                        } elseif ( 1 == $count ) {
     337                                // Found only one closing quote candidate, so give it priority over primes.
     338                                $sentence = str_replace( $flag, $close_quote, $sentence );
     339                                $sentence = preg_replace( $prime_pattern, $prime, $sentence );
     340                        } else {
     341                                // No closing quotes found.  Just run primes pattern.
     342                                $sentence = preg_replace( $prime_pattern, $prime, $sentence );
     343                        }
     344                } else {
     345                        $sentence = preg_replace( $prime_pattern, $prime, $sentence );
     346                        $sentence = preg_replace( $quote_pattern, $close_quote, $sentence );
     347                }
     348                if ( '"' == $needle ) {
     349                        $sentence = str_replace( '"', $close_quote, $sentence );
     350                }
     351        }
     352
     353        return implode( $open_quote, $sentences );
     354}
     355
     356/**
    298357 * Search for disabled element tags. Push element to stack on tag open and pop
    299358 * on tag close.
    300359 *
  • tests/phpunit/tests/formatting/WPTexturize.php

     
    9090                //$this->assertEquals('Here is &#8220;<a href="http://example.com">a test with a link</a>&#8221;&#8230; and ellipses.', wptexturize('Here is "<a href="http://example.com">a test with a link</a>"... and ellipses.'));
    9191                //$this->assertEquals('Here is &#8220;a test <a href="http://example.com">with a link</a>&#8221;.', wptexturize('Here is "a test <a href="http://example.com">with a link</a>".'));
    9292                //$this->assertEquals('Here is &#8220;<a href="http://example.com">a test with a link</a>&#8221;and a work stuck to the end.', wptexturize('Here is "<a href="http://example.com">a test with a link</a>"and a work stuck to the end.'));
    93                 //$this->assertEquals('A test with a finishing number, &#8220;like 23&#8221;.', wptexturize('A test with a finishing number, "like 23".'));
    94                 //$this->assertEquals('A test with a number, &#8220;like 62&#8221;, is nice to have.', wptexturize('A test with a number, "like 62", is nice to have.'));
     93                $this->assertEquals('A test with a finishing number, &#8220;like 23&#8221;.', wptexturize('A test with a finishing number, "like 23".'));
     94                $this->assertEquals('A test with a number, &#8220;like 62&#8221;, is nice to have.', wptexturize('A test with a number, "like 62", is nice to have.'));
    9595        }
    9696
    9797        /**
     
    114114                $this->assertEquals('&#8216;Class of &#8217;99&#8217;', wptexturize("'Class of '99'"));
    115115                $this->assertEquals('&#8216;Class of &#8217;99&#8217;s&#8217;', wptexturize("'Class of '99's'"));
    116116                $this->assertEquals('&#8216;Class of &#8217;99&#8217;s&#8217;', wptexturize("'Class of '99&#8217;s'"));
    117                 //$this->assertEquals('&#8220;Class of 99&#8221;', wptexturize("\"Class of 99\""));
     117                $this->assertEquals('&#8220;Class of 99&#8221;', wptexturize("\"Class of 99\""));
    118118                $this->assertEquals('&#8220;Class of &#8217;99&#8221;', wptexturize("\"Class of '99\""));
    119119                $this->assertEquals('{&#8220;Class of &#8217;99&#8221;}', wptexturize("{\"Class of '99\"}"));
    120120                $this->assertEquals(' &#8220;Class of &#8217;99&#8221; ', wptexturize(" \"Class of '99\" "));
     
    18231823                        ),
    18241824                );
    18251825        }
     1826
     1827        /**
     1828         * Ensure primes logic is not too greedy at the end of a quotation.
     1829         *
     1830         * @ticket 29256
     1831         * @dataProvider primes_vs_quotes
     1832         */
     1833        function test_primes_vs_quotes( $input, $output ) {
     1834                return $this->assertEquals( $output, wptexturize( $input ) );
     1835        }
     1836
     1837        function data_primes_vs_quotes() {
     1838                return array(
     1839                        array(
     1840                                "George's porch is 99' long.",
     1841                                "George&#8217;s porch is 99&#8242; long.",
     1842                        ),
     1843                        array(
     1844                                'The best year "was that time in 2012" when everyone partied, he said.',
     1845                                'The best year &#8220;was that time in 2012&#8221; when everyone partied, he said.',
     1846                        ),
     1847                        array(
     1848                                "I need 4 x 20' = 80' of trim.", // Works only with a space before the = char.
     1849                                "I need 4 x 20&#8242; = 80&#8242; of trim.",
     1850                        ),
     1851                        array(
     1852                                '"Lorem ipsum dolor sit amet 1234"',
     1853                                '&#8220;Lorem ipsum dolor sit amet 1234&#8221;',
     1854                        ),
     1855                        array(
     1856                                "'Etiam eu egestas dui 1234'",
     1857                                "&#8216;Etiam eu egestas dui 1234&#8217;",
     1858                        ),
     1859                        array(
     1860                                'according to our source, "33% of all students scored less than 50" on the test.',
     1861                                'according to our source, &#8220;33% of all students scored less than 50&#8221; on the test.',
     1862                        ),
     1863                        array(
     1864                                "The doctor said, 'An average height is between 5' and 6' in study group 7'.  He then produced a 6' chart of averages.  A man of 7', incredibly, is very possible.",
     1865                                "The doctor said, &#8216;An average height is between 5&#8242; and 6&#8242; in study group 7&#8217;.  He then produced a 6&#8242; chart of averages.  A man of 7&#8242;, incredibly, is very possible.",
     1866                        ),
     1867                        array(
     1868                                'Pirates have voted on "The Expendables 3" with their clicks -- and it turns out the Sylvester Stallone-starrer hasn\'t been astoundingly popular among digital thieves, relatively speaking.
     1869
     1870As of Sunday, 5.12 million people worldwide had pirated "Expendables 3" since a high-quality copy hit torrent-sharing sites July 23, according to piracy-tracking firm Excipio.
     1871
     1872That likely contributed to the action movie\'s dismal box-office debut this weekend. But over the same July 23-Aug. 18 time period, the movie was No. 4 in downloads, after "Captain America: The Winter Soldier" (7.31 million), "Divergent" (6.29 million) and "The Amazing Spider-Man 2" (5.88 million). Moreover, that\'s despite "Expendables 3" becoming available more than three weeks prior to the film\'s U.S. theatrical debut.
     1873
     1874String with a number followed by a single quote \'Expendables 3\' vestibulum in arcu mi.',
     1875
     1876                                'Pirates have voted on &#8220;The Expendables 3&#8221; with their clicks &#8212; and it turns out the Sylvester Stallone-starrer hasn&#8217;t been astoundingly popular among digital thieves, relatively speaking.
     1877
     1878As of Sunday, 5.12 million people worldwide had pirated &#8220;Expendables 3&#8221; since a high-quality copy hit torrent-sharing sites July 23, according to piracy-tracking firm Excipio.
     1879
     1880That likely contributed to the action movie&#8217;s dismal box-office debut this weekend. But over the same July 23-Aug. 18 time period, the movie was No. 4 in downloads, after &#8220;Captain America: The Winter Soldier&#8221; (7.31 million), &#8220;Divergent&#8221; (6.29 million) and &#8220;The Amazing Spider-Man 2&#8221; (5.88 million). Moreover, that&#8217;s despite &#8220;Expendables 3&#8221; becoming available more than three weeks prior to the film&#8217;s U.S. theatrical debut.
     1881
     1882String with a number followed by a single quote &#8216;Expendables 3&#8217; vestibulum in arcu mi.',
     1883                        ),
     1884                );
     1885        }
     1886
     1887        /**
     1888         * Make sure translation actually works.
     1889         *
     1890         * Also make sure opening and closing quotes are allowed to be identical.
     1891         *
     1892         * @ticket 29256
     1893         * @dataProvider data_primes_quotes_translation
     1894         */
     1895        function test_primes_quotes_translation( $input, $output ) {
     1896                add_filter( 'gettext_with_context', array( $this, 'filter_translate2' ), 10, 4 );
     1897
     1898                $result = wptexturize( $input, true );
     1899
     1900                remove_filter( 'gettext_with_context', array( $this, 'filter_translate2' ), 10, 4 );
     1901                wptexturize( 'reset', true );
     1902
     1903                return $this->assertEquals( $output, $result );
     1904        }
     1905
     1906        function filter_translate2( $translations, $text, $context, $domain ) {
     1907                switch ($input) {
     1908                        case '&#8211;' : return '!endash!';
     1909                        case '&#8212;' : return '!emdash!';
     1910                        case '&#8216;' : return '!q1!';
     1911                        case '&#8217;' :
     1912                                if ( 'apostrophe' == $context ) {
     1913                                        return '!apos!';
     1914                                } else {
     1915                                        return '!q1!';
     1916                                }
     1917                        case '&#8220;' : return '!q2!';
     1918                        case '&#8221;' : return '!q2!';
     1919                        case '&#8242;' : return '!prime1!';
     1920                        case '&#8243;' : return '!prime2!';
     1921                        default : return $input;
     1922                }
     1923        }
     1924
     1925        function data_primes_quotes_translation() {
     1926                return array(
     1927                        array(
     1928                                "George's porch is 99' long.",
     1929                                "George!apos!s porch is 99!prime1! long.",
     1930                        ),
     1931                        array(
     1932                                'The best year "was that time in 2012" when everyone partied, he said.',
     1933                                'The best year !q2!was that time in 2012!q2! when everyone partied, he said.',
     1934                        ),
     1935                        array(
     1936                                "I need 4 x 20' = 80' of trim.", // Works only with a space before the = char.
     1937                                "I need 4 x 20!prime1! = 80!prime1! of trim.",
     1938                        ),
     1939                        array(
     1940                                '"Lorem ipsum dolor sit amet 1234"',
     1941                                '!q2!Lorem ipsum dolor sit amet 1234!q2!',
     1942                        ),
     1943                        array(
     1944                                "'Etiam eu egestas dui 1234'",
     1945                                "!q1!Etiam eu egestas dui 1234!q1!",
     1946                        ),
     1947                        array(
     1948                                'according to our source, "33% of all students scored less than 50" on the test.',
     1949                                'according to our source, !q2!33% of all students scored less than 50!q2! on the test.',
     1950                        ),
     1951                        array(
     1952                                "The doctor said, 'An average height is between 5' and 6' in study group 7'.  He then produced a 6' chart of averages.  A man of 7', incredibly, is very possible.",
     1953                                "The doctor said, !q1!An average height is between 5!prime1! and 6!prime1! in study group 7!q1!.  He then produced a 6!prime1! chart of averages.  A man of 7!prime1!, incredibly, is very possible.",
     1954                        ),
     1955                        array(
     1956                                'Pirates have voted on "The Expendables 3" with their clicks -- and it turns out the Sylvester Stallone-starrer hasn\'t been astoundingly popular among digital thieves, relatively speaking.
     1957
     1958As of Sunday, 5.12 million people worldwide had pirated "Expendables 3" since a high-quality copy hit torrent-sharing sites July 23, according to piracy-tracking firm Excipio.
     1959
     1960That likely contributed to the action movie\'s dismal box-office debut this weekend. But over the same July 23-Aug. 18 time period, the movie was No. 4 in downloads, after "Captain America: The Winter Soldier" (7.31 million), "Divergent" (6.29 million) and "The Amazing Spider-Man 2" (5.88 million). Moreover, that\'s despite "Expendables 3" becoming available more than three weeks prior to the film\'s U.S. theatrical debut.
     1961
     1962String with a number followed by a single quote \'Expendables 3\' vestibulum in arcu mi.',
     1963
     1964                                'Pirates have voted on !q2!The Expendables 3!q2! with their clicks !emdash! and it turns out the Sylvester Stallone-starrer hasn!apos!t been astoundingly popular among digital thieves, relatively speaking.
     1965
     1966As of Sunday, 5.12 million people worldwide had pirated !q2!Expendables 3!q2! since a high-quality copy hit torrent-sharing sites July 23, according to piracy-tracking firm Excipio.
     1967
     1968That likely contributed to the action movie!apos!s dismal box-office debut this weekend. But over the same July 23-Aug. 18 time period, the movie was No. 4 in downloads, after !q2!Captain America: The Winter Soldier!q2! (7.31 million), !q2!Divergent!q2! (6.29 million) and !q2!The Amazing Spider-Man 2!q2! (5.88 million). Moreover, that!apos!s despite !q2!Expendables 3!q2! becoming available more than three weeks prior to the film!apos!s U.S. theatrical debut.
     1969
     1970String with a number followed by a single quote !q1!Expendables 3!q1! vestibulum in arcu mi.',
     1971                        ),
     1972                );
     1973        }
    18261974}
     1975 No newline at end of file