Make WordPress Core


Ignore:
Timestamp:
07/14/2017 05:46:19 AM (8 years ago)
Author:
pento
Message:

Emoji: Port the Twemoji regex to PHP.

Previously, wp_encode_emoji() and wp_staticize_emoji() used inaccurate regular expressions to find emoji, and transform them into HTML entities or <img>s, respectively. This would result in emoji not being correctly transformed, or occasionally, non-emoji being incorrectly transformed.

This commit adds a new grunt task - grunt precommit:emoji. It finds the regex in twemoji.js, transforms it into a PHP-friendly version, and adds it to formatting.php. This task is also automatically run by grunt precommit, when it detects that twemoji.js has changed.

The new regex requires features introduced in PCRE 8.32, which was first bundled with PHP 5.4.14, though it was also backported to later releases of the PHP 5.3 series. On versions of PHP that don't support these features, the emoji functions fall back to an updated version of the loose-matching regex.

For short posts, the performance difference between the old and new regex is negligible. As the posts get longer, however, the new method is exponentially faster.

Fixes #35293.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/tests/phpunit/tests/formatting/Emoji.php

    r40837 r41043  
    33/**
    44 * @group formatting
     5 * @group emoji
    56 */
    67class Tests_Formatting_Emoji extends WP_UnitTestCase {
     8
     9    private $png_cdn = 'https://s.w.org/images/core/emoji/2.3/72x72/';
     10    private $svn_cdn = 'https://s.w.org/images/core/emoji/2.3/svg/';
     11
    712    /**
    813     * @ticket 36525
    914     */
    1015    public function test_unfiltered_emoji_cdns() {
    11         $png_cdn = 'https://s.w.org/images/core/emoji/2.3/72x72/';
    12         $svn_cdn = 'https://s.w.org/images/core/emoji/2.3/svg/';
    13 
    1416        $output = get_echo( '_print_emoji_detection_script' );
    1517
    16         $this->assertContains( wp_json_encode( $png_cdn ), $output );
    17         $this->assertContains( wp_json_encode( $svn_cdn ), $output );
     18        $this->assertContains( wp_json_encode( $this->png_cdn ), $output );
     19        $this->assertContains( wp_json_encode( $this->svn_cdn ), $output );
    1820    }
    1921
     
    2628     */
    2729    public function test_filtered_emoji_svn_cdn() {
    28         $png_cdn = 'https://s.w.org/images/core/emoji/2.3/72x72/';
    29         $svn_cdn = 'https://s.w.org/images/core/emoji/2.3/svg/';
    30 
    3130        $filtered_svn_cdn = $this->_filtered_emoji_svn_cdn();
    3231
     
    3534        $output = get_echo( '_print_emoji_detection_script' );
    3635
    37         $this->assertContains( wp_json_encode( $png_cdn ), $output );
    38         $this->assertNotContains( wp_json_encode( $svn_cdn ), $output );
     36        $this->assertContains( wp_json_encode( $this->png_cdn ), $output );
     37        $this->assertNotContains( wp_json_encode( $this->svn_cdn ), $output );
    3938        $this->assertContains( wp_json_encode( $filtered_svn_cdn ), $output );
    4039
     
    5049     */
    5150    public function test_filtered_emoji_png_cdn() {
    52         $png_cdn = 'https://s.w.org/images/core/emoji/2.3/72x72/';
    53         $svn_cdn = 'https://s.w.org/images/core/emoji/2.3/svg/';
    54 
    5551        $filtered_png_cdn = $this->_filtered_emoji_png_cdn();
    5652
     
    6056
    6157        $this->assertContains( wp_json_encode( $filtered_png_cdn ), $output );
    62         $this->assertNotContains( wp_json_encode( $png_cdn ), $output );
    63         $this->assertContains( wp_json_encode( $svn_cdn ), $output );
     58        $this->assertNotContains( wp_json_encode( $this->png_cdn ), $output );
     59        $this->assertContains( wp_json_encode( $this->svn_cdn ), $output );
    6460
    6561        remove_filter( 'emoji_url', array( $this, '_filtered_emoji_png_cdn' ) );
    6662    }
    6763
     64    /**
     65     * @ticket 35293
     66     */
     67    public function test_wp_emoji_regex_returns_regexen() {
     68        $default = wp_emoji_regex();
     69        $this->assertNotEmpty( $default );
     70
     71        $codepoints = wp_emoji_regex( 'codepoints' );
     72        $this->assertNotEmpty( $codepoints );
     73
     74        $this->assertSame( $default, $codepoints );
     75
     76        $entities = wp_emoji_regex( 'entities' );
     77        $this->assertNotEmpty( $entities );
     78
     79        $this->assertNotSame( $default, $entities );
     80    }
     81
     82    public function data_wp_encode_emoji() {
     83        return array(
     84            array(
     85                // Not emoji
     86                '’',
     87                '’',
     88            ),
     89            array(
     90                // Simple emoji
     91                '🙂',
     92                '&#x1f642;',
     93            ),
     94            array(
     95                // Skin tone, gender, ZWJ, emoji selector
     96                '👮🏼‍♀️',
     97                '&#x1f46e;&#x1f3fc;&#x200d;&#x2640;&#xfe0f;',
     98            ),
     99            array(
     100                // Unicode 10
     101                '🧚',
     102                '&#x1f9da;',
     103            ),
     104
     105        );
     106    }
     107
     108    /**
     109     * @ticket 35293
     110     * @dataProvider data_wp_encode_emoji
     111     */
     112    public function test_wp_encode_emoji( $emoji, $expected ) {
     113        $this->assertSame( $expected, wp_encode_emoji( $emoji ) );
     114    }
     115
     116    public function data_wp_staticize_emoji() {
     117        return array(
     118            array(
     119                // Not emoji
     120                '’',
     121                '’',
     122            ),
     123            array(
     124                // Simple emoji
     125                '🙂',
     126                '<img src="' . $this->png_cdn . '1f642.png" alt="🙂" class="wp-smiley" style="height: 1em; max-height: 1em;" />',
     127            ),
     128            array(
     129                // Skin tone, gender, ZWJ, emoji selector
     130                '👮🏼‍♀️',
     131                '<img src="' . $this->png_cdn . '1f46e-1f3fc-200d-2640-fe0f.png" alt="👮🏼‍♀️" class="wp-smiley" style="height: 1em; max-height: 1em;" />',
     132            ),
     133            array(
     134                // Unicode 10
     135                '🧚',
     136                '<img src="' . $this->png_cdn . '1f9da.png" alt="🧚" class="wp-smiley" style="height: 1em; max-height: 1em;" />',
     137            ),
     138
     139        );
     140    }
     141
     142    /**
     143     * @ticket 35293
     144     * @dataProvider data_wp_staticize_emoji
     145     */
     146    public function test_wp_staticize_emoji( $emoji, $expected ) {
     147        $this->assertSame( $expected, wp_staticize_emoji( $emoji ) );
     148    }
    68149}
Note: See TracChangeset for help on using the changeset viewer.