Make WordPress Core


Ignore:
Timestamp:
07/18/2017 03:47:29 AM (8 years ago)
Author:
pento
Message:

Emoji: Port the Twemoji regex to PHP.

Previously, wp_encode_emoji() and wp_staticize_emoji() used inaccurate regular expressions to find emoji, and transform them into HTML entities or <img>s, respectively. This would result in emoji not being correctly transformed, or occasionally, non-emoji being incorrectly transformed.

This commit adds a new grunt task - grunt precommit:emoji. It finds the regex in twemoji.js, transforms it into a PHP-friendly version, and adds it to formatting.php. This task is also automatically run by grunt precommit, when it detects that twemoji.js has changed.

The new regex requires features introduced in PCRE 8.32, which was introduced in PHP 5.4.14, though it was also backported to later releases of the PHP 5.3 series. For versions of PHP that don't support this, it will fall back to an updated version of the loose-matching regex.

For short posts, the performance difference between the old and new regex is negligible. As the posts get longer, however, the new method is exponentially faster.

Merges [41043], [41045], and [41046] to the 4.8 branch.

Fixes #35293.

Location:
branches/4.8
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • branches/4.8

  • branches/4.8/tests/phpunit/tests/formatting/Emoji.php

    r40837 r41069  
    33/**
    44 * @group formatting
     5 * @group emoji
    56 */
    67class Tests_Formatting_Emoji extends WP_UnitTestCase {
     8
     9    private $png_cdn = 'https://s.w.org/images/core/emoji/2.3/72x72/';
     10    private $svn_cdn = 'https://s.w.org/images/core/emoji/2.3/svg/';
     11
    712    /**
    813     * @ticket 36525
    914     */
    1015    public function test_unfiltered_emoji_cdns() {
    11         $png_cdn = 'https://s.w.org/images/core/emoji/2.3/72x72/';
    12         $svn_cdn = 'https://s.w.org/images/core/emoji/2.3/svg/';
    13 
    1416        $output = get_echo( '_print_emoji_detection_script' );
    1517
    16         $this->assertContains( wp_json_encode( $png_cdn ), $output );
    17         $this->assertContains( wp_json_encode( $svn_cdn ), $output );
     18        $this->assertContains( wp_json_encode( $this->png_cdn ), $output );
     19        $this->assertContains( wp_json_encode( $this->svn_cdn ), $output );
    1820    }
    1921
     
    2628     */
    2729    public function test_filtered_emoji_svn_cdn() {
    28         $png_cdn = 'https://s.w.org/images/core/emoji/2.3/72x72/';
    29         $svn_cdn = 'https://s.w.org/images/core/emoji/2.3/svg/';
    30 
    3130        $filtered_svn_cdn = $this->_filtered_emoji_svn_cdn();
    3231
     
    3534        $output = get_echo( '_print_emoji_detection_script' );
    3635
    37         $this->assertContains( wp_json_encode( $png_cdn ), $output );
    38         $this->assertNotContains( wp_json_encode( $svn_cdn ), $output );
     36        $this->assertContains( wp_json_encode( $this->png_cdn ), $output );
     37        $this->assertNotContains( wp_json_encode( $this->svn_cdn ), $output );
    3938        $this->assertContains( wp_json_encode( $filtered_svn_cdn ), $output );
    4039
     
    5049     */
    5150    public function test_filtered_emoji_png_cdn() {
    52         $png_cdn = 'https://s.w.org/images/core/emoji/2.3/72x72/';
    53         $svn_cdn = 'https://s.w.org/images/core/emoji/2.3/svg/';
    54 
    5551        $filtered_png_cdn = $this->_filtered_emoji_png_cdn();
    5652
     
    6056
    6157        $this->assertContains( wp_json_encode( $filtered_png_cdn ), $output );
    62         $this->assertNotContains( wp_json_encode( $png_cdn ), $output );
    63         $this->assertContains( wp_json_encode( $svn_cdn ), $output );
     58        $this->assertNotContains( wp_json_encode( $this->png_cdn ), $output );
     59        $this->assertContains( wp_json_encode( $this->svn_cdn ), $output );
    6460
    6561        remove_filter( 'emoji_url', array( $this, '_filtered_emoji_png_cdn' ) );
    6662    }
    6763
     64    /**
     65     * @ticket 35293
     66     */
     67    public function test_wp_emoji_regex_returns_regexen() {
     68        $default = wp_emoji_regex();
     69        $this->assertNotEmpty( $default );
     70
     71        $codepoints = wp_emoji_regex( 'codepoints' );
     72        $this->assertNotEmpty( $codepoints );
     73
     74        $this->assertSame( $default, $codepoints );
     75
     76        $entities = wp_emoji_regex( 'entities' );
     77        $this->assertNotEmpty( $entities );
     78
     79        $this->assertNotSame( $default, $entities );
     80    }
     81
     82    public function data_wp_encode_emoji() {
     83        return array(
     84            array(
     85                // Not emoji
     86                '’',
     87                '’',
     88            ),
     89            array(
     90                // Simple emoji
     91                '🙂',
     92                '&#x1f642;',
     93            ),
     94            array(
     95                // Skin tone, gender, ZWJ, emoji selector
     96                '👮🏼‍♀️',
     97                '&#x1f46e;&#x1f3fc;&#x200d;&#x2640;&#xfe0f;',
     98            ),
     99            array(
     100                // Unicode 10
     101                '🧚',
     102                '&#x1f9da;',
     103            ),
     104        );
     105    }
     106
     107    /**
     108     * @ticket 35293
     109     * @dataProvider data_wp_encode_emoji
     110     */
     111    public function test_wp_encode_emoji( $emoji, $expected ) {
     112        $this->assertSame( $expected, wp_encode_emoji( $emoji ) );
     113    }
     114
     115    public function data_wp_staticize_emoji() {
     116        $data = array(
     117            array(
     118                // Not emoji
     119                '’',
     120                '’',
     121            ),
     122            array(
     123                // Simple emoji
     124                '🙂',
     125                '<img src="' . $this->png_cdn . '1f642.png" alt="" class="wp-smiley" style="height: 1em; max-height: 1em;" />',
     126            ),
     127            array(
     128                // Skin tone, gender, ZWJ, emoji selector
     129                '👮🏼‍♀️',
     130                '<img src="' . $this->png_cdn . '1f46e-1f3fc-200d-2640-fe0f.png" alt="" class="wp-smiley" style="height: 1em; max-height: 1em;" />',
     131            ),
     132            array(
     133                // Unicode 10
     134                '🧚',
     135                '<img src="' . $this->png_cdn . '1f9da.png" alt="" class="wp-smiley" style="height: 1em; max-height: 1em;" />',
     136            ),
     137        );
     138
     139        // Older versions of PHP don't html_entity_decode() emoji, so we need to make sure they're testing in the expected form.
     140        foreach ( $data as $key => $datum ) {
     141            $emoji = html_entity_decode( wp_encode_emoji( $datum[0] ) );
     142            $data[ $key ][1] = str_replace( 'alt=""', 'alt="' . $emoji . '"', $datum[1] );
     143        }
     144
     145        return $data;
     146    }
     147
     148    /**
     149     * @ticket 35293
     150     * @dataProvider data_wp_staticize_emoji
     151     */
     152    public function test_wp_staticize_emoji( $emoji, $expected ) {
     153        $this->assertSame( $expected, wp_staticize_emoji( $emoji ) );
     154    }
    68155}
Note: See TracChangeset for help on using the changeset viewer.