Make WordPress Core


Ignore:
Timestamp:
06/10/2026 03:04:54 PM (6 days ago)
Author:
dmsnell
Message:

General: Add support for unicode email addresses in is_email and sanitize_email

This adds support for the Unicode address extensions in RFCs 6530–6533 and refactors the code so there are fewer long regexes and less duplication between sanitize_email and is_email. A new class, WP_Email_Address, provides the shared parts.

Opting out of Unicode support is easy: default-filters.php enables Unicode support by adding filters, which can be removed.

sanitize_email no longer makes major changes like removing an entire subdomain from someone's address; it only cleans up things like soft hyphens and whitespace — changes that happen when copying an email address from text.

Developed in: https://github.com/WordPress/wordpress-develop/pull/5237
Discussed in: https://core.trac.wordpress.org/ticket/31992

Props agulbra, akirk, benniledl, dmsnell, ironprogrammer, justlevine, mdawaffe, mukeshpanchal27, SirLouen, tusharbharti.
Fixes #31992.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/tests/phpunit/tests/formatting/sanitizeEmail.php

    r62249 r62482  
    4242    public function data_sanitized_email_pairs() {
    4343        return array(
    44             'shorter than 6 characters'      => array( 'a@b', '' ),
    45             'contains no @'                  => array( 'ab', '' ),
    46             'just a TLD'                     => array( 'abc@com', '' ),
    47             'plain'                          => array( 'abc@example.com', 'abc@example.com' ),
    48             'invalid utf8 subdomain dropped' => array( "abc@sub.\x80.org", 'abc@sub.org' ),
    49             'all subdomains invalid utf8'    => array( "abc@\x80.org", '' ),
     44            'shorter than 6 characters'        => array( 'a@b', '' ),
     45            'contains no @'                    => array( 'ab', '' ),
     46            'just a TLD'                       => array( 'abc@com', '' ),
     47            'plain'                            => array( 'abc@example.com', 'abc@example.com' ),
     48            'unicode domain'                   => array( 'abc@grå.org', 'abc@grå.org' ),
     49            'unicode local part'               => array( 'grå@example.com', 'grå@example.com' ),
     50            'unicode local and domain'         => array( 'grå@grå.org', 'grå@grå.org' ),
     51            'invalid utf8 in local'            => array( "a\x80b@example.com", '' ),
     52            'invalid utf8 subdomain'           => array( "abc@sub.\x80.org", '' ),
     53            'all subdomains invalid utf8'      => array( "abc@\x80.org", '' ),
     54            'soft hyphen before dot'           => array( "info@example\xC2\xAD.com", 'info@example.com' ),
     55            'soft hyphen after dot'            => array( "info@example.\xC2\xADcom", 'info@example.com' ),
     56            'space before dot'                 => array( 'info@example .com', 'info@example.com' ),
     57            'space after dot'                  => array( 'info@example. com', 'info@example.com' ),
     58            'soft hyphen and space around dot' => array( "info@example \xC2\xAD.com", 'info@example.com' ),
     59            'space around at sign'             => array( 'info @ example.com', 'info@example.com' ),
     60            'soft hyphen before at sign'       => array( "info\xC2\xAD@example.com", 'info@example.com' ),
     61            'display name with angle brackets' => array( 'Alice Example <alice@example.com>', 'alice@example.com' ),
     62            'angle brackets only'              => array( '<alice@example.com>', 'alice@example.com' ),
     63            'angle brackets invalid address'   => array( 'Alice <not-an-email>', '' ),
    5064        );
    5165    }
Note: See TracChangeset for help on using the changeset viewer.