commit 9740f5cb43af8ec4828d23d32cb4adfe5da18c4d
Author: Arnt Gulbrandsen <arnt@gulbrandsen.priv.no>
Date: Fri Apr 28 12:03:03 2023 +0200
General: Add support for unicode email addresses in is_email
This adds support for the unicode address extensions in RFC 6532, adds
unit tests for that, extends the documentation to explain the relationship
between this code and the various specifications, and finally adds unit
tests to ensure that the documentation's description of the code remains
correct.
Fixes #31992.
diff --git a/src/wp-includes/formatting.php b/src/wp-includes/formatting.php
index 8240adcc82..ec3aba1ef0 100644
a
|
b
|
function convert_smilies( $text ) { |
3489 | 3489 | /** |
3490 | 3490 | * Verifies that an email is valid. |
3491 | 3491 | * |
3492 | | * Does not grok i18n domains. Not RFC compliant. |
| 3492 | * This mostly matches what people think is the format of email |
| 3493 | * addresses, and is close to all three current specifications. |
| 3494 | * |
| 3495 | * Email address syntax is specified in RFC 5322 for ASCII-only email |
| 3496 | * and in RFC 6532 for unicode email (both unicode domains and |
| 3497 | * localparts). In addition, the HTML WHATWG specification contains a |
| 3498 | * third syntax which is used for HTML form input (except that major |
| 3499 | * browsers deviate a little from the WHATWG specification). |
| 3500 | * |
| 3501 | * This function matches the WHATWG and RFC 6532 specifications fairly |
| 3502 | * well, although there are some differences. " "@example.com (quote |
| 3503 | * space quote at ...) is allowed by the RFCs and rejected by this |
| 3504 | * code, while ..@example.com is allowed by this code and prohibited |
| 3505 | * by the RFCs. info@grå.org is allowed by this code and major |
| 3506 | * browsers, but prohibited by WHATWG's regex (as of April 2023). |
3493 | 3507 | * |
3494 | 3508 | * @since 0.71 |
3495 | 3509 | * |
… |
… |
function is_email( $email, $deprecated = false ) { |
3531 | 3545 | |
3532 | 3546 | // LOCAL PART |
3533 | 3547 | // Test for invalid characters. |
3534 | | if ( ! preg_match( '/^[a-zA-Z0-9!#$%&\'*+\/=?^_`{|}~\.-]+$/', $local ) ) { |
| 3548 | if ( ! ( preg_match( '/^[a-zA-Z0-9\x80-\xff!#$%&\'*+\/=?^_`{|}~\.-]+$/', $local ) && |
| 3549 | preg_match( '/^\X+$/', $local ) ) ) { |
3535 | 3550 | /** This filter is documented in wp-includes/formatting.php */ |
3536 | 3551 | return apply_filters( 'is_email', false, $email, 'local_invalid_chars' ); |
3537 | 3552 | } |
… |
… |
function is_email( $email, $deprecated = false ) { |
3567 | 3582 | } |
3568 | 3583 | |
3569 | 3584 | // Test for invalid characters. |
3570 | | if ( ! preg_match( '/^[a-z0-9-]+$/i', $sub ) ) { |
| 3585 | if ( ! ( preg_match( '/^[a-z0-9\x80-\xff-]+$/i', $sub ) && |
| 3586 | preg_match( '/^\X+$/', $sub ) ) ) { |
3571 | 3587 | /** This filter is documented in wp-includes/formatting.php */ |
3572 | 3588 | return apply_filters( 'is_email', false, $email, 'sub_invalid_chars' ); |
3573 | 3589 | } |
diff --git a/tests/phpunit/tests/formatting/isEmail.php b/tests/phpunit/tests/formatting/isEmail.php
index c3f0a7c45b..cbdc1067cd 100644
a
|
b
|
class Tests_Formatting_IsEmail extends WP_UnitTestCase { |
14 | 14 | 'kevin@many.subdomains.make.a.happy.man.edu', |
15 | 15 | 'a@b.co', |
16 | 16 | 'bill+ted@example.com', |
| 17 | 'info@grå.org', |
| 18 | 'grå@grå.org', |
| 19 | "gr\u{0061}\u{030a}blå@grå.org", |
| 20 | '..@example.com', |
17 | 21 | ); |
18 | 22 | foreach ( $data as $datum ) { |
19 | 23 | $this->assertSame( $datum, is_email( $datum ), $datum ); |
… |
… |
class Tests_Formatting_IsEmail extends WP_UnitTestCase { |
28 | 32 | 'com.exampleNOSPAMbob', |
29 | 33 | 'bob@your mom', |
30 | 34 | 'a@b.c', |
| 35 | '" "@b.c', |
31 | 36 | ); |
32 | 37 | foreach ( $data as $datum ) { |
33 | 38 | $this->assertFalse( is_email( $datum ), $datum ); |