Make WordPress Core


Ignore:
Timestamp:
06/02/2024 03:14:35 PM (21 months ago)
Author:
dmsnell
Message:

HTML API: Add custom text decoder.

Provides a custom decoder for strings coming from HTML attributes and
markup. This custom decoder is necessary because of deficiencies in
PHP's html_entity_decode() function:

  • It isn't aware of 720 of the possible named character references in HTML, leaving many out that should be translated.
  • It isn't aware of the ambiguous ampersand rule, which allows conversion of character references in certain contexts when they are missing their closing ;.
  • It doesn't distinguish between attribute values and markup values when applying the ambiguous ampersand rule.
  • Use of html_entity_decode() requires manually passing non-default parameter values to ensure it decodes properly.

This decoder also provides some conveniences, such as making a
single-pass and interruptible decode operation possible. This will
provide a number of opportunities to optimize detection and decoding
of things like value prefixes, and whether a value contains a given
substring.

Developed in https://github.com/WordPress/wordpress-develop/pull/6387
Discussed in https://core.trac.wordpress.org/ticket/61072

Props dmsnell, gziolo, jonsurrell, jorbin, westonruter, zieladam.
Fixes #61072.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php

    r58072 r58281  
    3232     */
    3333    const SKIP_TESTS = array(
    34         'adoption01/line0046'        => 'Unimplemented: Reconstruction of active formatting elements.',
    35         'adoption01/line0159'        => 'Unimplemented: Reconstruction of active formatting elements.',
    36         'adoption01/line0318'        => 'Unimplemented: Reconstruction of active formatting elements.',
    37         'entities02/line0100'        => 'Encoded characters without semicolon termination in attribute values are not handled properly',
    38         'entities02/line0114'        => 'Encoded characters without semicolon termination in attribute values are not handled properly',
    39         'entities02/line0128'        => 'Encoded characters without semicolon termination in attribute values are not handled properly',
    40         'entities02/line0142'        => 'Encoded characters without semicolon termination in attribute values are not handled properly',
    41         'entities02/line0156'        => 'Encoded characters without semicolon termination in attribute values are not handled properly',
    42         'inbody01/line0001'          => 'Bug.',
    43         'inbody01/line0014'          => 'Bug.',
    44         'inbody01/line0029'          => 'Bug.',
    45         'menuitem-element/line0012'  => 'Bug.',
    46         'plain-text-unsafe/line0001' => 'HTML entities may be mishandled.',
    47         'plain-text-unsafe/line0105' => 'Binary.',
    48         'tests1/line0342'            => "Closing P tag implicitly creates opener, which we don't visit.",
    49         'tests1/line0720'            => 'Unimplemented: Reconstruction of active formatting elements.',
    50         'tests1/line0833'            => 'Bug.',
    51         'tests15/line0001'           => 'Unimplemented: Reconstruction of active formatting elements.',
    52         'tests15/line0022'           => 'Unimplemented: Reconstruction of active formatting elements.',
    53         'tests2/line0317'            => 'HTML entities may be mishandled.',
    54         'tests2/line0408'            => 'HTML entities may be mishandled.',
    55         'tests2/line0650'            => 'Whitespace only test never enters "in body" parsing mode.',
    56         'tests20/line0497'           => "Closing P tag implicitly creates opener, which we don't visit.",
    57         'tests23/line0001'           => 'Unimplemented: Reconstruction of active formatting elements.',
    58         'tests23/line0041'           => 'Unimplemented: Reconstruction of active formatting elements.',
    59         'tests23/line0069'           => 'Unimplemented: Reconstruction of active formatting elements.',
    60         'tests23/line0101'           => 'Unimplemented: Reconstruction of active formatting elements.',
    61         'tests25/line0169'           => 'Bug.',
    62         'tests26/line0263'           => 'Bug: An active formatting element should be created for a trailing text node.',
    63         'tests7/line0354'            => 'Bug.',
    64         'tests8/line0001'            => 'Bug.',
    65         'tests8/line0020'            => 'Bug.',
    66         'tests8/line0037'            => 'Bug.',
    67         'tests8/line0052'            => 'Bug.',
    68         'webkit01/line0174'          => 'Bug.',
     34        'adoption01/line0046'       => 'Unimplemented: Reconstruction of active formatting elements.',
     35        'adoption01/line0159'       => 'Unimplemented: Reconstruction of active formatting elements.',
     36        'adoption01/line0318'       => 'Unimplemented: Reconstruction of active formatting elements.',
     37        'inbody01/line0001'         => 'Bug.',
     38        'inbody01/line0014'         => 'Bug.',
     39        'inbody01/line0029'         => 'Bug.',
     40        'menuitem-element/line0012' => 'Bug.',
     41        'tests1/line0342'           => "Closing P tag implicitly creates opener, which we don't visit.",
     42        'tests1/line0720'           => 'Unimplemented: Reconstruction of active formatting elements.',
     43        'tests1/line0833'           => 'Bug.',
     44        'tests15/line0001'          => 'Unimplemented: Reconstruction of active formatting elements.',
     45        'tests15/line0022'          => 'Unimplemented: Reconstruction of active formatting elements.',
     46        'tests2/line0650'           => 'Whitespace only test never enters "in body" parsing mode.',
     47        'tests20/line0497'          => "Closing P tag implicitly creates opener, which we don't visit.",
     48        'tests23/line0001'          => 'Unimplemented: Reconstruction of active formatting elements.',
     49        'tests23/line0041'          => 'Unimplemented: Reconstruction of active formatting elements.',
     50        'tests23/line0069'          => 'Unimplemented: Reconstruction of active formatting elements.',
     51        'tests23/line0101'          => 'Unimplemented: Reconstruction of active formatting elements.',
     52        'tests25/line0169'          => 'Bug.',
     53        'tests26/line0263'          => 'Bug: An active formatting element should be created for a trailing text node.',
     54        'tests7/line0354'           => 'Bug.',
     55        'tests8/line0001'           => 'Bug.',
     56        'tests8/line0020'           => 'Bug.',
     57        'tests8/line0037'           => 'Bug.',
     58        'tests8/line0052'           => 'Bug.',
     59        'webkit01/line0174'         => 'Bug.',
    6960    );
    7061
     
    10596        while ( false !== ( $entry = readdir( $handle ) ) ) {
    10697            if ( ! stripos( $entry, '.dat' ) ) {
    107                 continue;
    108             }
    109 
    110             if ( 'entities01.dat' === $entry || 'entities02.dat' === $entry ) {
    11198                continue;
    11299            }
Note: See TracChangeset for help on using the changeset viewer.