Make WordPress Core

Changeset 58714


Ignore:
Timestamp:
07/12/2024 10:27:20 PM (2 months ago)
Author:
dmsnell
Message:

HTML API: Add context to Unsupported_Exception class for improved debugging.

The HTML Processor internally throws an exception when it reaches HTML
that it knows it cannot process, but this exception is not made
available to calling code. It can be useful to extract more knowledge
about why it gave up, especially for debugging purposes.

In this patch, more context is added to the WP_HTML_Unsupported_Exception
and the last exception is made available to calling code through a new
method, get_unsupported_exception().

Developed in https://github.com/WordPress/wordpress-develop/pull/6985
Discussed in https://core.trac.wordpress.org/ticket/61646

Props bernhard-reiter, dmsnell, jonsurrell.
See #61646.

Location:
trunk/src/wp-includes/html-api
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/wp-includes/html-api/class-wp-html-processor.php

    r58713 r58714  
    190190
    191191    /**
     192     * Stores context for why the parser bailed on unsupported HTML, if it did.
     193     *
     194     * @see self::get_unsupported_exception
     195     *
     196     * @since 6.7.0
     197     *
     198     * @var WP_HTML_Unsupported_Exception|null
     199     */
     200    private $unsupported_exception = null;
     201
     202    /**
    192203     * Releases a bookmark when PHP garbage-collects its wrapping WP_HTML_Token instance.
    193204     *
     
    386397
    387398    /**
     399     * Stops the parser and terminates its execution when encountering unsupported markup.
     400     *
     401     * @throws WP_HTML_Unsupported_Exception Halts execution of the parser.
     402     *
     403     * @since 6.7.0
     404     *
     405     * @param string $message Explains which support is missing in order to parse the current node.
     406     *
     407     * @return never This method always throws and does not return to its caller.
     408     */
     409    private function bail( string $message ) {
     410        $here  = $this->bookmarks[ $this->state->current_token->bookmark_name ];
     411        $token = substr( $this->html, $here->start, $here->length );
     412
     413        $open_elements = array();
     414        foreach ( $this->state->stack_of_open_elements->stack as $item ) {
     415            $open_elements[] = $item->node_name;
     416        }
     417
     418        $active_formats = array();
     419        foreach ( $this->state->active_formatting_elements->walk_down() as $item ) {
     420            $active_formats[] = $item->node_name;
     421        }
     422
     423        $this->last_error = self::ERROR_UNSUPPORTED;
     424
     425        $this->unsupported_exception = new WP_HTML_Unsupported_Exception(
     426            $message,
     427            $this->state->current_token->node_name,
     428            $here->start,
     429            $token,
     430            $open_elements,
     431            $active_formats
     432        );
     433
     434        throw $this->unsupported_exception;
     435    }
     436
     437    /**
    388438     * Returns the last error, if any.
    389439     *
     
    410460    public function get_last_error() {
    411461        return $this->last_error;
     462    }
     463
     464    /**
     465     * Returns context for why the parser aborted due to unsupported HTML, if it did.
     466     *
     467     * This is meant for debugging purposes, not for production use.
     468     *
     469     * @since 6.7.0
     470     *
     471     * @see self::$unsupported_exception
     472     *
     473     * @return WP_HTML_Unsupported_Exception|null
     474     */
     475    public function get_unsupported_exception() {
     476        return $this->unsupported_exception;
    412477    }
    413478
     
    842907                // This should be unreachable but PHP doesn't have total type checking on switch.
    843908                default:
    844                     $this->last_error = self::ERROR_UNSUPPORTED;
    845                     throw new WP_HTML_Unsupported_Exception( "Found unrecognized insertion mode '{$this->state->insertion_mode}'." );
     909                    $this->bail( "Unaware of the requested parsing mode: '{$this->state->insertion_mode}'." );
    846910            }
    847911        } catch ( WP_HTML_Unsupported_Exception $e ) {
     
    923987     */
    924988    private function step_initial() {
    925         $this->last_error = self::ERROR_UNSUPPORTED;
    926         throw new WP_HTML_Unsupported_Exception( "No support for parsing in the '{$this->state->insertion_mode}' state." );
     989        $this->bail( "No support for parsing in the '{$this->state->insertion_mode}' state." );
    927990    }
    928991
     
    9431006     */
    9441007    private function step_before_html() {
    945         $this->last_error = self::ERROR_UNSUPPORTED;
    946         throw new WP_HTML_Unsupported_Exception( "No support for parsing in the '{$this->state->insertion_mode}' state." );
     1008        $this->bail( "No support for parsing in the '{$this->state->insertion_mode}' state." );
    9471009    }
    9481010
     
    9631025     */
    9641026    private function step_before_head() {
    965         $this->last_error = self::ERROR_UNSUPPORTED;
    966         throw new WP_HTML_Unsupported_Exception( "No support for parsing in the '{$this->state->insertion_mode}' state." );
     1027        $this->bail( "No support for parsing in the '{$this->state->insertion_mode}' state." );
    9671028    }
    9681029
     
    9831044     */
    9841045    private function step_in_head() {
    985         $this->last_error = self::ERROR_UNSUPPORTED;
    986         throw new WP_HTML_Unsupported_Exception( "No support for parsing in the '{$this->state->insertion_mode}' state." );
     1046        $this->bail( "No support for parsing in the '{$this->state->insertion_mode}' state." );
    9871047    }
    9881048
     
    10031063     */
    10041064    private function step_in_head_noscript() {
    1005         $this->last_error = self::ERROR_UNSUPPORTED;
    1006         throw new WP_HTML_Unsupported_Exception( "No support for parsing in the '{$this->state->insertion_mode}' state." );
     1065        $this->bail( "No support for parsing in the '{$this->state->insertion_mode}' state." );
    10071066    }
    10081067
     
    10231082     */
    10241083    private function step_after_head() {
    1025         $this->last_error = self::ERROR_UNSUPPORTED;
    1026         throw new WP_HTML_Unsupported_Exception( "No support for parsing in the '{$this->state->insertion_mode}' state." );
     1084        $this->bail( "No support for parsing in the '{$this->state->insertion_mode}' state." );
    10271085    }
    10281086
     
    14461504             */
    14471505            case '-BR':
    1448                 $this->last_error = self::ERROR_UNSUPPORTED;
    1449                 throw new WP_HTML_Unsupported_Exception( 'Closing BR tags require unimplemented special handling.' );
     1506                $this->bail( 'Closing BR tags require unimplemented special handling.' );
     1507                // This return is required because PHPCS can't determine that the call to bail() throws.
     1508                return false;
    14501509
    14511510            /*
     
    16031662            case 'TR':
    16041663            case 'XMP':
    1605                 $this->last_error = self::ERROR_UNSUPPORTED;
    1606                 throw new WP_HTML_Unsupported_Exception( "Cannot process {$token_name} element." );
     1664                $this->bail( "Cannot process {$token_name} element." );
    16071665        }
    16081666
     
    16661724     */
    16671725    private function step_in_table() {
    1668         $this->last_error = self::ERROR_UNSUPPORTED;
    1669         throw new WP_HTML_Unsupported_Exception( "No support for parsing in the '{$this->state->insertion_mode}' state." );
     1726        $this->bail( "No support for parsing in the '{$this->state->insertion_mode}' state." );
    16701727    }
    16711728
     
    16861743     */
    16871744    private function step_in_table_text() {
    1688         $this->last_error = self::ERROR_UNSUPPORTED;
    1689         throw new WP_HTML_Unsupported_Exception( "No support for parsing in the '{$this->state->insertion_mode}' state." );
     1745        $this->bail( "No support for parsing in the '{$this->state->insertion_mode}' state." );
    16901746    }
    16911747
     
    17061762     */
    17071763    private function step_in_caption() {
    1708         $this->last_error = self::ERROR_UNSUPPORTED;
    1709         throw new WP_HTML_Unsupported_Exception( "No support for parsing in the '{$this->state->insertion_mode}' state." );
     1764        $this->bail( "No support for parsing in the '{$this->state->insertion_mode}' state." );
    17101765    }
    17111766
     
    17261781     */
    17271782    private function step_in_column_group() {
    1728         $this->last_error = self::ERROR_UNSUPPORTED;
    1729         throw new WP_HTML_Unsupported_Exception( "No support for parsing in the '{$this->state->insertion_mode}' state." );
     1783        $this->bail( "No support for parsing in the '{$this->state->insertion_mode}' state." );
    17301784    }
    17311785
     
    17461800     */
    17471801    private function step_in_table_body() {
    1748         $this->last_error = self::ERROR_UNSUPPORTED;
    1749         throw new WP_HTML_Unsupported_Exception( "No support for parsing in the '{$this->state->insertion_mode}' state." );
     1802        $this->bail( "No support for parsing in the '{$this->state->insertion_mode}' state." );
    17501803    }
    17511804
     
    17661819     */
    17671820    private function step_in_row() {
    1768         $this->last_error = self::ERROR_UNSUPPORTED;
    1769         throw new WP_HTML_Unsupported_Exception( "No support for parsing in the '{$this->state->insertion_mode}' state." );
     1821        $this->bail( "No support for parsing in the '{$this->state->insertion_mode}' state." );
    17701822    }
    17711823
     
    17861838     */
    17871839    private function step_in_cell() {
    1788         $this->last_error = self::ERROR_UNSUPPORTED;
    1789         throw new WP_HTML_Unsupported_Exception( "No support for parsing in the '{$this->state->insertion_mode}' state." );
     1840        $this->bail( "No support for parsing in the '{$this->state->insertion_mode}' state." );
    17901841    }
    17911842
     
    19872038     */
    19882039    private function step_in_select_in_table() {
    1989         $this->last_error = self::ERROR_UNSUPPORTED;
    1990         throw new WP_HTML_Unsupported_Exception( "No support for parsing in the '{$this->state->insertion_mode}' state." );
     2040        $this->bail( "No support for parsing in the '{$this->state->insertion_mode}' state." );
    19912041    }
    19922042
     
    20072057     */
    20082058    private function step_in_template() {
    2009         $this->last_error = self::ERROR_UNSUPPORTED;
    2010         throw new WP_HTML_Unsupported_Exception( "No support for parsing in the '{$this->state->insertion_mode}' state." );
     2059        $this->bail( "No support for parsing in the '{$this->state->insertion_mode}' state." );
    20112060    }
    20122061
     
    20272076     */
    20282077    private function step_after_body() {
    2029         $this->last_error = self::ERROR_UNSUPPORTED;
    2030         throw new WP_HTML_Unsupported_Exception( "No support for parsing in the '{$this->state->insertion_mode}' state." );
     2078        $this->bail( "No support for parsing in the '{$this->state->insertion_mode}' state." );
    20312079    }
    20322080
     
    20472095     */
    20482096    private function step_in_frameset() {
    2049         $this->last_error = self::ERROR_UNSUPPORTED;
    2050         throw new WP_HTML_Unsupported_Exception( "No support for parsing in the '{$this->state->insertion_mode}' state." );
     2097        $this->bail( "No support for parsing in the '{$this->state->insertion_mode}' state." );
    20512098    }
    20522099
     
    20672114     */
    20682115    private function step_after_frameset() {
    2069         $this->last_error = self::ERROR_UNSUPPORTED;
    2070         throw new WP_HTML_Unsupported_Exception( "No support for parsing in the '{$this->state->insertion_mode}' state." );
     2116        $this->bail( "No support for parsing in the '{$this->state->insertion_mode}' state." );
    20712117    }
    20722118
     
    20872133     */
    20882134    private function step_after_after_body() {
    2089         $this->last_error = self::ERROR_UNSUPPORTED;
    2090         throw new WP_HTML_Unsupported_Exception( "No support for parsing in the '{$this->state->insertion_mode}' state." );
     2135        $this->bail( "No support for parsing in the '{$this->state->insertion_mode}' state." );
    20912136    }
    20922137
     
    21072152     */
    21082153    private function step_after_after_frameset() {
    2109         $this->last_error = self::ERROR_UNSUPPORTED;
    2110         throw new WP_HTML_Unsupported_Exception( "No support for parsing in the '{$this->state->insertion_mode}' state." );
     2154        $this->bail( "No support for parsing in the '{$this->state->insertion_mode}' state." );
    21112155    }
    21122156
     
    21272171     */
    21282172    private function step_in_foreign_content() {
    2129         $this->last_error = self::ERROR_UNSUPPORTED;
    2130         throw new WP_HTML_Unsupported_Exception( "No support for parsing in the '{$this->state->insertion_mode}' state." );
     2173        $this->bail( "No support for parsing in the '{$this->state->insertion_mode}' state." );
    21312174    }
    21322175
     
    28362879        }
    28372880
    2838         $this->last_error = self::ERROR_UNSUPPORTED;
    2839         throw new WP_HTML_Unsupported_Exception( 'Cannot reconstruct active formatting elements when advancing and rewinding is required.' );
     2881        $this->bail( 'Cannot reconstruct active formatting elements when advancing and rewinding is required.' );
    28402882    }
    28412883
     
    30733115            // > If there is no such element, then return and instead act as described in the "any other end tag" entry above.
    30743116            if ( null === $formatting_element ) {
    3075                 $this->last_error = self::ERROR_UNSUPPORTED;
    3076                 throw new WP_HTML_Unsupported_Exception( 'Cannot run adoption agency when "any other end tag" is required.' );
     3117                $this->bail( 'Cannot run adoption agency when "any other end tag" is required.' );
    30773118            }
    30783119
     
    31263167            }
    31273168
    3128             $this->last_error = self::ERROR_UNSUPPORTED;
    3129             throw new WP_HTML_Unsupported_Exception( 'Cannot extract common ancestor in adoption agency algorithm.' );
    3130         }
    3131 
    3132         $this->last_error = self::ERROR_UNSUPPORTED;
    3133         throw new WP_HTML_Unsupported_Exception( 'Cannot run adoption agency when looping required.' );
     3169            $this->bail( 'Cannot extract common ancestor in adoption agency algorithm.' );
     3170        }
     3171
     3172        $this->bail( 'Cannot run adoption agency when looping required.' );
    31343173    }
    31353174
  • trunk/src/wp-includes/html-api/class-wp-html-unsupported-exception.php

    r56274 r58714  
    2222 *
    2323 * @since 6.4.0
     24 * @since 6.7.0 Gained contextual information for use in debugging parse failures.
    2425 *
    2526 * @access private
     
    2829 */
    2930class WP_HTML_Unsupported_Exception extends Exception {
     31    /**
     32     * Name of the matched token when the exception was raised,
     33     * if matched on a token.
     34     *
     35     * This does not imply that the token itself was unsupported, but it
     36     * may have been the case that the token triggered part of the HTML
     37     * parsing that isn't supported, such as the adoption agency algorithm.
     38     *
     39     * @since 6.7.0
     40     *
     41     * @var string
     42     */
     43    public $token_name;
    3044
     45    /**
     46     * Number of bytes into the input HTML document where the parser was
     47     * parsing when the exception was raised.
     48     *
     49     * Use this to reconstruct context for the failure.
     50     *
     51     * @since 6.7.0
     52     *
     53     * @var int
     54     */
     55    public $token_at;
     56
     57    /**
     58     * Full raw text of the matched token when the exception was raised,
     59     * if matched on a token.
     60     *
     61     * Whereas the `$token_name` will be normalized, this contains the full
     62     * raw text of the token, including original casing, duplicated attributes,
     63     * and other syntactic variations that are normally abstracted in the HTML API.
     64     *
     65     * @since 6.7.0
     66     *
     67     * @var string
     68     */
     69    public $token;
     70
     71    /**
     72     * Stack of open elements when the exception was raised.
     73     *
     74     * Use this to trace the parsing circumstances which led to the exception.
     75     *
     76     * @since 6.7.0
     77     *
     78     * @var string[]
     79     */
     80    public $stack_of_open_elements = array();
     81
     82    /**
     83     * List of active formatting elements when the exception was raised.
     84     *
     85     * Use this to trace the parsing circumstances which led to the exception.
     86     *
     87     * @since 6.7.0
     88     *
     89     * @var string[]
     90     */
     91    public $active_formatting_elements = array();
     92
     93    /**
     94     * Constructor function.
     95     *
     96     * @since 6.7.0
     97     *
     98     * @param string   $message                    Brief message explaining what is unsupported, the reason this exception was raised.
     99     * @param string   $token_name                 Normalized name of matched token when this exception was raised.
     100     * @param int      $token_at                   Number of bytes into source HTML document where matched token starts.
     101     * @param string   $token                      Full raw text of matched token when this exception was raised.
     102     * @param string[] $stack_of_open_elements     Stack of open elements when this exception was raised.
     103     * @param string[] $active_formatting_elements List of active formatting elements when this exception was raised.
     104     */
     105    public function __construct( string $message, string $token_name, int $token_at, string $token, array $stack_of_open_elements, array $active_formatting_elements ) {
     106        parent::__construct( $message );
     107
     108        $this->token_name = $token_name;
     109        $this->token_at   = $token_at;
     110        $this->token      = $token;
     111
     112        $this->stack_of_open_elements     = $stack_of_open_elements;
     113        $this->active_formatting_elements = $active_formatting_elements;
     114    }
    31115}
Note: See TracChangeset for help on using the changeset viewer.