- Timestamp:
- 06/03/2024 07:45:57 PM (6 months ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/wp-includes/html-api/class-wp-html-processor.php
r58192 r58304 202 202 private $release_internal_bookmark_on_destruct = null; 203 203 204 /** 205 * Stores stack events which arise during parsing of the 206 * HTML document, which will then supply the "match" events. 207 * 208 * @since 6.6.0 209 * 210 * @var WP_HTML_Stack_Event[] 211 */ 212 private $element_queue = array(); 213 214 /** 215 * Current stack event, if set, representing a matched token. 216 * 217 * Because the parser may internally point to a place further along in a document 218 * than the nodes which have already been processed (some "virtual" nodes may have 219 * appeared while scanning the HTML document), this will point at the "current" node 220 * being processed. It comes from the front of the element queue. 221 * 222 * @since 6.6.0 223 * 224 * @var ?WP_HTML_Stack_Event 225 */ 226 private $current_element = null; 227 228 /** 229 * Context node if created as a fragment parser. 230 * 231 * @var ?WP_HTML_Token 232 */ 233 private $context_node = null; 234 235 /** 236 * Whether the parser has yet processed the context node, 237 * if created as a fragment parser. 238 * 239 * The context node will be initially pushed onto the stack of open elements, 240 * but when created as a fragment parser, this context element (and the implicit 241 * HTML document node above it) should not be exposed as a matched token or node. 242 * 243 * This boolean indicates whether the processor should skip over the current 244 * node in its initial search for the first node created from the input HTML. 245 * 246 * @var bool 247 */ 248 private $has_seen_context_node = false; 249 204 250 /* 205 251 * Public Interface Functions … … 258 304 ); 259 305 260 $processor->state->stack_of_open_elements->push( 261 new WP_HTML_Token( 262 'context-node', 263 $processor->state->context_node[0], 264 false 265 ) 306 $context_node = new WP_HTML_Token( 307 'context-node', 308 $processor->state->context_node[0], 309 false 266 310 ); 311 312 $processor->state->stack_of_open_elements->push( $context_node ); 313 $processor->context_node = $context_node; 267 314 268 315 return $processor; … … 300 347 $this->state = new WP_HTML_Processor_State(); 301 348 349 $this->state->stack_of_open_elements->set_push_handler( 350 function ( WP_HTML_Token $token ) { 351 $this->element_queue[] = new WP_HTML_Stack_Event( $token, WP_HTML_Stack_Event::PUSH ); 352 } 353 ); 354 355 $this->state->stack_of_open_elements->set_pop_handler( 356 function ( WP_HTML_Token $token ) { 357 $this->element_queue[] = new WP_HTML_Stack_Event( $token, WP_HTML_Stack_Event::POP ); 358 } 359 ); 360 302 361 /* 303 362 * Create this wrapper so that it's possible to pass … … 343 402 * 344 403 * @since 6.4.0 404 * @since 6.6.0 Visits all tokens, including virtual ones. 345 405 * 346 406 * @throws Exception When unable to allocate a bookmark for the next token in the input HTML document. … … 350 410 * 351 411 * @type string|null $tag_name Which tag to find, or `null` for "any tag." 412 * @type string $tag_closers 'visit' to pause at tag closers, 'skip' or unset to only visit openers. 352 413 * @type int|null $match_offset Find the Nth tag matching all search criteria. 353 414 * 1 for "first" tag, 3 for "third," etc. … … 360 421 */ 361 422 public function next_tag( $query = null ) { 423 $visit_closers = isset( $query['tag_closers'] ) && 'visit' === $query['tag_closers']; 424 362 425 if ( null === $query ) { 363 while ( $this-> step() ) {426 while ( $this->next_token() ) { 364 427 if ( '#tag' !== $this->get_token_type() ) { 365 428 continue; 366 429 } 367 430 368 if ( ! $this ->is_tag_closer()) {431 if ( ! $this::is_tag_closer() || $visit_closers ) { 369 432 return true; 370 433 } … … 392 455 393 456 if ( ! ( array_key_exists( 'breadcrumbs', $query ) && is_array( $query['breadcrumbs'] ) ) ) { 394 while ( $this-> step() ) {457 while ( $this->next_token() ) { 395 458 if ( '#tag' !== $this->get_token_type() ) { 396 459 continue; … … 401 464 } 402 465 403 if ( ! $this->is_tag_closer()) {466 if ( ! parent::is_tag_closer() || $visit_closers ) { 404 467 return true; 405 468 } 406 469 } 407 470 408 return false;409 }410 411 if ( isset( $query['tag_closers'] ) && 'visit' === $query['tag_closers'] ) {412 _doing_it_wrong(413 __METHOD__,414 __( 'Cannot visit tag closers in HTML Processor.' ),415 '6.4.0'416 );417 471 return false; 418 472 } … … 421 475 $match_offset = isset( $query['match_offset'] ) ? (int) $query['match_offset'] : 1; 422 476 423 while ( $match_offset > 0 && $this-> step() ) {424 if ( '#tag' !== $this->get_token_type() ) {477 while ( $match_offset > 0 && $this->next_token() ) { 478 if ( '#tag' !== $this->get_token_type() || $this->is_tag_closer() ) { 425 479 continue; 426 480 } … … 453 507 */ 454 508 public function next_token() { 455 return $this->step(); 509 $this->current_element = null; 510 511 if ( isset( $this->last_error ) ) { 512 return false; 513 } 514 515 if ( 0 === count( $this->element_queue ) && ! $this->step() ) { 516 while ( $this->state->stack_of_open_elements->pop() ) { 517 continue; 518 } 519 } 520 521 $this->current_element = array_shift( $this->element_queue ); 522 while ( isset( $this->context_node ) && ! $this->has_seen_context_node ) { 523 if ( isset( $this->current_element ) ) { 524 if ( $this->context_node === $this->current_element->token && WP_HTML_Stack_Event::PUSH === $this->current_element->operation ) { 525 $this->has_seen_context_node = true; 526 return $this->next_token(); 527 } 528 } 529 $this->current_element = array_shift( $this->element_queue ); 530 } 531 532 if ( ! isset( $this->current_element ) ) { 533 return $this->next_token(); 534 } 535 536 if ( isset( $this->context_node ) && WP_HTML_Stack_Event::POP === $this->current_element->operation && $this->context_node === $this->current_element->token ) { 537 $this->element_queue = array(); 538 $this->current_element = null; 539 return false; 540 } 541 542 // Avoid sending close events for elements which don't expect a closing. 543 if ( 544 WP_HTML_Stack_Event::POP === $this->current_element->operation && 545 ! static::expects_closer( $this->current_element->token->node_name ) 546 ) { 547 return $this->next_token(); 548 } 549 550 return true; 551 } 552 553 554 /** 555 * Indicates if the current tag token is a tag closer. 556 * 557 * Example: 558 * 559 * $p = WP_HTML_Processor::create_fragment( '<div></div>' ); 560 * $p->next_tag( array( 'tag_name' => 'div', 'tag_closers' => 'visit' ) ); 561 * $p->is_tag_closer() === false; 562 * 563 * $p->next_tag( array( 'tag_name' => 'div', 'tag_closers' => 'visit' ) ); 564 * $p->is_tag_closer() === true; 565 * 566 * @since 6.6.0 Subclassed for HTML Processor. 567 * 568 * @return bool Whether the current tag is a tag closer. 569 */ 570 public function is_tag_closer() { 571 return isset( $this->current_element ) 572 ? ( WP_HTML_Stack_Event::POP === $this->current_element->operation ) 573 : parent::is_tag_closer(); 456 574 } 457 575 … … 526 644 * SVG and MathML namespace. 527 645 * 646 * @param ?WP_HTML_Token $node Node to examine instead of current node, if provided. 528 647 * @return bool Whether to expect a closer for the currently-matched node, 529 648 * or `null` if not matched on any token. 530 649 */ 531 public function expects_closer( ) {532 $token_name = $ this->get_token_name();650 public function expects_closer( $node = null ) { 651 $token_name = $node->node_name ?? $this->get_token_name(); 533 652 if ( ! isset( $token_name ) ) { 534 653 return null; … … 582 701 */ 583 702 $top_node = $this->state->stack_of_open_elements->current_node(); 584 if ( 585 $top_node && ( 586 // Void elements. 587 self::is_void( $top_node->node_name ) || 588 // Comments, text nodes, and other atomic tokens. 589 '#' === $top_node->node_name[0] || 590 // Doctype declarations. 591 'html' === $top_node->node_name 592 ) 593 ) { 703 if ( isset( $top_node ) && ! static::expects_closer( $top_node ) ) { 594 704 $this->state->stack_of_open_elements->pop(); 595 705 } … … 651 761 * @since 6.4.0 652 762 * 763 * @todo make aware of queue of elements, because stack operations have already been done by now. 764 * 653 765 * @return string[]|null Array of tag names representing path to matched node, if matched, otherwise NULL. 654 766 */ … … 709 821 $token_name = $this->get_token_name(); 710 822 $token_type = $this->get_token_type(); 711 $op_sigil = '#tag' === $token_type ? ( $this->is_tag_closer() ? '-' : '+' ) : '';823 $op_sigil = '#tag' === $token_type ? ( parent::is_tag_closer() ? '-' : '+' ) : ''; 712 824 $op = "{$op_sigil}{$token_name}"; 713 825 … … 1232 1344 } 1233 1345 1234 if ( ! $this->is_tag_closer() ) {1346 if ( ! parent::is_tag_closer() ) { 1235 1347 /* 1236 1348 * > Any other start tag … … 1326 1438 if ( null !== $this->last_error ) { 1327 1439 return null; 1440 } 1441 1442 if ( isset( $this->current_element ) ) { 1443 return $this->current_element->token->node_name; 1328 1444 } 1329 1445 … … 1344 1460 1345 1461 /** 1462 * Returns the node name represented by the token. 1463 * 1464 * This matches the DOM API value `nodeName`. Some values 1465 * are static, such as `#text` for a text node, while others 1466 * are dynamically generated from the token itself. 1467 * 1468 * Dynamic names: 1469 * - Uppercase tag name for tag matches. 1470 * - `html` for DOCTYPE declarations. 1471 * 1472 * Note that if the Tag Processor is not matched on a token 1473 * then this function will return `null`, either because it 1474 * hasn't yet found a token or because it reached the end 1475 * of the document without matching a token. 1476 * 1477 * @since 6.6.0 Subclassed for the HTML Processor. 1478 * 1479 * @return string|null Name of the matched token. 1480 */ 1481 public function get_token_name() { 1482 if ( isset( $this->current_element ) ) { 1483 return $this->current_element->token->node_name; 1484 } 1485 1486 return parent::get_token_name(); 1487 } 1488 1489 /** 1490 * Indicates the kind of matched token, if any. 1491 * 1492 * This differs from `get_token_name()` in that it always 1493 * returns a static string indicating the type, whereas 1494 * `get_token_name()` may return values derived from the 1495 * token itself, such as a tag name or processing 1496 * instruction tag. 1497 * 1498 * Possible values: 1499 * - `#tag` when matched on a tag. 1500 * - `#text` when matched on a text node. 1501 * - `#cdata-section` when matched on a CDATA node. 1502 * - `#comment` when matched on a comment. 1503 * - `#doctype` when matched on a DOCTYPE declaration. 1504 * - `#presumptuous-tag` when matched on an empty tag closer. 1505 * - `#funky-comment` when matched on a funky comment. 1506 * 1507 * @since 6.6.0 Subclassed for the HTML Processor. 1508 * 1509 * @return string|null What kind of token is matched, or null. 1510 */ 1511 public function get_token_type() { 1512 if ( isset( $this->current_element ) ) { 1513 $node_name = $this->current_element->token->node_name; 1514 if ( ctype_upper( $node_name[0] ) ) { 1515 return '#tag'; 1516 } 1517 1518 if ( 'html' === $node_name ) { 1519 return '#doctype'; 1520 } 1521 1522 return $node_name; 1523 } 1524 1525 return parent::get_token_type(); 1526 } 1527 1528 /** 1529 * Returns the value of a requested attribute from a matched tag opener if that attribute exists. 1530 * 1531 * Example: 1532 * 1533 * $p = WP_HTML_Processor::create_fragment( '<div enabled class="test" data-test-id="14">Test</div>' ); 1534 * $p->next_token() === true; 1535 * $p->get_attribute( 'data-test-id' ) === '14'; 1536 * $p->get_attribute( 'enabled' ) === true; 1537 * $p->get_attribute( 'aria-label' ) === null; 1538 * 1539 * $p->next_tag() === false; 1540 * $p->get_attribute( 'class' ) === null; 1541 * 1542 * @since 6.6.0 Subclassed for HTML Processor. 1543 * 1544 * @param string $name Name of attribute whose value is requested. 1545 * @return string|true|null Value of attribute or `null` if not available. Boolean attributes return `true`. 1546 */ 1547 public function get_attribute( $name ) { 1548 if ( isset( $this->current_element ) ) { 1549 // Closing tokens cannot contain attributes. 1550 if ( WP_HTML_Stack_Event::POP === $this->current_element->operation ) { 1551 return null; 1552 } 1553 1554 $node_name = $this->current_element->token->node_name; 1555 1556 // Only tags can contain attributes. 1557 if ( 'A' > $node_name[0] || 'Z' < $node_name[0] ) { 1558 return null; 1559 } 1560 1561 if ( $this->current_element->token->bookmark_name === (string) $this->bookmark_counter ) { 1562 return parent::get_attribute( $name ); 1563 } 1564 } 1565 1566 return null; 1567 } 1568 1569 /** 1570 * Gets lowercase names of all attributes matching a given prefix in the current tag. 1571 * 1572 * Note that matching is case-insensitive. This is in accordance with the spec: 1573 * 1574 * > There must never be two or more attributes on 1575 * > the same start tag whose names are an ASCII 1576 * > case-insensitive match for each other. 1577 * - HTML 5 spec 1578 * 1579 * Example: 1580 * 1581 * $p = new WP_HTML_Tag_Processor( '<div data-ENABLED class="test" DATA-test-id="14">Test</div>' ); 1582 * $p->next_tag( array( 'class_name' => 'test' ) ) === true; 1583 * $p->get_attribute_names_with_prefix( 'data-' ) === array( 'data-enabled', 'data-test-id' ); 1584 * 1585 * $p->next_tag() === false; 1586 * $p->get_attribute_names_with_prefix( 'data-' ) === null; 1587 * 1588 * @since 6.6.0 Subclassed for the HTML Processor. 1589 * 1590 * @see https://html.spec.whatwg.org/multipage/syntax.html#attributes-2:ascii-case-insensitive 1591 * 1592 * @param string $prefix Prefix of requested attribute names. 1593 * @return array|null List of attribute names, or `null` when no tag opener is matched. 1594 */ 1595 public function get_attribute_names_with_prefix( $prefix ) { 1596 if ( isset( $this->current_element ) ) { 1597 if ( WP_HTML_Stack_Event::POP === $this->current_element->operation ) { 1598 return null; 1599 } 1600 1601 $mark = $this->bookmarks[ $this->current_element->token->bookmark_name ]; 1602 if ( 0 === $mark->length ) { 1603 return null; 1604 } 1605 } 1606 1607 return parent::get_attribute_names_with_prefix( $prefix ); 1608 } 1609 1610 /** 1611 * Returns the modifiable text for a matched token, or an empty string. 1612 * 1613 * Modifiable text is text content that may be read and changed without 1614 * changing the HTML structure of the document around it. This includes 1615 * the contents of `#text` nodes in the HTML as well as the inner 1616 * contents of HTML comments, Processing Instructions, and others, even 1617 * though these nodes aren't part of a parsed DOM tree. They also contain 1618 * the contents of SCRIPT and STYLE tags, of TEXTAREA tags, and of any 1619 * other section in an HTML document which cannot contain HTML markup (DATA). 1620 * 1621 * If a token has no modifiable text then an empty string is returned to 1622 * avoid needless crashing or type errors. An empty string does not mean 1623 * that a token has modifiable text, and a token with modifiable text may 1624 * have an empty string (e.g. a comment with no contents). 1625 * 1626 * @since 6.6.0 Subclassed for the HTML Processor. 1627 * 1628 * @return string 1629 */ 1630 public function get_modifiable_text() { 1631 if ( isset( $this->current_element ) ) { 1632 if ( WP_HTML_Stack_Event::POP === $this->current_element->operation ) { 1633 return ''; 1634 } 1635 1636 $mark = $this->bookmarks[ $this->current_element->token->bookmark_name ]; 1637 if ( 0 === $mark->length ) { 1638 return ''; 1639 } 1640 } 1641 return parent::get_modifiable_text(); 1642 } 1643 1644 /** 1346 1645 * Removes a bookmark that is no longer needed. 1347 1646 * … … 1384 1683 : 0; 1385 1684 $bookmark_starts_at = $this->bookmarks[ $actual_bookmark_name ]->start; 1685 $bookmark_length = $this->bookmarks[ $actual_bookmark_name ]->length; 1386 1686 $direction = $bookmark_starts_at > $processor_started_at ? 'forward' : 'backward'; 1387 1687 … … 1439 1739 $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY; 1440 1740 $this->state->frameset_ok = true; 1741 $this->element_queue = array(); 1742 $this->current_element = null; 1441 1743 } 1442 1744 … … 1446 1748 } 1447 1749 1448 while ( $this-> step() ) {1750 while ( $this->next_token() ) { 1449 1751 if ( $bookmark_starts_at === $this->bookmarks[ $this->state->current_token->bookmark_name ]->start ) { 1752 while ( isset( $this->current_element ) && WP_HTML_Stack_Event::POP === $this->current_element->operation ) { 1753 $this->current_element = array_shift( $this->element_queue ); 1754 } 1450 1755 return true; 1451 1756 }
Note: See TracChangeset
for help on using the changeset viewer.