Changeset 58977
- Timestamp:
- 09/03/2024 07:48:57 PM (6 weeks ago)
- Location:
- trunk/src/wp-includes/html-api
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/wp-includes/html-api/class-wp-html-processor.php
r58970 r58977 844 844 if ( self::PROCESS_NEXT_NODE === $node_to_process ) { 845 845 parent::next_token(); 846 if ( 847 WP_HTML_Tag_Processor::STATE_TEXT_NODE === $this->parser_state || 848 WP_HTML_Tag_Processor::STATE_CDATA_NODE === $this->parser_state 849 ) { 846 if ( WP_HTML_Tag_Processor::STATE_TEXT_NODE === $this->parser_state ) { 850 847 parent::subdivide_text_appropriately(); 851 848 } … … 4376 4373 4377 4374 switch ( $op ) { 4378 case '#cdata-section':4379 4375 case '#text': 4380 4376 /* … … 4390 4386 */ 4391 4387 if ( parent::TEXT_IS_GENERIC === $this->text_node_classification ) { 4388 $this->state->frameset_ok = false; 4389 } 4390 4391 $this->insert_foreign_element( $this->state->current_token, false ); 4392 return true; 4393 4394 /* 4395 * CDATA sections are alternate wrappers for text content and therefore 4396 * ought to follow the same rules as text nodes. 4397 */ 4398 case '#cdata-section': 4399 /* 4400 * NULL bytes and whitespace do not change the frameset-ok flag. 4401 */ 4402 $current_token = $this->bookmarks[ $this->state->current_token->bookmark_name ]; 4403 $cdata_content_start = $current_token->start + 9; 4404 $cdata_content_length = $current_token->length - 12; 4405 if ( strspn( $this->html, "\0 \t\n\f\r", $cdata_content_start, $cdata_content_length ) !== $cdata_content_length ) { 4392 4406 $this->state->frameset_ok = false; 4393 4407 } -
trunk/src/wp-includes/html-api/class-wp-html-tag-processor.php
r58970 r58977 3338 3338 3339 3339 /** 3340 * Subdivides a matched text node or CDATA text node, splitting NULL byte sequences 3341 * and decoded whitespace as distinct prefixes. 3340 * Subdivides a matched text node, splitting NULL byte sequences and decoded whitespace as 3341 * distinct node prefixes. 3342 3342 * 3343 3343 * Note that once anything that's neither a NULL byte nor decoded whitespace is … … 3369 3369 */ 3370 3370 public function subdivide_text_appropriately(): bool { 3371 if ( self::STATE_TEXT_NODE !== $this->parser_state ) { 3372 return false; 3373 } 3374 3371 3375 $this->text_node_classification = self::TEXT_IS_GENERIC; 3372 3376 3373 if ( self::STATE_TEXT_NODE === $this->parser_state ) { 3374 /* 3375 * NULL bytes are treated categorically different than numeric character 3376 * references whose number is zero. `&#x00;` is not the same as `"\x00"`. 3377 */ 3378 $leading_nulls = strspn( $this->html, "\x00", $this->text_starts_at, $this->text_length ); 3379 if ( $leading_nulls > 0 ) { 3380 $this->token_length = $leading_nulls; 3381 $this->text_length = $leading_nulls; 3382 $this->bytes_already_parsed = $this->token_starts_at + $leading_nulls; 3383 $this->text_node_classification = self::TEXT_IS_NULL_SEQUENCE; 3384 return true; 3385 } 3386 3387 /* 3388 * Start a decoding loop to determine the point at which the 3389 * text subdivides. This entails raw whitespace bytes and any 3390 * character reference that decodes to the same. 
3391 */ 3392 $at = $this->text_starts_at; 3393 $end = $this->text_starts_at + $this->text_length; 3394 while ( $at < $end ) { 3395 $skipped = strspn( $this->html, " \t\f\r\n", $at, $end - $at ); 3396 $at += $skipped; 3397 3398 if ( $at < $end && '&' === $this->html[ $at ] ) { 3399 $matched_byte_length = null; 3400 $replacement = WP_HTML_Decoder::read_character_reference( 'data', $this->html, $at, $matched_byte_length ); 3401 if ( isset( $replacement ) && 1 === strspn( $replacement, " \t\f\r\n" ) ) { 3402 $at += $matched_byte_length; 3403 continue; 3404 } 3377 /* 3378 * NULL bytes are treated categorically different than numeric character 3379 * references whose number is zero. `&#x00;` is not the same as `"\x00"`. 3380 */ 3381 $leading_nulls = strspn( $this->html, "\x00", $this->text_starts_at, $this->text_length ); 3382 if ( $leading_nulls > 0 ) { 3383 $this->token_length = $leading_nulls; 3384 $this->text_length = $leading_nulls; 3385 $this->bytes_already_parsed = $this->token_starts_at + $leading_nulls; 3386 $this->text_node_classification = self::TEXT_IS_NULL_SEQUENCE; 3387 return true; 3388 } 3389 3390 /* 3391 * Start a decoding loop to determine the point at which the 3392 * text subdivides. This entails raw whitespace bytes and any 3393 * character reference that decodes to the same. 
3394 */ 3395 $at = $this->text_starts_at; 3396 $end = $this->text_starts_at + $this->text_length; 3397 while ( $at < $end ) { 3398 $skipped = strspn( $this->html, " \t\f\r\n", $at, $end - $at ); 3399 $at += $skipped; 3400 3401 if ( $at < $end && '&' === $this->html[ $at ] ) { 3402 $matched_byte_length = null; 3403 $replacement = WP_HTML_Decoder::read_character_reference( 'data', $this->html, $at, $matched_byte_length ); 3404 if ( isset( $replacement ) && 1 === strspn( $replacement, " \t\f\r\n" ) ) { 3405 $at += $matched_byte_length; 3406 continue; 3405 3407 } 3406 3407 break; 3408 } 3409 3410 if ( $at > $this->text_starts_at ) { 3411 $new_length = $at - $this->text_starts_at; 3412 $this->text_length = $new_length; 3413 $this->token_length = $new_length; 3414 $this->bytes_already_parsed = $at; 3415 $this->text_node_classification = self::TEXT_IS_WHITESPACE; 3416 return true; 3417 } 3418 3419 return false; 3420 } 3421 3422 // Unlike text nodes, there are no character references within CDATA sections. 3423 if ( self::STATE_CDATA_NODE === $this->parser_state ) { 3424 $leading_nulls = strspn( $this->html, "\x00", $this->text_starts_at, $this->text_length ); 3425 if ( $leading_nulls === $this->text_length ) { 3426 $this->text_node_classification = self::TEXT_IS_NULL_SEQUENCE; 3427 return true; 3428 } 3429 3430 $leading_ws = strspn( $this->html, " \t\f\r\n", $this->text_starts_at, $this->text_length ); 3431 if ( $leading_ws === $this->text_length ) { 3432 $this->text_node_classification = self::TEXT_IS_WHITESPACE; 3433 return true; 3434 } 3408 } 3409 3410 break; 3411 } 3412 3413 if ( $at > $this->text_starts_at ) { 3414 $new_length = $at - $this->text_starts_at; 3415 $this->text_length = $new_length; 3416 $this->token_length = $new_length; 3417 $this->bytes_already_parsed = $at; 3418 $this->text_node_classification = self::TEXT_IS_WHITESPACE; 3419 return true; 3435 3420 } 3436 3421
Note: See TracChangeset
for help on using the changeset viewer.