- Timestamp:
- 09/03/2024 07:48:57 PM (15 months ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/wp-includes/html-api/class-wp-html-tag-processor.php
r58970 r58977 3338 3338 3339 3339 /** 3340 * Subdivides a matched text node or CDATA text node, splitting NULL byte sequences 3341 * and decoded whitespace as distinct prefixes. 3340 * Subdivides a matched text node, splitting NULL byte sequences and decoded whitespace as 3341 * distinct nodes prefixes. 3342 3342 * 3343 3343 * Note that once anything that's neither a NULL byte nor decoded whitespace is … … 3369 3369 */ 3370 3370 public function subdivide_text_appropriately(): bool { 3371 if ( self::STATE_TEXT_NODE !== $this->parser_state ) { 3372 return false; 3373 } 3374 3371 3375 $this->text_node_classification = self::TEXT_IS_GENERIC; 3372 3376 3373 if ( self::STATE_TEXT_NODE === $this->parser_state ) { 3374 /* 3375 * NULL bytes are treated categorically different than numeric character 3376 * references whose number is zero. `&#x00;` is not the same as `"\x00"`. 3377 */ 3378 $leading_nulls = strspn( $this->html, "\x00", $this->text_starts_at, $this->text_length ); 3379 if ( $leading_nulls > 0 ) { 3380 $this->token_length = $leading_nulls; 3381 $this->text_length = $leading_nulls; 3382 $this->bytes_already_parsed = $this->token_starts_at + $leading_nulls; 3383 $this->text_node_classification = self::TEXT_IS_NULL_SEQUENCE; 3384 return true; 3385 } 3386 3387 /* 3388 * Start a decoding loop to determine the point at which the 3389 * text subdivides. This entails raw whitespace bytes and any 3390 * character reference that decodes to the same. 
3391 */ 3392 $at = $this->text_starts_at; 3393 $end = $this->text_starts_at + $this->text_length; 3394 while ( $at < $end ) { 3395 $skipped = strspn( $this->html, " \t\f\r\n", $at, $end - $at ); 3396 $at += $skipped; 3397 3398 if ( $at < $end && '&' === $this->html[ $at ] ) { 3399 $matched_byte_length = null; 3400 $replacement = WP_HTML_Decoder::read_character_reference( 'data', $this->html, $at, $matched_byte_length ); 3401 if ( isset( $replacement ) && 1 === strspn( $replacement, " \t\f\r\n" ) ) { 3402 $at += $matched_byte_length; 3403 continue; 3404 } 3377 /* 3378 * NULL bytes are treated categorically different than numeric character 3379 * references whose number is zero. `&#x00;` is not the same as `"\x00"`. 3380 */ 3381 $leading_nulls = strspn( $this->html, "\x00", $this->text_starts_at, $this->text_length ); 3382 if ( $leading_nulls > 0 ) { 3383 $this->token_length = $leading_nulls; 3384 $this->text_length = $leading_nulls; 3385 $this->bytes_already_parsed = $this->token_starts_at + $leading_nulls; 3386 $this->text_node_classification = self::TEXT_IS_NULL_SEQUENCE; 3387 return true; 3388 } 3389 3390 /* 3391 * Start a decoding loop to determine the point at which the 3392 * text subdivides. This entails raw whitespace bytes and any 3393 * character reference that decodes to the same. 
3394 */ 3395 $at = $this->text_starts_at; 3396 $end = $this->text_starts_at + $this->text_length; 3397 while ( $at < $end ) { 3398 $skipped = strspn( $this->html, " \t\f\r\n", $at, $end - $at ); 3399 $at += $skipped; 3400 3401 if ( $at < $end && '&' === $this->html[ $at ] ) { 3402 $matched_byte_length = null; 3403 $replacement = WP_HTML_Decoder::read_character_reference( 'data', $this->html, $at, $matched_byte_length ); 3404 if ( isset( $replacement ) && 1 === strspn( $replacement, " \t\f\r\n" ) ) { 3405 $at += $matched_byte_length; 3406 continue; 3405 3407 } 3406 3407 break; 3408 } 3409 3410 if ( $at > $this->text_starts_at ) { 3411 $new_length = $at - $this->text_starts_at; 3412 $this->text_length = $new_length; 3413 $this->token_length = $new_length; 3414 $this->bytes_already_parsed = $at; 3415 $this->text_node_classification = self::TEXT_IS_WHITESPACE; 3416 return true; 3417 } 3418 3419 return false; 3420 } 3421 3422 // Unlike text nodes, there are no character references within CDATA sections. 3423 if ( self::STATE_CDATA_NODE === $this->parser_state ) { 3424 $leading_nulls = strspn( $this->html, "\x00", $this->text_starts_at, $this->text_length ); 3425 if ( $leading_nulls === $this->text_length ) { 3426 $this->text_node_classification = self::TEXT_IS_NULL_SEQUENCE; 3427 return true; 3428 } 3429 3430 $leading_ws = strspn( $this->html, " \t\f\r\n", $this->text_starts_at, $this->text_length ); 3431 if ( $leading_ws === $this->text_length ) { 3432 $this->text_node_classification = self::TEXT_IS_WHITESPACE; 3433 return true; 3434 } 3408 } 3409 3410 break; 3411 } 3412 3413 if ( $at > $this->text_starts_at ) { 3414 $new_length = $at - $this->text_starts_at; 3415 $this->text_length = $new_length; 3416 $this->token_length = $new_length; 3417 $this->bytes_already_parsed = $at; 3418 $this->text_node_classification = self::TEXT_IS_WHITESPACE; 3419 return true; 3435 3420 } 3436 3421
Note: See TracChangeset
for help on using the changeset viewer.