Changeset 58613
- Timestamp: 07/01/2024 11:34:19 PM (10 months ago)
- Location: trunk/src/wp-includes/html-api
- Files: 2 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/wp-includes/html-api/class-wp-html-decoder.php
r58281 r58613
   142    142   while ( $at < $end ) {
   143    143   $next_character_reference_at = strpos( $text, '&', $at );
   144        - if ( false === $next_character_reference_at || $next_character_reference_at >= $end) {
          144 + if ( false === $next_character_reference_at ) {
   145    145   break;
   146    146   }
     …      …
   437    437
   438    438   if ( $code_point <= 0x7FF ) {
   439        - $byte1 = ( $code_point >> 6 ) | 0xC0;
   440        - $byte2 = $code_point & 0x3F | 0x80;
   441        -
   442        - return pack( 'CC', $byte1, $byte2 );
          439 + $byte1 = chr( ( $code_point >> 6 ) | 0xC0 );
          440 + $byte2 = chr( $code_point & 0x3F | 0x80 );
          441 +
          442 + return "{$byte1}{$byte2}";
   443    443   }
   444    444
   445    445   if ( $code_point <= 0xFFFF ) {
   446        - $byte1 = ( $code_point >> 12 ) | 0xE0;
   447        - $byte2 = ( $code_point >> 6 ) & 0x3F | 0x80;
   448        - $byte3 = $code_point & 0x3F | 0x80;
   449        -
   450        - return pack( 'CCC', $byte1, $byte2, $byte3 );
          446 + $byte1 = chr( ( $code_point >> 12 ) | 0xE0 );
          447 + $byte2 = chr( ( $code_point >> 6 ) & 0x3F | 0x80 );
          448 + $byte3 = chr( $code_point & 0x3F | 0x80 );
          449 +
          450 + return "{$byte1}{$byte2}{$byte3}";
   451    451   }
   452    452
   453    453   // Any values above U+10FFFF are eliminated above in the pre-check.
   454        - $byte1 = ( $code_point >> 18 ) | 0xF0;
   455        - $byte2 = ( $code_point >> 12 ) & 0x3F | 0x80;
   456        - $byte3 = ( $code_point >> 6 ) & 0x3F | 0x80;
   457        - $byte4 = $code_point & 0x3F | 0x80;
   458        -
   459        - return pack( 'CCCC', $byte1, $byte2, $byte3, $byte4 );
          454 + $byte1 = chr( ( $code_point >> 18 ) | 0xF0 );
          455 + $byte2 = chr( ( $code_point >> 12 ) & 0x3F | 0x80 );
          456 + $byte3 = chr( ( $code_point >> 6 ) & 0x3F | 0x80 );
          457 + $byte4 = chr( $code_point & 0x3F | 0x80 );
          458 +
          459 + return "{$byte1}{$byte2}{$byte3}{$byte4}";
   460    460   }
   461    461   }
trunk/src/wp-includes/html-api/class-wp-html-tag-processor.php
r58559 r58613
  1525   1525   $at = $was_at;
  1526   1526
  1527        - while ( false !== $at &&$at < $doc_length ) {
         1527 + while ( $at < $doc_length ) {
  1528   1528   $at = strpos( $html, '<', $at );
  1529        -
  1530        - /*
  1531        - * This does not imply an incomplete parse; it indicates that there
  1532        - * can be nothing left in the document other than a #text node.
  1533        - */
  1534   1529   if ( false === $at ) {
  1535        - $this->parser_state = self::STATE_TEXT_NODE;
  1536        - $this->token_starts_at = $was_at;
  1537        - $this->token_length = strlen( $html ) - $was_at;
  1538        - $this->text_starts_at = $was_at;
  1539        - $this->text_length = $this->token_length;
  1540        - $this->bytes_already_parsed = strlen( $html );
  1541        - return true;
         1530 + break;
  1542   1531   }
  1543   1532
     …      …
  1555   1544   * @see https://html.spec.whatwg.org/#tag-open-state
  1556   1545   */
  1557        - if ( strlen( $html ) > $at + 1 ) {
  1558        - $next_character = $html[ $at + 1 ];
  1559        - $at_another_node = (
  1560        - '!' === $next_character ||
  1561        - '/' === $next_character ||
  1562        - '?' === $next_character ||
  1563        - ( 'A' <= $next_character && $next_character <= 'Z' ) ||
  1564        - ( 'a' <= $next_character && $next_character <= 'z' )
  1565        - );
  1566        - if ( ! $at_another_node ) {
  1567        - ++$at;
  1568        - continue;
  1569        - }
         1546 + if ( 1 !== strspn( $html, '!/?abcdefghijklmnopqrstuvwxyzABCEFGHIJKLMNOPQRSTUVWXYZ', $at + 1, 1 ) ) {
         1547 + ++$at;
         1548 + continue;
  1570   1549   }
  1571   1550
     …      …
  1631   1610   * https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
  1632   1611   */
  1633        - if (
  1634        - $doc_length > $at + 3 &&
  1635        - '-' === $html[ $at + 2 ] &&
  1636        - '-' === $html[ $at + 3 ]
  1637        - ) {
         1612 + if ( 0 === substr_compare( $html, '--', $at + 2, 2 ) ) {
  1638   1613   $closer_at = $at + 4;
  1639   1614   // If it's not possible to close the comment then there is nothing more to scan.
     …      …
  1912   1887   }
  1913   1888
  1914        - return false;
         1889 + /*
         1890 + * This does not imply an incomplete parse; it indicates that there
         1891 + * can be nothing left in the document other than a #text node.
         1892 + */
         1893 + $this->parser_state = self::STATE_TEXT_NODE;
         1894 + $this->token_starts_at = $was_at;
         1895 + $this->token_length = $doc_length - $was_at;
         1896 + $this->text_starts_at = $was_at;
         1897 + $this->text_length = $this->token_length;
         1898 + $this->bytes_already_parsed = $doc_length;
         1899 + return true;
  1915   1900   }
  1916   1901
     …      …
  1923   1908   */
  1924   1909   private function parse_next_attribute() {
         1910 + $doc_length = strlen( $this->html );
         1911 +
  1925   1912   // Skip whitespace and slashes.
  1926   1913   $this->bytes_already_parsed += strspn( $this->html, " \t\f\r\n/", $this->bytes_already_parsed );
  1927        - if ( $this->bytes_already_parsed >= strlen( $this->html )) {
         1914 + if ( $this->bytes_already_parsed >= $doc_length ) {
  1928   1915   $this->parser_state = self::STATE_INCOMPLETE_INPUT;
  1929   1916
     …      …
  1942   1929
  1943   1930   // No attribute, just tag closer.
  1944        - if ( 0 === $name_length || $this->bytes_already_parsed + $name_length >= strlen( $this->html )) {
         1931 + if ( 0 === $name_length || $this->bytes_already_parsed + $name_length >= $doc_length ) {
  1945   1932   return false;
  1946   1933   }
     …      …
  1949   1936   $attribute_name = substr( $this->html, $attribute_start, $name_length );
  1950   1937   $this->bytes_already_parsed += $name_length;
  1951        - if ( $this->bytes_already_parsed >= strlen( $this->html )) {
         1938 + if ( $this->bytes_already_parsed >= $doc_length ) {
  1952   1939   $this->parser_state = self::STATE_INCOMPLETE_INPUT;
  1953   1940
     …      …
  1956   1943
  1957   1944   $this->skip_whitespace();
  1958        - if ( $this->bytes_already_parsed >= strlen( $this->html )) {
         1945 + if ( $this->bytes_already_parsed >= $doc_length ) {
  1959   1946   $this->parser_state = self::STATE_INCOMPLETE_INPUT;
  1960   1947
     …      …
  1966   1953   ++$this->bytes_already_parsed;
  1967   1954   $this->skip_whitespace();
  1968        - if ( $this->bytes_already_parsed >= strlen( $this->html )) {
         1955 + if ( $this->bytes_already_parsed >= $doc_length ) {
  1969   1956   $this->parser_state = self::STATE_INCOMPLETE_INPUT;
  1970   1957
     …      …
  1977   1964   $quote = $this->html[ $this->bytes_already_parsed ];
  1978   1965   $value_start = $this->bytes_already_parsed + 1;
  1979        - $value_length = strcspn( $this->html, $quote, $value_start );
  1980        - $attribute_end = $value_start + $value_length + 1;
         1966 + $end_quote_at = strpos( $this->html, $quote, $value_start );
         1967 + $end_quote_at = false === $end_quote_at ? $doc_length : $end_quote_at;
         1968 + $value_length = $end_quote_at - $value_start;
         1969 + $attribute_end = $end_quote_at + 1;
  1981   1970   $this->bytes_already_parsed = $attribute_end;
  1982   1971   break;
     …      …
  1994   1983   }
  1995   1984
  1996        - if ( $attribute_end >= strlen( $this->html )) {
         1985 + if ( $attribute_end >= $doc_length ) {
  1997   1986   $this->parser_state = self::STATE_INCOMPLETE_INPUT;
  1998   1987
     …      …
  2015   2004
  2016   2005   // If an attribute is listed many times, only use the first declaration and ignore the rest.
  2017        - if ( ! array_key_exists( $comparable_name, $this->attributes) ) {
         2006 + if ( ! isset( $this->attributes[ $comparable_name ] ) ) {
  2018   2007   $this->attributes[ $comparable_name ] = new WP_HTML_Attribute_Token(
  2019   2008   $attribute_name,
     …      …
  2039   2028   if ( null === $this->duplicate_attributes ) {
  2040   2029   $this->duplicate_attributes = array( $comparable_name => array( $duplicate_span ) );
  2041        - } elseif ( ! array_key_exists( $comparable_name, $this->duplicate_attributes) ) {
         2030 + } elseif ( ! isset( $this->duplicate_attributes[ $comparable_name ] ) ) {
  2042   2031   $this->duplicate_attributes[ $comparable_name ] = array( $duplicate_span );
  2043   2032   } else {
     …      …
  3111   3100
  3112   3101   // Removes any duplicated attributes if they were also present.
  3113        - if ( null !== $this->duplicate_attributes && array_key_exists( $name, $this->duplicate_attributes ) ) {
  3114        - foreach ( $this->duplicate_attributes[ $name ] as $attribute_token ) {
  3115        - $this->lexical_updates[] = new WP_HTML_Text_Replacement(
  3116        - $attribute_token->start,
  3117        - $attribute_token->length,
  3118        - ''
  3119        - );
  3120        - }
         3102 + foreach ( $this->duplicate_attributes[ $name ] ?? array() as $attribute_token ) {
         3103 + $this->lexical_updates[] = new WP_HTML_Text_Replacement(
         3104 + $attribute_token->start,
         3105 + $attribute_token->length,
         3106 + ''
         3107 + );
  3121   3108   }
  3122   3109
     …      …
  3318   3305
  3319   3306   // Does the tag name match the requested tag name in a case-insensitive manner?
  3320        - if ( null !== $this->sought_tag_name ) {
  3321        - /*
  3322        - * String (byte) length lookup is fast. If they aren't the
  3323        - * same length then they can't be the same string values.
  3324        - */
  3325        - if ( strlen( $this->sought_tag_name ) !== $this->tag_name_length ) {
  3326        - return false;
  3327        - }
  3328        -
  3329        - /*
  3330        - * Check each character to determine if they are the same.
  3331        - * Defer calls to `strtoupper()` to avoid them when possible.
  3332        - * Calling `strcasecmp()` here tested slowed than comparing each
  3333        - * character, so unless benchmarks show otherwise, it should
  3334        - * not be used.
  3335        - *
  3336        - * It's expected that most of the time that this runs, a
  3337        - * lower-case tag name will be supplied and the input will
  3338        - * contain lower-case tag names, thus normally bypassing
  3339        - * the case comparison code.
  3340        - */
  3341        - for ( $i = 0; $i < $this->tag_name_length; $i++ ) {
  3342        - $html_char = $this->html[ $this->tag_name_starts_at + $i ];
  3343        - $tag_char = $this->sought_tag_name[ $i ];
  3344        -
  3345        - if ( $html_char !== $tag_char && strtoupper( $html_char ) !== $tag_char ) {
  3346        - return false;
  3347        - }
  3348        - }
         3307 + if ( isset( $this->sought_tag_name ) && 0 !== substr_compare( $this->html, $this->sought_tag_name, $this->tag_name_starts_at, $this->tag_name_length, true ) ) {
         3308 + return false;
  3349   3309   }
  3350   3310
Note: See TracChangeset for help on using the changeset viewer.