Changeset 58418
- Timestamp: 06/15/2024 06:31:24 AM (6 months ago)
- Location: trunk
- Files: 2 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/wp-includes/kses.php
r58354 r58418 964 964 * 965 965 * @since 1.0.0 966 * @since 6.6.0 Recognize additional forms of invalid HTML which convert into comments. 966 967 * 967 968 * @global array[]|string $pass_allowed_html An array of allowed HTML elements and attributes, … … 982 983 $pass_allowed_protocols = $allowed_protocols; 983 984 984 return preg_replace_callback( '%(<!--.*?(-->|$))|(<[^>]*(>|$)|>)%', '_wp_kses_split_callback', $content ); 985 $token_pattern = <<<REGEX 986 ~ 987 ( # Detect comments of various flavors before attempting to find tags. 988 (<!--.*?(-->|$)) # - Normative HTML comments. 989 | 990 </[^a-zA-Z][^>]*> # - Closing tags with invalid tag names. 991 ) 992 | 993 (<[^>]*(>|$)|>) # Tag-like spans of text. 994 ~x 995 REGEX; 996 return preg_replace_callback( $token_pattern, '_wp_kses_split_callback', $content ); 985 997 } 986 998 … … 1070 1082 * @ignore 1071 1083 * @since 1.0.0 1084 * @since 6.6.0 Recognize additional forms of invalid HTML which convert into comments. 1072 1085 * 1073 1086 * @param string $content Content to filter. … … 1076 1089 * for the list of accepted context names. 1077 1090 * @param string[] $allowed_protocols Array of allowed URL protocols. 1091 * 1078 1092 * @return string Fixed HTML element 1079 1093 */ … … 1081 1095 $content = wp_kses_stripslashes( $content ); 1082 1096 1083 // It matched a ">" character. 1097 /* 1098 * The regex pattern used to split HTML into chunks attempts 1099 * to split on HTML token boundaries. This function should 1100 * thus receive chunks that _either_ start with meaningful 1101 * syntax tokens, like a tag `<div>` or a comment `<!-- ... -->`. 1102 * 1103 * If the first character of the `$content` chunk _isn't_ one 1104 * of these syntax elements, which always starts with `<`, then 1105 * the match had to be for the final alternation of `>`. In such 1106 * case, it's probably standing on its own and could be encoded 1107 * with a character reference to remove ambiguity. 
1108 * 1109 * In other words, if this chunk isn't from a match of a syntax 1110 * token, it's just a plaintext greater-than (`>`) sign. 1111 */ 1084 1112 if ( ! str_starts_with( $content, '<' ) ) { 1085 1113 return '>'; 1086 1114 } 1087 1115 1088 // Allow HTML comments. 1116 /* 1117 * When a closing tag appears with a name that isn't a valid tag name, 1118 * it must be interpreted as an HTML comment. It extends until the 1119 * first `>` character after the initial opening `</`. 1120 * 1121 * Preserve these comments and do not treat them like tags. 1122 */ 1123 if ( 1 === preg_match( '~^</[^a-zA-Z][^>]*>$~', $content ) ) { 1124 $content = substr( $content, 2, -1 ); 1125 $transformed = null; 1126 1127 while ( $transformed !== $content ) { 1128 $transformed = wp_kses( $content, $allowed_html, $allowed_protocols ); 1129 $content = $transformed; 1130 } 1131 1132 return "</{$transformed}>"; 1133 } 1134 1135 /* 1136 * Normative HTML comments should be handled separately as their 1137 * parsing rules differ from those for tags and text nodes. 1138 */ 1089 1139 if ( str_starts_with( $content, '<!--' ) ) { 1090 1140 $content = str_replace( array( '<!--', '-->' ), '', $content ); -
trunk/tests/phpunit/tests/kses.php
r58294 r58418 1933 1933 1934 1934 /** 1935 * Ensures that `wp_kses()` preserves various kinds of HTML comments, both valid and invalid. 1936 * 1937 * @ticket 61009 1938 * 1939 * @param string $html_comment HTML containing a comment; must not be a valid comment 1940 * but must be syntax which a browser interprets as a comment. 1941 * @param string $expected_output How `wp_kses()` ought to transform the comment. 1942 */ 1943 public function wp_kses_preserves_html_comments( $html_comment, $expected_output ) { 1944 $this->assertSame( 1945 $expected_output, 1946 wp_kses( $html_comment, array() ), 1947 'Failed to properly preserve HTML comment.' 1948 ); 1949 } 1950 1951 /** 1952 * Data provider. 1953 * 1954 * @return array[]. 1955 */ 1956 public static function data_html_containing_various_kinds_of_html_comments() { 1957 return array( 1958 'Normative HTML comment' => array( 'before<!-- this is a comment -->after', 'before<!-- this is a comment -->after' ), 1959 'Closing tag with invalid tag name' => array( 'before<//not a tag>after', 'before<//not a tag>after' ), 1960 ); 1961 } 1962 1963 /** 1935 1964 * Test that attributes with a list of allowed values are filtered correctly. 1936 1965 *
Note: See TracChangeset for help on using the changeset viewer.