Changeset 58926
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/wp-includes/html-api/class-wp-html-processor.php
/**
 * Parses the next token in the 'after body' insertion mode.
 *
 * Whitespace-only text and an HTML start tag are handled by the in-body rules,
 * a DOCTYPE is ignored, an HTML end tag moves the parser to 'after after body'
 * (unless a context node is set, in which case the token is ignored), and any
 * other token switches back to 'in body' and is reprocessed there.
 *
 * @see https://html.spec.whatwg.org/#parsing-main-afterbody
 *
 * @return bool Result of handling the token. NOTE(review): presumably "whether a
 *              token was matched", as with the sibling step methods; confirm.
 */
private function step_after_body(): bool {
	$tag_name   = $this->get_token_name();
	$token_type = $this->get_token_type();
	$op_sigil   = '#tag' === $token_type ? ( $this->is_tag_closer() ? '-' : '+' ) : '';
	$op         = "{$op_sigil}{$tag_name}";

	switch ( $op ) {
		/*
		 * > A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF),
		 * > U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
		 *
		 * > Process the token using the rules for the "in body" insertion mode.
		 */
		case '#text':
			$text = $this->get_modifiable_text();
			if ( strlen( $text ) === strspn( $text, " \t\n\f\r" ) ) {
				return $this->step_in_body();
			}

			// Text with any non-whitespace byte is "anything else"; handled after the switch.
			break;

		/*
		 * > A comment token
		 */
		case '#comment':
		case '#funky-comment':
		case '#presumptuous-tag':
			$this->bail( 'Content outside of BODY is unsupported.' );
			break;

		/*
		 * > A DOCTYPE token
		 */
		case 'html':
			// Parse error: ignore the token.
			return $this->step();

		/*
		 * > A start tag whose tag name is "html"
		 */
		case '+HTML':
			return $this->step_in_body();

		/*
		 * > An end tag whose tag name is "html"
		 *
		 * > If the parser was created as part of the HTML fragment parsing algorithm,
		 * > this is a parse error; ignore the token. (fragment case)
		 * >
		 * > Otherwise, switch the insertion mode to "after after body".
		 */
		case '-HTML':
			// A set context node is used here as the marker for the fragment case.
			if ( isset( $this->context_node ) ) {
				return $this->step();
			}

			$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_AFTER_AFTER_BODY;
			return true;
	}

	/*
	 * > Parse error. Switch the insertion mode to "in body" and reprocess the token.
	 */
	$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY;
	return $this->step( self::REPROCESS_CURRENT_NODE );
}

/**
 * Parses the next token in the 'in frameset' insertion mode.
 *
 * Handles FRAMESET and FRAME tags directly, forwards an HTML start tag and
 * whitespace-only text to the in-body rules, forwards NOFRAMES to the in-head
 * rules, and ignores every token that has no dedicated case.
 *
 * @see https://html.spec.whatwg.org/#parsing-main-inframeset
 *
 * @return bool Result of handling the token. NOTE(review): presumably "whether a
 *              token was matched", as with the sibling step methods; confirm.
 */
private function step_in_frameset(): bool {
	$tag_name   = $this->get_token_name();
	$token_type = $this->get_token_type();
	$op_sigil   = '#tag' === $token_type ? ( $this->is_tag_closer() ? '-' : '+' ) : '';
	$op         = "{$op_sigil}{$tag_name}";

	switch ( $op ) {
		/*
		 * > A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF),
		 * > U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
		 * >
		 * > Insert the character.
		 *
		 * Whitespace-only text is delegated to the in-body rules. Text containing
		 * any other byte is not supported in this mode and bails.
		 */
		case '#text':
			$text = $this->get_modifiable_text();
			if ( strlen( $text ) === strspn( $text, " \t\n\f\r" ) ) {
				return $this->step_in_body();
			}
			$this->bail( 'Non-whitespace characters cannot be handled in frameset.' );
			break;

		/*
		 * > A comment token
		 */
		case '#comment':
		case '#funky-comment':
		case '#presumptuous-tag':
			$this->insert_html_element( $this->state->current_token );
			return true;

		/*
		 * > A DOCTYPE token
		 */
		case 'html':
			// Parse error: ignore the token.
			return $this->step();

		/*
		 * > A start tag whose tag name is "html"
		 */
		case '+HTML':
			return $this->step_in_body();

		/*
		 * > A start tag whose tag name is "frameset"
		 */
		case '+FRAMESET':
			$this->insert_html_element( $this->state->current_token );
			return true;

		/*
		 * > An end tag whose tag name is "frameset"
		 */
		case '-FRAMESET':
			/*
			 * > If the current node is the root html element, then this is a parse error;
			 * > ignore the token. (fragment case)
			 */
			if ( $this->state->stack_of_open_elements->current_node_is( 'HTML' ) ) {
				return $this->step();
			}

			/*
			 * > Otherwise, pop the current node from the stack of open elements.
			 */
			$this->state->stack_of_open_elements->pop();

			/*
			 * > If the parser was not created as part of the HTML fragment parsing algorithm
			 * > (fragment case), and the current node is no longer a frameset element, then
			 * > switch the insertion mode to "after frameset".
			 */
			if ( ! isset( $this->context_node ) && ! $this->state->stack_of_open_elements->current_node_is( 'FRAMESET' ) ) {
				$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_AFTER_FRAMESET;
			}

			return true;

		/*
		 * > A start tag whose tag name is "frame"
		 *
		 * > Insert an HTML element for the token. Immediately pop the
		 * > current node off the stack of open elements.
		 * >
		 * > Acknowledge the token's self-closing flag, if it is set.
		 */
		case '+FRAME':
			$this->insert_html_element( $this->state->current_token );
			$this->state->stack_of_open_elements->pop();
			return true;

		/*
		 * > A start tag whose tag name is "noframes"
		 */
		case '+NOFRAMES':
			return $this->step_in_head();
	}

	// Parse error: ignore the token.
	return $this->step();
}

/**
 * Parses the next token in the 'after frameset' insertion mode.
 *
 * An HTML end tag moves the parser to 'after after frameset'. An HTML start tag
 * and whitespace-only text use the in-body rules, NOFRAMES uses the in-head
 * rules, and every token without a dedicated case is ignored.
 *
 * @see https://html.spec.whatwg.org/#parsing-main-afterframeset
 *
 * @return bool Result of handling the token. NOTE(review): presumably "whether a
 *              token was matched", as with the sibling step methods; confirm.
 */
private function step_after_frameset(): bool {
	$tag_name   = $this->get_token_name();
	$token_type = $this->get_token_type();
	$op_sigil   = '#tag' === $token_type ? ( $this->is_tag_closer() ? '-' : '+' ) : '';
	$op         = "{$op_sigil}{$tag_name}";

	switch ( $op ) {
		/*
		 * > A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF),
		 * > U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
		 * >
		 * > Insert the character.
		 *
		 * Whitespace-only text is delegated to the in-body rules. Text containing
		 * any other byte is not supported in this mode and bails.
		 */
		case '#text':
			$text = $this->get_modifiable_text();
			if ( strlen( $text ) === strspn( $text, " \t\n\f\r" ) ) {
				return $this->step_in_body();
			}
			$this->bail( 'Non-whitespace characters cannot be handled in after frameset' );
			break;

		/*
		 * > A comment token
		 */
		case '#comment':
		case '#funky-comment':
		case '#presumptuous-tag':
			$this->insert_html_element( $this->state->current_token );
			return true;

		/*
		 * > A DOCTYPE token
		 */
		case 'html':
			// Parse error: ignore the token.
			return $this->step();

		/*
		 * > A start tag whose tag name is "html"
		 */
		case '+HTML':
			return $this->step_in_body();

		/*
		 * > An end tag whose tag name is "html"
		 */
		case '-HTML':
			$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_AFTER_AFTER_FRAMESET;
			return true;

		/*
		 * > A start tag whose tag name is "noframes"
		 */
		case '+NOFRAMES':
			return $this->step_in_head();
	}

	// Parse error: ignore the token.
	return $this->step();
}

/**
 * Parses the next token in the 'after after body' insertion mode.
 *
 * A DOCTYPE, an HTML start tag, and whitespace-only text are handled by the
 * in-body rules. Comments are unsupported here and bail. Any other token
 * switches back to 'in body' and is reprocessed there.
 *
 * @see https://html.spec.whatwg.org/#the-after-after-body-insertion-mode
 *
 * @return bool Result of handling the token. NOTE(review): presumably "whether a
 *              token was matched", as with the sibling step methods; confirm.
 */
private function step_after_after_body(): bool {
	$tag_name   = $this->get_token_name();
	$token_type = $this->get_token_type();
	$op_sigil   = '#tag' === $token_type ? ( $this->is_tag_closer() ? '-' : '+' ) : '';
	$op         = "{$op_sigil}{$tag_name}";

	switch ( $op ) {
		/*
		 * > A comment token
		 */
		case '#comment':
		case '#funky-comment':
		case '#presumptuous-tag':
			$this->bail( 'Content outside of HTML is unsupported.' );
			break;

		/*
		 * > A DOCTYPE token
		 * > A start tag whose tag name is "html"
		 *
		 * > Process the token using the rules for the "in body" insertion mode.
		 */
		case 'html':
		case '+HTML':
			return $this->step_in_body();

		/*
		 * > A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF),
		 * > U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
		 * >
		 * > Process the token using the rules for the "in body" insertion mode.
		 */
		case '#text':
			$text = $this->get_modifiable_text();
			if ( strlen( $text ) === strspn( $text, " \t\n\f\r" ) ) {
				return $this->step_in_body();
			}

			// Text with any non-whitespace byte is "anything else"; handled after the switch.
			break;
	}

	/*
	 * > Parse error. Switch the insertion mode to "in body" and reprocess the token.
	 */
	$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY;
	return $this->step( self::REPROCESS_CURRENT_NODE );
}

/**
 * Parses the next token in the 'after after frameset' insertion mode.
 *
 * A DOCTYPE, an HTML start tag, and whitespace-only text are handled by the
 * in-body rules, and NOFRAMES by the in-head rules. Comments and text with
 * non-whitespace bytes are unsupported and bail. Every token without a
 * dedicated case is ignored.
 *
 * @see https://html.spec.whatwg.org/#the-after-after-frameset-insertion-mode
 *
 * @return bool Result of handling the token. NOTE(review): presumably "whether a
 *              token was matched", as with the sibling step methods; confirm.
 */
private function step_after_after_frameset(): bool {
	$tag_name   = $this->get_token_name();
	$token_type = $this->get_token_type();
	$op_sigil   = '#tag' === $token_type ? ( $this->is_tag_closer() ? '-' : '+' ) : '';
	$op         = "{$op_sigil}{$tag_name}";

	switch ( $op ) {
		/*
		 * > A comment token
		 */
		case '#comment':
		case '#funky-comment':
		case '#presumptuous-tag':
			$this->bail( 'Content outside of HTML is unsupported.' );
			break;

		/*
		 * > A DOCTYPE token
		 * > A start tag whose tag name is "html"
		 *
		 * > Process the token using the rules for the "in body" insertion mode.
		 */
		case 'html':
		case '+HTML':
			return $this->step_in_body();

		/*
		 * > A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF),
		 * > U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
		 * >
		 * > Process the token using the rules for the "in body" insertion mode.
		 *
		 * Text containing any non-whitespace byte is not supported in this mode and bails.
		 */
		case '#text':
			$text = $this->get_modifiable_text();
			if ( strlen( $text ) === strspn( $text, " \t\n\f\r" ) ) {
				return $this->step_in_body();
			}
			$this->bail( 'Non-whitespace characters cannot be handled in after after frameset.' );
			break;

		/*
		 * > A start tag whose tag name is "noframes"
		 */
		case '+NOFRAMES':
			return $this->step_in_head();
	}

	// Parse error: ignore the token.
	return $this->step();
}

/*
 * Also part of this changeset, inside a switch whose enclosing method is not
 * visible in this view: the case label '#funky_comment' was corrected to
 * '#funky-comment' and '#presumptuous-tag' was added, so the group now reads:
 *
 *     case '#cdata-section':
 *     case '#comment':
 *     case '#funky-comment':
 *     case '#presumptuous-tag':
 *         $this->insert_foreign_element( $this->state->current_token, false );
 *         return true;
 */
trunk/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php
r58925 r58926 34 34 'tests14/line0022' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.', 35 35 'tests14/line0055' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.', 36 'tests19/line0488' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.', 37 'tests19/line0500' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.', 36 38 'tests19/line0965' => 'Bug: Mixed whitespace, non-whitespace text in head not split correctly.', 37 39 'tests19/line1079' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.', 38 40 'tests2/line0207' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.', 39 41 'tests2/line0686' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.', 42 'tests2/line0697' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.', 40 43 'tests2/line0709' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.', 41 44 'tests5/line0013' => 'Bug: Mixed whitespace, non-whitespace text in head not split correctly.',
Note: See TracChangeset for help on using the changeset viewer.