Changeset 58867
- Timestamp:
- 08/08/2024 07:23:53 AM (9 months ago)
- Location:
- trunk
- Files:
-
- 10 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/wp-includes/html-api/class-wp-html-open-elements.php
r58833 r58867 114 114 * @param int $nth Retrieve the nth item on the stack, with 1 being 115 115 * the top element, 2 being the second, etc... 116 * @return string|null Name of the node on the stack at the given location,117 * or `null` if the location isn't on the stack.118 */ 119 public function at( int $nth ): ? string{116 * @return WP_HTML_Token|null Name of the node on the stack at the given location, 117 * or `null` if the location isn't on the stack. 118 */ 119 public function at( int $nth ): ?WP_HTML_Token { 120 120 foreach ( $this->walk_down() as $item ) { 121 121 if ( 0 === --$nth ) { 122 return $item ->node_name;122 return $item; 123 123 } 124 124 } … … 243 243 public function has_element_in_specific_scope( string $tag_name, $termination_list ): bool { 244 244 foreach ( $this->walk_up() as $node ) { 245 if ( $node->node_name === $tag_name ) { 245 $namespaced_name = 'html' === $node->namespace 246 ? $node->node_name 247 : "{$node->namespace} {$node->node_name}"; 248 249 if ( $namespaced_name === $tag_name ) { 246 250 return true; 247 251 } … … 249 253 if ( 250 254 '(internal: H1 through H6 - do not use)' === $tag_name && 251 in_array( $n ode->node_name, array( 'H1', 'H2', 'H3', 'H4', 'H5', 'H6' ), true )255 in_array( $namespaced_name, array( 'H1', 'H2', 'H3', 'H4', 'H5', 'H6' ), true ) 252 256 ) { 253 257 return true; 254 258 } 255 259 256 if ( in_array( $n ode->node_name, $termination_list, true ) ) {260 if ( in_array( $namespaced_name, $termination_list, true ) ) { 257 261 return false; 258 262 } … … 289 293 * 290 294 * @since 6.4.0 291 * @since 6.7.0 Supports all required HTML elements.295 * @since 6.7.0 Full support. 292 296 * 293 297 * @see https://html.spec.whatwg.org/#has-an-element-in-scope … … 310 314 'TEMPLATE', 311 315 312 /* 313 * @todo Support SVG and MathML nodes when support for foreign content is added. 
314 * 315 * - MathML mi 316 * - MathML mo 317 * - MathML mn 318 * - MathML ms 319 * - MathML mtext 320 * - MathML annotation-xml 321 * - SVG foreignObject 322 * - SVG desc 323 * - SVG title 324 */ 316 'math MI', 317 'math MO', 318 'math MN', 319 'math MS', 320 'math MTEXT', 321 'math ANNOTATION-XML', 322 323 'svg FOREIGNOBJECT', 324 'svg DESC', 325 'svg TITLE', 325 326 ) 326 327 ); … … 364 365 'UL', 365 366 366 /* 367 * @todo Support SVG and MathML nodes when support for foreign content is added. 368 * 369 * - MathML mi 370 * - MathML mo 371 * - MathML mn 372 * - MathML ms 373 * - MathML mtext 374 * - MathML annotation-xml 375 * - SVG foreignObject 376 * - SVG desc 377 * - SVG title 378 */ 367 'math MI', 368 'math MO', 369 'math MN', 370 'math MS', 371 'math MTEXT', 372 'math ANNOTATION-XML', 373 374 'svg FOREIGNOBJECT', 375 'svg DESC', 376 'svg TITLE', 379 377 ) 380 378 ); … … 414 412 'TEMPLATE', 415 413 416 /* 417 * @todo Support SVG and MathML nodes when support for foreign content is added. 418 * 419 * - MathML mi 420 * - MathML mo 421 * - MathML mn 422 * - MathML ms 423 * - MathML mtext 424 * - MathML annotation-xml 425 * - SVG foreignObject 426 * - SVG desc 427 * - SVG title 428 */ 414 'math MI', 415 'math MO', 416 'math MN', 417 'math MS', 418 'math MTEXT', 419 'math ANNOTATION-XML', 420 421 'svg FOREIGNOBJECT', 422 'svg DESC', 423 'svg TITLE', 429 424 ) 430 425 ); … … 693 688 */ 694 689 public function after_element_push( WP_HTML_Token $item ): void { 690 $namespaced_name = 'html' === $item->namespace 691 ? $item->node_name 692 : "{$item->namespace} {$item->node_name}"; 693 695 694 /* 696 695 * When adding support for new elements, expand this switch to trap 697 696 * cases where the precalculated value needs to change. 
698 697 */ 699 switch ( $ item->node_name ) {698 switch ( $namespaced_name ) { 700 699 case 'APPLET': 701 700 case 'BUTTON': … … 708 707 case 'OBJECT': 709 708 case 'TEMPLATE': 709 case 'math MI': 710 case 'math MO': 711 case 'math MN': 712 case 'math MS': 713 case 'math MTEXT': 714 case 'math ANNOTATION-XML': 715 case 'svg FOREIGNOBJECT': 716 case 'svg DESC': 717 case 'svg TITLE': 710 718 $this->has_p_in_button_scope = false; 711 719 break; … … 751 759 case 'OBJECT': 752 760 case 'TEMPLATE': 761 case 'math MI': 762 case 'math MO': 763 case 'math MN': 764 case 'math MS': 765 case 'math MTEXT': 766 case 'math ANNOTATION-XML': 767 case 'svg FOREIGNOBJECT': 768 case 'svg DESC': 769 case 'svg TITLE': 753 770 $this->has_p_in_button_scope = $this->has_element_in_button_scope( 'P' ); 754 771 break; -
trunk/src/wp-includes/html-api/class-wp-html-processor-state.php
r58836 r58867 301 301 302 302 /** 303 * In foreign content insertion mode for full HTML parser. 304 * 305 * @since 6.7.0 306 * 307 * @see https://html.spec.whatwg.org/#parsing-main-inforeign 308 * @see WP_HTML_Processor_State::$insertion_mode 309 * 310 * @var string 311 */ 312 const INSERTION_MODE_IN_FOREIGN_CONTENT = 'insertion-mode-in-foreign-content'; 313 314 /** 315 303 * No-quirks mode document compatibility mode. 316 304 * -
trunk/src/wp-includes/html-api/class-wp-html-processor.php
r58841 r58867 308 308 $processor->bookmarks['context-node'] = new WP_HTML_Span( 0, 0 ); 309 309 310 $processor->state->stack_of_open_elements->push( 311 new WP_HTML_Token( 312 'root-node', 313 'HTML', 314 false 315 ) 310 $root_node = new WP_HTML_Token( 311 'root-node', 312 'HTML', 313 false 316 314 ); 315 316 $processor->state->stack_of_open_elements->push( $root_node ); 317 317 318 318 $context_node = new WP_HTML_Token( … … 393 393 $provenance = ( ! $same_node || $is_virtual ) ? 'virtual' : 'real'; 394 394 $this->element_queue[] = new WP_HTML_Stack_Event( $token, WP_HTML_Stack_Event::PUSH, $provenance ); 395 396 $this->change_parsing_namespace( $token->namespace ); 395 397 } 396 398 ); … … 402 404 $provenance = ( ! $same_node || $is_virtual ) ? 'virtual' : 'real'; 403 405 $this->element_queue[] = new WP_HTML_Stack_Event( $token, WP_HTML_Stack_Event::POP, $provenance ); 406 $adjusted_current_node = $this->get_adjusted_current_node(); 407 $this->change_parsing_namespace( 408 $adjusted_current_node 409 ? $adjusted_current_node->namespace 410 : 'html' 411 ); 404 412 } 405 413 ); … … 768 776 * closing as soon as the processor advances to the next token. 769 777 * 778 * @todo Review the self-closing logic when no node is present, ensure it 779 * matches the expectations in `step()`. 780 * 770 781 * @since 6.6.0 771 *772 * @todo When adding support for foreign content, ensure that773 * this returns false for self-closing elements in the774 * SVG and MathML namespace.775 782 * 776 783 * @param WP_HTML_Token|null $node Optional. Node to examine, if provided. … … 779 786 * or `null` if not matched on any token. 780 787 */ 781 public function expects_closer( $node = null ): ?bool { 782 $token_name = $node->node_name ?? $this->get_token_name(); 788 public function expects_closer( WP_HTML_Token $node = null ): ?bool { 789 $token_name = $node->node_name ?? $this->get_token_name(); 790 $token_namespace = $node->namespace ?? $this->get_namespace(); 791 783 792 if ( ! 
isset( $token_name ) ) { 784 793 return null; … … 793 802 self::is_void( $token_name ) || 794 803 // Special atomic elements. 795 in_array( $token_name, array( 'IFRAME', 'NOEMBED', 'NOFRAMES', 'SCRIPT', 'STYLE', 'TEXTAREA', 'TITLE', 'XMP' ), true ) 804 ( 'html' === $token_namespace && in_array( $token_name, array( 'IFRAME', 'NOEMBED', 'NOFRAMES', 'SCRIPT', 'STYLE', 'TEXTAREA', 'TITLE', 'XMP' ), true ) ) || 805 // Self-closing elements in foreign content. 806 ( isset( $node ) && 'html' !== $node->namespace && $node->has_self_closing_flag ) 796 807 ); 797 808 } … … 825 836 * When moving on to the next node, therefore, if the bottom-most element 826 837 * on the stack is a void element, it must be closed. 827 *828 * @todo Once self-closing foreign elements and BGSOUND are supported,829 * they must also be implicitly closed here too. BGSOUND is830 * special since it's only self-closing if the self-closing flag831 * is provided in the opening tag, otherwise it expects a tag closer.832 838 */ 833 839 $top_node = $this->state->stack_of_open_elements->current_node(); 834 if ( isset( $top_node ) && ! static::expects_closer( $top_node ) ) {840 if ( isset( $top_node ) && ! $this->expects_closer( $top_node ) ) { 835 841 $this->state->stack_of_open_elements->pop(); 836 842 } … … 849 855 } 850 856 851 $this->state->current_token = new WP_HTML_Token( 852 $this->bookmark_token(), 853 $this->get_token_name(), 854 $this->has_self_closing_flag(), 855 $this->release_internal_bookmark_on_destruct 857 $adjusted_current_node = $this->get_adjusted_current_node(); 858 $is_closer = $this->is_tag_closer(); 859 $is_start_tag = WP_HTML_Tag_Processor::STATE_MATCHED_TAG === $this->parser_state && ! 
$is_closer; 860 $token_name = $this->get_token_name(); 861 862 if ( self::REPROCESS_CURRENT_NODE !== $node_to_process ) { 863 $this->state->current_token = new WP_HTML_Token( 864 $this->bookmark_token(), 865 $token_name, 866 $this->has_self_closing_flag(), 867 $this->release_internal_bookmark_on_destruct 868 ); 869 } 870 871 $parse_in_current_insertion_mode = ( 872 0 === $this->state->stack_of_open_elements->count() || 873 'html' === $adjusted_current_node->namespace || 874 ( 875 'math' === $adjusted_current_node->integration_node_type && 876 ( 877 ( $is_start_tag && ! in_array( $token_name, array( 'MGLYPH', 'MALIGNMARK' ), true ) ) || 878 '#text' === $token_name 879 ) 880 ) || 881 ( 882 'math' === $adjusted_current_node->namespace && 883 'ANNOTATION-XML' === $adjusted_current_node->node_name && 884 $is_start_tag && 'SVG' === $token_name 885 ) || 886 ( 887 'html' === $adjusted_current_node->integration_node_type && 888 ( $is_start_tag || '#text' === $token_name ) 889 ) 856 890 ); 857 891 858 892 try { 893 if ( ! $parse_in_current_insertion_mode ) { 894 return $this->step_in_foreign_content(); 895 } 896 859 897 switch ( $this->state->insertion_mode ) { 860 898 case WP_HTML_Processor_State::INSERTION_MODE_INITIAL: … … 923 961 case WP_HTML_Processor_State::INSERTION_MODE_AFTER_AFTER_FRAMESET: 924 962 return $this->step_after_after_frameset(); 925 926 case WP_HTML_Processor_State::INSERTION_MODE_IN_FOREIGN_CONTENT:927 return $this->step_in_foreign_content();928 963 929 964 // This should be unreachable but PHP doesn't have total type checking on switch. … … 1854 1889 if ( 1855 1890 1 === $this->state->stack_of_open_elements->count() || 1856 'BODY' !== $this->state->stack_of_open_elements->at( 2) ||1891 'BODY' !== ( $this->state->stack_of_open_elements->at( 2 )->node_name ?? 
null ) || 1857 1892 $this->state->stack_of_open_elements->contains( 'TEMPLATE' ) 1858 1893 ) { … … 1880 1915 if ( 1881 1916 1 === $this->state->stack_of_open_elements->count() || 1882 'BODY' !== $this->state->stack_of_open_elements->at( 2) ||1917 'BODY' !== ( $this->state->stack_of_open_elements->at( 2 )->node_name ?? null ) || 1883 1918 false === $this->state->frameset_ok 1884 1919 ) { … … 2076 2111 'DIV' !== $node->node_name && 2077 2112 'P' !== $node->node_name && 2078 $this->is_special( $node->node_name )2113 self::is_special( $node ) 2079 2114 ) { 2080 2115 /* … … 2137 2172 * > "figcaption", "figure", "footer", "header", "hgroup", "listing", "main", 2138 2173 * > "menu", "nav", "ol", "pre", "search", "section", "summary", "ul" 2139 *2140 * @todo This needs to check if the element in scope is an HTML element, meaning that2141 * when SVG and MathML support is added, this needs to differentiate between an2142 * HTML element of the given name, such as `<center>`, and a foreign element of2143 * the same given name.2144 2174 */ 2145 2175 case '-ADDRESS': … … 2412 2442 /* 2413 2443 * > A end tag token whose tag name is one of: "applet", "marquee", "object" 2414 *2415 * @todo This needs to check if the element in scope is an HTML element, meaning that2416 * when SVG and MathML support is added, this needs to differentiate between an2417 * HTML element of the given name, such as `<object>`, and a foreign element of2418 * the same given name.2419 2444 */ 2420 2445 case '-APPLET': … … 2680 2705 * These ought to be handled in the attribute methods. 2681 2706 */ 2682 2683 $this->bail( 'Cannot process MATH element, opening foreign content.' 
); 2684 break; 2707 $this->state->current_token->namespace = 'math'; 2708 $this->insert_html_element( $this->state->current_token ); 2709 if ( $this->state->current_token->has_self_closing_flag ) { 2710 $this->state->stack_of_open_elements->pop(); 2711 } 2712 return true; 2685 2713 2686 2714 /* … … 2696 2724 * These ought to be handled in the attribute methods. 2697 2725 */ 2698 2699 $this->bail( 'Cannot process SVG element, opening foreign content.' ); 2700 break; 2726 $this->state->current_token->namespace = 'svg'; 2727 $this->insert_html_element( $this->state->current_token ); 2728 if ( $this->state->current_token->has_self_closing_flag ) { 2729 $this->state->stack_of_open_elements->pop(); 2730 } 2731 return true; 2701 2732 2702 2733 /* … … 2738 2769 */ 2739 2770 foreach ( $this->state->stack_of_open_elements->walk_up() as $node ) { 2740 /* 2741 * @todo This needs to check if the element in scope is an HTML element, meaning that 2742 * when SVG and MathML support is added, this needs to differentiate between an 2743 * HTML element of the given name, such as `<object>`, and a foreign element of 2744 * the same given name. 2745 */ 2746 if ( $token_name === $node->node_name ) { 2771 if ( 'html' === $node->namespace && $token_name === $node->node_name ) { 2747 2772 break; 2748 2773 } 2749 2774 2750 if ( self::is_special( $node ->node_name) ) {2775 if ( self::is_special( $node ) ) { 2751 2776 // This is a parse error, ignore the token. 2752 2777 return $this->step(); … … 4070 4095 */ 4071 4096 private function step_in_foreign_content(): bool { 4072 $this->bail( 'No support for parsing in the ' . WP_HTML_Processor_State::INSERTION_MODE_IN_FOREIGN_CONTENT . ' state.' ); 4097 $tag_name = $this->get_token_name(); 4098 $token_type = $this->get_token_type(); 4099 $op_sigil = '#tag' === $token_type ? ( $this->is_tag_closer() ? 
'-' : '+' ) : ''; 4100 $op = "{$op_sigil}{$tag_name}"; 4101 4102 /* 4103 * > A start tag whose name is "font", if the token has any attributes named "color", "face", or "size" 4104 * 4105 * This section drawn out above the switch to more easily incorporate 4106 * the additional rules based on the presence of the attributes. 4107 */ 4108 if ( 4109 '+FONT' === $op && 4110 ( 4111 null !== $this->get_attribute( 'color' ) || 4112 null !== $this->get_attribute( 'face' ) || 4113 null !== $this->get_attribute( 'size' ) 4114 ) 4115 ) { 4116 $op = '+FONT with attributes'; 4117 } 4118 4119 switch ( $op ) { 4120 case '#text': 4121 /* 4122 * > A character token that is U+0000 NULL 4123 * 4124 * This is handled by `get_modifiable_text()`. 4125 */ 4126 4127 /* 4128 * Whitespace-only text does not affect the frameset-ok flag. 4129 * It is probably inter-element whitespace, but it may also 4130 * contain character references which decode only to whitespace. 4131 */ 4132 $text = $this->get_modifiable_text(); 4133 if ( strlen( $text ) !== strspn( $text, " \t\n\f\r" ) ) { 4134 $this->state->frameset_ok = false; 4135 } 4136 4137 $this->insert_foreign_element( $this->state->current_token, false ); 4138 return true; 4139 4140 /* 4141 * > A comment token 4142 */ 4143 case '#cdata-section': 4144 case '#comment': 4145 case '#funky_comment': 4146 $this->insert_foreign_element( $this->state->current_token, false ); 4147 return true; 4148 4149 /* 4150 * > A DOCTYPE token 4151 */ 4152 case 'html': 4153 // Parse error: ignore the token. 
4154 return $this->step(); 4155 4156 /* 4157 * > A start tag whose tag name is "b", "big", "blockquote", "body", "br", "center", 4158 * > "code", "dd", "div", "dl", "dt", "em", "embed", "h1", "h2", "h3", "h4", "h5", 4159 * > "h6", "head", "hr", "i", "img", "li", "listing", "menu", "meta", "nobr", "ol", 4160 * > "p", "pre", "ruby", "s", "small", "span", "strong", "strike", "sub", "sup", 4161 * > "table", "tt", "u", "ul", "var" 4162 * 4163 * > A start tag whose name is "font", if the token has any attributes named "color", "face", or "size" 4164 * 4165 * > An end tag whose tag name is "br", "p" 4166 * 4167 * Closing BR tags are always reported by the Tag Processor as opening tags. 4168 */ 4169 case '+B': 4170 case '+BIG': 4171 case '+BLOCKQUOTE': 4172 case '+BODY': 4173 case '+BR': 4174 case '+CENTER': 4175 case '+CODE': 4176 case '+DD': 4177 case '+DIV': 4178 case '+DL': 4179 case '+DT': 4180 case '+EM': 4181 case '+EMBED': 4182 case '+H1': 4183 case '+H2': 4184 case '+H3': 4185 case '+H4': 4186 case '+H5': 4187 case '+H6': 4188 case '+HEAD': 4189 case '+HR': 4190 case '+I': 4191 case '+IMG': 4192 case '+LI': 4193 case '+LISTING': 4194 case '+MENU': 4195 case '+META': 4196 case '+NOBR': 4197 case '+OL': 4198 case '+P': 4199 case '+PRE': 4200 case '+RUBY': 4201 case '+S': 4202 case '+SMALL': 4203 case '+SPAN': 4204 case '+STRONG': 4205 case '+STRIKE': 4206 case '+SUB': 4207 case '+SUP': 4208 case '+TABLE': 4209 case '+TT': 4210 case '+U': 4211 case '+UL': 4212 case '+VAR': 4213 case '+FONT with attributes': 4214 case '-BR': 4215 case '-P': 4216 // @todo Indicate a parse error once it's possible. 
4217 foreach ( $this->state->stack_of_open_elements->walk_up() as $current_node ) { 4218 if ( 4219 'math' === $current_node->integration_node_type || 4220 'html' === $current_node->integration_node_type || 4221 'html' === $current_node->namespace 4222 ) { 4223 break; 4224 } 4225 4226 $this->state->stack_of_open_elements->pop(); 4227 } 4228 return $this->step( self::REPROCESS_CURRENT_NODE ); 4229 } 4230 4231 /* 4232 * > Any other start tag 4233 */ 4234 if ( ! $this->is_tag_closer() ) { 4235 $this->insert_foreign_element( $this->state->current_token, false ); 4236 4237 /* 4238 * > If the token has its self-closing flag set, then run 4239 * > the appropriate steps from the following list: 4240 */ 4241 if ( $this->state->current_token->has_self_closing_flag ) { 4242 if ( 'SCRIPT' === $this->state->current_token->node_name && 'svg' === $this->state->current_token->namespace ) { 4243 /* 4244 * > Acknowledge the token's self-closing flag, and then act as 4245 * > described in the steps for a "script" end tag below. 4246 * 4247 * @todo Verify that this shouldn't be handled by the rule for 4248 * "An end tag whose name is 'script', if the current node 4249 * is an SVG script element." 4250 */ 4251 goto in_foreign_content_any_other_end_tag; 4252 } else { 4253 $this->state->stack_of_open_elements->pop(); 4254 } 4255 } 4256 return true; 4257 } 4258 4259 /* 4260 * > An end tag whose name is "script", if the current node is an SVG script element. 4261 */ 4262 if ( $this->is_tag_closer() && 'SCRIPT' === $this->state->current_token->node_name && 'svg' === $this->state->current_token->namespace ) { 4263 $this->state->stack_of_open_elements->pop(); 4264 } 4265 4266 /* 4267 * > Any other end tag 4268 */ 4269 if ( $this->is_tag_closer() ) { 4270 in_foreign_content_any_other_end_tag: 4271 $node = $this->state->stack_of_open_elements->current_node(); 4272 if ( $tag_name !== $node->node_name ) { 4273 // @todo Indicate a parse error once it's possible. 
4274 } 4275 in_foreign_content_end_tag_loop: 4276 if ( $node === $this->state->stack_of_open_elements->at( 1 ) ) { 4277 return true; 4278 } 4279 4280 /* 4281 * > If node's tag name, converted to ASCII lowercase, is the same as the tag name 4282 * > of the token, pop elements from the stack of open elements until node has 4283 * > been popped from the stack, and then return. 4284 */ 4285 if ( 0 === strcasecmp( $node->node_name, $tag_name ) ) { 4286 foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) { 4287 $this->state->stack_of_open_elements->pop(); 4288 if ( $node === $item ) { 4289 return true; 4290 } 4291 } 4292 } 4293 4294 foreach ( $this->state->stack_of_open_elements->walk_up( $node ) as $item ) { 4295 $node = $item; 4296 break; 4297 } 4298 4299 if ( 'html' !== $node->namespace ) { 4300 goto in_foreign_content_end_tag_loop; 4301 } 4302 4303 switch ( $this->state->insertion_mode ) { 4304 case WP_HTML_Processor_State::INSERTION_MODE_INITIAL: 4305 return $this->step_initial(); 4306 4307 case WP_HTML_Processor_State::INSERTION_MODE_BEFORE_HTML: 4308 return $this->step_before_html(); 4309 4310 case WP_HTML_Processor_State::INSERTION_MODE_BEFORE_HEAD: 4311 return $this->step_before_head(); 4312 4313 case WP_HTML_Processor_State::INSERTION_MODE_IN_HEAD: 4314 return $this->step_in_head(); 4315 4316 case WP_HTML_Processor_State::INSERTION_MODE_IN_HEAD_NOSCRIPT: 4317 return $this->step_in_head_noscript(); 4318 4319 case WP_HTML_Processor_State::INSERTION_MODE_AFTER_HEAD: 4320 return $this->step_after_head(); 4321 4322 case WP_HTML_Processor_State::INSERTION_MODE_IN_BODY: 4323 return $this->step_in_body(); 4324 4325 case WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE: 4326 return $this->step_in_table(); 4327 4328 case WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_TEXT: 4329 return $this->step_in_table_text(); 4330 4331 case WP_HTML_Processor_State::INSERTION_MODE_IN_CAPTION: 4332 return $this->step_in_caption(); 4333 4334 case 
WP_HTML_Processor_State::INSERTION_MODE_IN_COLUMN_GROUP: 4335 return $this->step_in_column_group(); 4336 4337 case WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY: 4338 return $this->step_in_table_body(); 4339 4340 case WP_HTML_Processor_State::INSERTION_MODE_IN_ROW: 4341 return $this->step_in_row(); 4342 4343 case WP_HTML_Processor_State::INSERTION_MODE_IN_CELL: 4344 return $this->step_in_cell(); 4345 4346 case WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT: 4347 return $this->step_in_select(); 4348 4349 case WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT_IN_TABLE: 4350 return $this->step_in_select_in_table(); 4351 4352 case WP_HTML_Processor_State::INSERTION_MODE_IN_TEMPLATE: 4353 return $this->step_in_template(); 4354 4355 case WP_HTML_Processor_State::INSERTION_MODE_AFTER_BODY: 4356 return $this->step_after_body(); 4357 4358 case WP_HTML_Processor_State::INSERTION_MODE_IN_FRAMESET: 4359 return $this->step_in_frameset(); 4360 4361 case WP_HTML_Processor_State::INSERTION_MODE_AFTER_FRAMESET: 4362 return $this->step_after_frameset(); 4363 4364 case WP_HTML_Processor_State::INSERTION_MODE_AFTER_AFTER_BODY: 4365 return $this->step_after_after_body(); 4366 4367 case WP_HTML_Processor_State::INSERTION_MODE_AFTER_AFTER_FRAMESET: 4368 return $this->step_after_after_frameset(); 4369 4370 // This should be unreachable but PHP doesn't have total type checking on switch. 4371 default: 4372 $this->bail( "Unaware of the requested parsing mode: '{$this->state->insertion_mode}'." ); 4373 } 4374 } 4073 4375 } 4074 4376 … … 4099 4401 * HTML semantic overrides for Tag Processor 4100 4402 */ 4403 4404 /** 4405 * Indicates the namespace of the current token, or "html" if there is none. 4406 * 4407 * @return string One of "html", "math", or "svg". 4408 */ 4409 public function get_namespace(): string { 4410 if ( ! 
isset( $this->current_element ) ) { 4411 return 'html'; 4412 } 4413 4414 return $this->current_element->token->namespace; 4415 } 4101 4416 4102 4417 /** … … 4736 5051 4737 5052 /** 5053 * Returns the adjusted current node. 5054 * 5055 * > The adjusted current node is the context element if the parser was created as 5056 * > part of the HTML fragment parsing algorithm and the stack of open elements 5057 * > has only one element in it (fragment case); otherwise, the adjusted current 5058 * > node is the current node. 5059 * 5060 * @see https://html.spec.whatwg.org/#adjusted-current-node 5061 * 5062 * @since 6.7.0 5063 * 5064 * @return WP_HTML_Token|null The adjusted current node. 5065 */ 5066 private function get_adjusted_current_node(): ?WP_HTML_Token { 5067 if ( isset( $this->context_node ) && 1 === $this->state->stack_of_open_elements->count() ) { 5068 return $this->context_node; 5069 } 5070 5071 return $this->state->stack_of_open_elements->current_node(); 5072 } 5073 5074 /** 4738 5075 * Reconstructs the active formatting elements. 4739 5076 * … … 5044 5381 } 5045 5382 5046 if ( self::is_special( $item ->node_name) ) {5383 if ( self::is_special( $item ) ) { 5047 5384 $furthest_block = $item; 5048 5385 break; … … 5113 5450 5114 5451 /** 5452 * Inserts a foreign element on to the stack of open elements. 5453 * 5454 * @since 6.7.0 5455 * 5456 * @see https://html.spec.whatwg.org/#insert-a-foreign-element 5457 * 5458 * @param WP_HTML_Token $token Insert this token. The token's namespace and 5459 * insertion point will be updated correctly. 5460 * @param bool $only_add_to_element_stack Whether to skip the "insert an element at the adjusted 5461 * insertion location" algorithm when adding this element. 5462 */ 5463 private function insert_foreign_element( WP_HTML_Token $token, bool $only_add_to_element_stack ): void { 5464 $adjusted_current_node = $this->get_adjusted_current_node(); 5465 5466 $token->namespace = $adjusted_current_node ? 
$adjusted_current_node->namespace : 'html'; 5467 5468 if ( $this->is_mathml_integration_point() ) { 5469 $token->integration_node_type = 'math'; 5470 } elseif ( $this->is_html_integration_point() ) { 5471 $token->integration_node_type = 'html'; 5472 } 5473 5474 if ( false === $only_add_to_element_stack ) { 5475 /* 5476 * @todo Implement the "appropriate place for inserting a node" and the 5477 * "insert an element at the adjusted insertion location" algorithms. 5478 * 5479 * These algorithms mostly impacts DOM tree construction and not the HTML API. 5480 * Here, there's no DOM node onto which the element will be appended, so the 5481 * parser will skip this step. 5482 * 5483 * @see https://html.spec.whatwg.org/#insert-an-element-at-the-adjusted-insertion-location 5484 */ 5485 } 5486 5487 $this->insert_html_element( $token ); 5488 } 5489 5490 /** 5115 5491 * Inserts a virtual element on the stack of open elements. 5116 5492 * … … 5138 5514 5139 5515 /** 5516 * Indicates if the current token is a MathML integration point. 5517 * 5518 * @since 6.7.0 5519 * 5520 * @see https://html.spec.whatwg.org/#mathml-text-integration-point 5521 * 5522 * @return bool Whether the current token is a MathML integration point. 5523 */ 5524 private function is_mathml_integration_point(): bool { 5525 $current_token = $this->state->current_token; 5526 if ( ! isset( $current_token ) ) { 5527 return false; 5528 } 5529 5530 if ( 'math' !== $current_token->namespace || 'M' !== $current_token->node_name[0] ) { 5531 return false; 5532 } 5533 5534 $tag_name = $current_token->node_name; 5535 5536 return ( 5537 'MI' === $tag_name || 5538 'MO' === $tag_name || 5539 'MN' === $tag_name || 5540 'MS' === $tag_name || 5541 'MTEXT' === $tag_name 5542 ); 5543 } 5544 5545 /** 5546 * Indicates if the current token is an HTML integration point. 
5547 * 5548 * Note that this method must be an instance method with access 5549 * to the current token, since it needs to examine the attributes 5550 * of the currently-matched tag, if it's in the MathML namespace. 5551 * Otherwise it would be required to scan the HTML and ensure that 5552 * no other accounting is overlooked. 5553 * 5554 * @since 6.7.0 5555 * 5556 * @see https://html.spec.whatwg.org/#html-integration-point 5557 * 5558 * @return bool Whether the current token is an HTML integration point. 5559 */ 5560 private function is_html_integration_point(): bool { 5561 $current_token = $this->state->current_token; 5562 if ( ! isset( $current_token ) ) { 5563 return false; 5564 } 5565 5566 if ( 'html' === $current_token->namespace ) { 5567 return false; 5568 } 5569 5570 $tag_name = $current_token->node_name; 5571 5572 if ( 'svg' === $current_token->namespace ) { 5573 return ( 5574 'DESC' === $tag_name || 5575 'FOREIGNOBJECT' === $tag_name || 5576 'TITLE' === $tag_name 5577 ); 5578 } 5579 5580 if ( 'math' === $current_token->namespace ) { 5581 if ( 'ANNOTATION-XML' !== $tag_name ) { 5582 return false; 5583 } 5584 5585 $encoding = $this->get_attribute( 'encoding' ); 5586 5587 return ( 5588 is_string( $encoding ) && 5589 ( 5590 0 === strcasecmp( $encoding, 'application/xhtml+xml' ) || 5591 0 === strcasecmp( $encoding, 'text/html' ) 5592 ) 5593 ); 5594 } 5595 } 5596 5597 /** 5140 5598 * Returns whether an element of a given name is in the HTML special category. 5141 5599 * … … 5144 5602 * @see https://html.spec.whatwg.org/#special 5145 5603 * 5146 * @param string $tag_name Name of element to check.5604 * @param WP_HTML_Token|string $tag_name Node to check, or only its name if in the HTML namespace. 5147 5605 * @return bool Whether the element of the given name is in the special category. 
5148 5606 */ 5149 5607 public static function is_special( $tag_name ): bool { 5150 $tag_name = strtoupper( $tag_name ); 5608 if ( is_string( $tag_name ) ) { 5609 $tag_name = strtoupper( $tag_name ); 5610 } else { 5611 $tag_name = 'html' === $tag_name->namespace 5612 ? strtoupper( $tag_name->node_name ) 5613 : "{$tag_name->namespace} {$tag_name->node_name}"; 5614 } 5151 5615 5152 5616 return ( … … 5236 5700 5237 5701 // MathML. 5238 ' MI' === $tag_name ||5239 ' MO' === $tag_name ||5240 ' MN' === $tag_name ||5241 ' MS' === $tag_name ||5242 ' MTEXT' === $tag_name ||5243 ' ANNOTATION-XML' === $tag_name ||5702 'math MI' === $tag_name || 5703 'math MO' === $tag_name || 5704 'math MN' === $tag_name || 5705 'math MS' === $tag_name || 5706 'math MTEXT' === $tag_name || 5707 'math ANNOTATION-XML' === $tag_name || 5244 5708 5245 5709 // SVG. 5246 ' FOREIGNOBJECT' === $tag_name ||5247 ' DESC' === $tag_name ||5248 ' TITLE' === $tag_name5710 'svg DESC' === $tag_name || 5711 'svg FOREIGNOBJECT' === $tag_name || 5712 'svg TITLE' === $tag_name 5249 5713 ); 5250 5714 } -
trunk/src/wp-includes/html-api/class-wp-html-tag-processor.php
r58866 r58867 513 513 514 514 /** 515 * Indicates whether the parser is inside foreign content, 516 * e.g. inside an SVG or MathML element. 517 * 518 * One of 'html', 'svg', or 'math'. 519 * 520 * Several parsing rules change based on whether the parser 521 * is inside foreign content, including whether CDATA sections 522 * are allowed and whether a self-closing flag indicates that 523 * an element has no content. 524 * 525 * @since 6.7.0 526 * 527 * @var string 528 */ 529 private $parsing_namespace = 'html'; 530 531 /** 515 532 * What kind of syntax token became an HTML comment. 516 533 * … … 779 796 public function __construct( $html ) { 780 797 $this->html = $html; 798 } 799 800 /** 801 * Switches parsing mode into a new namespace, such as when 802 * encountering an SVG tag and entering foreign content. 803 * 804 * @since 6.7.0 805 * 806 * @param string $new_namespace One of 'html', 'svg', or 'math' indicating into what 807 * namespace the next tokens will be processed. 808 * @return bool Whether the namespace was valid and changed. 809 */ 810 public function change_parsing_namespace( string $new_namespace ): bool { 811 if ( ! in_array( $new_namespace, array( 'html', 'math', 'svg' ), true ) ) { 812 return false; 813 } 814 815 $this->parsing_namespace = $new_namespace; 816 return true; 781 817 } 782 818 … … 844 880 * 845 881 * @since 6.5.0 882 * @since 6.7.0 Recognizes CDATA sections within foreign content. 846 883 * 847 884 * @return bool Whether a token was parsed. … … 957 994 if ( 958 995 $this->is_closing_tag || 996 'html' !== $this->parsing_namespace || 959 997 1 !== strspn( $this->html, 'iIlLnNpPsStTxX', $this->tag_name_starts_at, 1 ) 960 998 ) { … … 997 1035 998 1036 // Find the closing tag if necessary. 
999 $found_closer = false;1000 1037 switch ( $tag_name ) { 1001 1038 case 'SCRIPT': … … 1757 1794 $this->text_length = $closer_at - $this->text_starts_at; 1758 1795 $this->bytes_already_parsed = $closer_at + 1; 1796 return true; 1797 } 1798 1799 if ( 1800 'html' !== $this->parsing_namespace && 1801 strlen( $html ) > $at + 8 && 1802 '[' === $html[ $at + 2 ] && 1803 'C' === $html[ $at + 3 ] && 1804 'D' === $html[ $at + 4 ] && 1805 'A' === $html[ $at + 5 ] && 1806 'T' === $html[ $at + 6 ] && 1807 'A' === $html[ $at + 7 ] && 1808 '[' === $html[ $at + 8 ] 1809 ) { 1810 $closer_at = strpos( $html, ']]>', $at + 9 ); 1811 if ( false === $closer_at ) { 1812 $this->parser_state = self::STATE_INCOMPLETE_INPUT; 1813 1814 return false; 1815 } 1816 1817 $this->parser_state = self::STATE_CDATA_NODE; 1818 $this->text_starts_at = $at + 9; 1819 $this->text_length = $closer_at - $this->text_starts_at; 1820 $this->token_length = $closer_at + 3 - $this->token_starts_at; 1821 $this->bytes_already_parsed = $closer_at + 3; 1759 1822 return true; 1760 1823 } … … 2655 2718 2656 2719 /** 2720 * Returns the namespace of the matched token. 2721 * 2722 * @since 6.7.0 2723 * 2724 * @return string One of 'html', 'math', or 'svg'. 2725 */ 2726 public function get_namespace(): string { 2727 return $this->parsing_namespace; 2728 } 2729 2730 /** 2657 2731 * Returns the uppercase name of the matched tag. 2658 2732 * … … 2689 2763 2690 2764 return null; 2765 } 2766 2767 /** 2768 * Returns the adjusted tag name for a given token, taking into 2769 * account the current parsing context, whether HTML, SVG, or MathML. 2770 * 2771 * @since 6.7.0 2772 * 2773 * @return string|null Name of current tag name. 
2774 */ 2775 public function get_qualified_tag_name(): ?string { 2776 $tag_name = $this->get_tag(); 2777 if ( null === $tag_name ) { 2778 return null; 2779 } 2780 2781 if ( 'html' === $this->get_namespace() ) { 2782 return $tag_name; 2783 } 2784 2785 $lower_tag_name = strtolower( $tag_name ); 2786 if ( 'math' === $this->get_namespace() ) { 2787 return $lower_tag_name; 2788 } 2789 2790 if ( 'svg' === $this->get_namespace() ) { 2791 switch ( $lower_tag_name ) { 2792 case 'altglyph': 2793 return 'altGlyph'; 2794 2795 case 'altglyphdef': 2796 return 'altGlyphDef'; 2797 2798 case 'altglyphitem': 2799 return 'altGlyphItem'; 2800 2801 case 'animatecolor': 2802 return 'animateColor'; 2803 2804 case 'animatemotion': 2805 return 'animateMotion'; 2806 2807 case 'animatetransform': 2808 return 'animateTransform'; 2809 2810 case 'clippath': 2811 return 'clipPath'; 2812 2813 case 'feblend': 2814 return 'feBlend'; 2815 2816 case 'fecolormatrix': 2817 return 'feColorMatrix'; 2818 2819 case 'fecomponenttransfer': 2820 return 'feComponentTransfer'; 2821 2822 case 'fecomposite': 2823 return 'feComposite'; 2824 2825 case 'feconvolvematrix': 2826 return 'feConvolveMatrix'; 2827 2828 case 'fediffuselighting': 2829 return 'feDiffuseLighting'; 2830 2831 case 'fedisplacementmap': 2832 return 'feDisplacementMap'; 2833 2834 case 'fedistantlight': 2835 return 'feDistantLight'; 2836 2837 case 'fedropshadow': 2838 return 'feDropShadow'; 2839 2840 case 'feflood': 2841 return 'feFlood'; 2842 2843 case 'fefunca': 2844 return 'feFuncA'; 2845 2846 case 'fefuncb': 2847 return 'feFuncB'; 2848 2849 case 'fefuncg': 2850 return 'feFuncG'; 2851 2852 case 'fefuncr': 2853 return 'feFuncR'; 2854 2855 case 'fegaussianblur': 2856 return 'feGaussianBlur'; 2857 2858 case 'feimage': 2859 return 'feImage'; 2860 2861 case 'femerge': 2862 return 'feMerge'; 2863 2864 case 'femergenode': 2865 return 'feMergeNode'; 2866 2867 case 'femorphology': 2868 return 'feMorphology'; 2869 2870 case 'feoffset': 2871 return 
'feOffset'; 2872 2873 case 'fepointlight': 2874 return 'fePointLight'; 2875 2876 case 'fespecularlighting': 2877 return 'feSpecularLighting'; 2878 2879 case 'fespotlight': 2880 return 'feSpotLight'; 2881 2882 case 'fetile': 2883 return 'feTile'; 2884 2885 case 'feturbulence': 2886 return 'feTurbulence'; 2887 2888 case 'foreignobject': 2889 return 'foreignObject'; 2890 2891 case 'glyphref': 2892 return 'glyphRef'; 2893 2894 case 'lineargradient': 2895 return 'linearGradient'; 2896 2897 case 'radialgradient': 2898 return 'radialGradient'; 2899 2900 case 'textpath': 2901 return 'textPath'; 2902 2903 default: 2904 return $lower_tag_name; 2905 } 2906 } 2907 } 2908 2909 /** 2910 * Returns the adjusted attribute name for a given attribute, taking into 2911 * account the current parsing context, whether HTML, SVG, or MathML. 2912 * 2913 * @since 6.7.0 2914 * 2915 * @param string $attribute_name Which attribute to adjust. 2916 * 2917 * @return string|null 2918 */ 2919 public function get_qualified_attribute_name( $attribute_name ): ?string { 2920 if ( self::STATE_MATCHED_TAG !== $this->parser_state ) { 2921 return null; 2922 } 2923 2924 $namespace = $this->get_namespace(); 2925 $lower_name = strtolower( $attribute_name ); 2926 2927 if ( 'math' === $namespace && 'definitionurl' === $lower_name ) { 2928 return 'definitionURL'; 2929 } 2930 2931 if ( 'svg' === $this->get_namespace() ) { 2932 switch ( $lower_name ) { 2933 case 'attributename': 2934 return 'attributeName'; 2935 2936 case 'attributetype': 2937 return 'attributeType'; 2938 2939 case 'basefrequency': 2940 return 'baseFrequency'; 2941 2942 case 'baseprofile': 2943 return 'baseProfile'; 2944 2945 case 'calcmode': 2946 return 'calcMode'; 2947 2948 case 'clippathunits': 2949 return 'clipPathUnits'; 2950 2951 case 'diffuseconstant': 2952 return 'diffuseConstant'; 2953 2954 case 'edgemode': 2955 return 'edgeMode'; 2956 2957 case 'filterunits': 2958 return 'filterUnits'; 2959 2960 case 'glyphref': 2961 return 'glyphRef'; 
2962 2963 case 'gradienttransform': 2964 return 'gradientTransform'; 2965 2966 case 'gradientunits': 2967 return 'gradientUnits'; 2968 2969 case 'kernelmatrix': 2970 return 'kernelMatrix'; 2971 2972 case 'kernelunitlength': 2973 return 'kernelUnitLength'; 2974 2975 case 'keypoints': 2976 return 'keyPoints'; 2977 2978 case 'keysplines': 2979 return 'keySplines'; 2980 2981 case 'keytimes': 2982 return 'keyTimes'; 2983 2984 case 'lengthadjust': 2985 return 'lengthAdjust'; 2986 2987 case 'limitingconeangle': 2988 return 'limitingConeAngle'; 2989 2990 case 'markerheight': 2991 return 'markerHeight'; 2992 2993 case 'markerunits': 2994 return 'markerUnits'; 2995 2996 case 'markerwidth': 2997 return 'markerWidth'; 2998 2999 case 'maskcontentunits': 3000 return 'maskContentUnits'; 3001 3002 case 'maskunits': 3003 return 'maskUnits'; 3004 3005 case 'numoctaves': 3006 return 'numOctaves'; 3007 3008 case 'pathlength': 3009 return 'pathLength'; 3010 3011 case 'patterncontentunits': 3012 return 'patternContentUnits'; 3013 3014 case 'patterntransform': 3015 return 'patternTransform'; 3016 3017 case 'patternunits': 3018 return 'patternUnits'; 3019 3020 case 'pointsatx': 3021 return 'pointsAtX'; 3022 3023 case 'pointsaty': 3024 return 'pointsAtY'; 3025 3026 case 'pointsatz': 3027 return 'pointsAtZ'; 3028 3029 case 'preservealpha': 3030 return 'preserveAlpha'; 3031 3032 case 'preserveaspectratio': 3033 return 'preserveAspectRatio'; 3034 3035 case 'primitiveunits': 3036 return 'primitiveUnits'; 3037 3038 case 'refx': 3039 return 'refX'; 3040 3041 case 'refy': 3042 return 'refY'; 3043 3044 case 'repeatcount': 3045 return 'repeatCount'; 3046 3047 case 'repeatdur': 3048 return 'repeatDur'; 3049 3050 case 'requiredextensions': 3051 return 'requiredExtensions'; 3052 3053 case 'requiredfeatures': 3054 return 'requiredFeatures'; 3055 3056 case 'specularconstant': 3057 return 'specularConstant'; 3058 3059 case 'specularexponent': 3060 return 'specularExponent'; 3061 3062 case 'spreadmethod': 
3063 return 'spreadMethod'; 3064 3065 case 'startoffset': 3066 return 'startOffset'; 3067 3068 case 'stddeviation': 3069 return 'stdDeviation'; 3070 3071 case 'stitchtiles': 3072 return 'stitchTiles'; 3073 3074 case 'surfacescale': 3075 return 'surfaceScale'; 3076 3077 case 'systemlanguage': 3078 return 'systemLanguage'; 3079 3080 case 'tablevalues': 3081 return 'tableValues'; 3082 3083 case 'targetx': 3084 return 'targetX'; 3085 3086 case 'targety': 3087 return 'targetY'; 3088 3089 case 'textlength': 3090 return 'textLength'; 3091 3092 case 'viewbox': 3093 return 'viewBox'; 3094 3095 case 'viewtarget': 3096 return 'viewTarget'; 3097 3098 case 'xchannelselector': 3099 return 'xChannelSelector'; 3100 3101 case 'ychannelselector': 3102 return 'yChannelSelector'; 3103 3104 case 'zoomandpan': 3105 return 'zoomAndPan'; 3106 } 3107 } 3108 3109 if ( 'html' !== $namespace ) { 3110 switch ( $lower_name ) { 3111 case 'xlink:actuate': 3112 return 'xlink actuate'; 3113 3114 case 'xlink:arcrole': 3115 return 'xlink arcrole'; 3116 3117 case 'xlink:href': 3118 return 'xlink href'; 3119 3120 case 'xlink:role': 3121 return 'xlink role'; 3122 3123 case 'xlink:show': 3124 return 'xlink show'; 3125 3126 case 'xlink:title': 3127 return 'xlink title'; 3128 3129 case 'xlink:type': 3130 return 'xlink type'; 3131 3132 case 'xml:lang': 3133 return 'xml lang'; 3134 3135 case 'xml:space': 3136 return 'xml space'; 3137 3138 case 'xmlns': 3139 return 'xmlns'; 3140 3141 case 'xmlns:xlink': 3142 return 'xmlns xlink'; 3143 } 3144 } 3145 3146 return $attribute_name; 2691 3147 } 2692 3148 … … 2964 3420 * for security reasons (to avoid joining together strings that were safe 2965 3421 * when separated, but not when joined). 3422 * 3423 * @todo Inside HTML integration points and MathML integration points, the 3424 * text is processed according to the insertion mode, not according 3425 * to the foreign content rules. This should strip the NULL bytes. 
2966 3426 */ 2967 return '#text' === $tag_name3427 return ( '#text' === $tag_name && 'html' === $this->get_namespace() ) 2968 3428 ? str_replace( "\x00", '', $decoded ) 2969 3429 : str_replace( "\x00", "\u{FFFD}", $decoded ); -
trunk/src/wp-includes/html-api/class-wp-html-token.php
r58779 r58867 62 62 63 63 /** 64 * Indicates if the element is an HTML element or if it's inside foreign content. 65 * 66 * @since 6.7.0 67 * 68 * @var string 'html', 'svg', or 'math'. 69 */ 70 public $namespace = 'html'; 71 72 /** 73 * Indicates which kind of integration point the element is, if any. 74 * 75 * @since 6.7.0 76 * 77 * @var string|null 'math', 'html', or null if not an integration point. 78 */ 79 public $integration_node_type = null; 80 81 /** 64 82 * Called when token is garbage-collected or otherwise destroyed. 65 83 * … … 81 99 public function __construct( ?string $bookmark_name, string $node_name, bool $has_self_closing_flag, ?callable $on_destroy = null ) { 82 100 $this->bookmark_name = $bookmark_name; 101 $this->namespace = 'html'; 83 102 $this->node_name = $node_name; 84 103 $this->has_self_closing_flag = $has_self_closing_flag; -
trunk/tests/phpunit/tests/html-api/wpHtmlProcessor.php
r58828 r58867 360 360 361 361 /** 362 * Ensures that special handling of unsupported tags is cleaned up363 * as handling is implemented. Otherwise there's risk of leaving special364 * handling (that is never reached) when tag handling is implemented.365 *366 * @ticket 60092367 *368 * @dataProvider data_unsupported_special_in_body_tags369 *370 * @covers WP_HTML_Processor::step_in_body371 *372 * @param string $tag_name Name of the tag to test.373 */374 public function test_step_in_body_fails_on_unsupported_tags( $tag_name ) {375 $fragment = WP_HTML_Processor::create_fragment( '<' . $tag_name . '></' . $tag_name . '>' );376 $this->assertFalse( $fragment->next_tag(), 'Should fail to find tag: ' . $tag_name . '.' );377 $this->assertEquals( $fragment->get_last_error(), WP_HTML_Processor::ERROR_UNSUPPORTED, 'Should have unsupported last error.' );378 }379 380 /**381 * Data provider.382 *383 * @return array[]384 */385 public static function data_unsupported_special_in_body_tags() {386 return array(387 'MATH' => array( 'MATH' ),388 'SVG' => array( 'SVG' ),389 );390 }391 392 /**393 362 * Ensures that the HTML Processor properly reports the depth of a given element. 394 363 * -
trunk/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php
r58836 r58867 166 166 167 167 /** 168 * Ensures that no new HTML elements are accidentally partially-supported.169 *170 * When introducing support for new HTML elements, there are multiple places171 * in the HTML Processor that need to be updated, until the time that the class172 * has full HTML5 support. Because of this, these tests lock down the interface173 * to ensure that support isn't accidentally updated in one place for a new174 * element while overlooked in another.175 *176 * @ticket 58517177 *178 * @covers WP_HTML_Processor::step179 *180 * @dataProvider data_unsupported_elements181 *182 * @param string $html HTML string containing unsupported elements.183 */184 public function test_fails_when_encountering_unsupported_tag( $html ) {185 $processor = WP_HTML_Processor::create_fragment( $html );186 187 $this->assertFalse( $processor->step(), "Should not have stepped into unsupported {$processor->get_tag()} element." );188 }189 190 /**191 * Data provider.192 *193 * @return array[]194 */195 public static function data_unsupported_elements() {196 $unsupported_elements = array(197 'MATH',198 'PLAINTEXT', // Neutralized.199 'SVG',200 );201 202 $data = array();203 foreach ( $unsupported_elements as $tag_name ) {204 $data[ $tag_name ] = array( "<{$tag_name}>" );205 }206 207 return $data;208 }209 210 /**211 168 * @ticket 58517 212 169 * -
trunk/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php
r58859 r58867 182 182 183 183 if ( $was_text && '#text' !== $token_name ) { 184 $output .= "{$text_node}\"\n"; 184 if ( '' !== $text_node ) { 185 $output .= "{$text_node}\"\n"; 186 } 185 187 $was_text = false; 186 188 $text_node = ''; … … 189 191 switch ( $token_type ) { 190 192 case '#tag': 191 $tag_name = strtolower( $token_name ); 193 $namespace = $processor->get_namespace(); 194 $tag_name = 'html' === $namespace 195 ? strtolower( $processor->get_tag() ) 196 : "{$namespace} {$processor->get_qualified_tag_name()}"; 192 197 193 198 if ( $is_closer ) { 194 199 --$indent_level; 195 200 196 if ( ' TEMPLATE' === $token_name ) {201 if ( 'html' === $namespace && 'TEMPLATE' === $token_name ) { 197 202 --$indent_level; 198 203 } … … 203 208 $tag_indent = $indent_level; 204 209 205 if ( ! WP_HTML_Processor::is_void( $tag_name ) ) { 210 if ( 'html' !== $namespace ) { 211 if ( ! $processor->has_self_closing_flag() ) { 212 ++$indent_level; 213 } 214 } elseif ( ! WP_HTML_Processor::is_void( $tag_name ) ) { 206 215 ++$indent_level; 207 216 } … … 211 220 $attribute_names = $processor->get_attribute_names_with_prefix( '' ); 212 221 if ( $attribute_names ) { 213 sort( $attribute_names, SORT_STRING ); 214 222 $sorted_attributes = array(); 215 223 foreach ( $attribute_names as $attribute_name ) { 224 $sorted_attributes[ $attribute_name ] = $processor->get_qualified_attribute_name( $attribute_name ); 225 } 226 227 /* 228 * Sorts attributes to match html5lib sort order. 229 * 230 * - First comes normal HTML attributes. 231 * - Then come adjusted foreign attributes; these have spaces in their names. 232 * - Finally come non-adjusted foreign attributes; these have a colon in their names. 
233 * 234 * Example: 235 * 236 * From: <math xlink:author definitionurl xlink:title xlink:show> 237 * Sorted: 'definitionURL', 'xlink show', 'xlink title', 'xlink:author' 238 */ 239 uasort( 240 $sorted_attributes, 241 static function ( $a, $b ) { 242 $a_has_ns = str_contains( $a, ':' ); 243 $b_has_ns = str_contains( $b, ':' ); 244 245 // Attributes with `:` should follow all other attributes. 246 if ( $a_has_ns !== $b_has_ns ) { 247 return $a_has_ns ? 1 : -1; 248 } 249 250 $a_has_sp = str_contains( $a, ' ' ); 251 $b_has_sp = str_contains( $b, ' ' ); 252 253 // Attributes with a namespace ' ' should come after those without. 254 if ( $a_has_sp !== $b_has_sp ) { 255 return $a_has_sp ? 1 : -1; 256 } 257 258 return $a <=> $b; 259 } 260 ); 261 262 foreach ( $sorted_attributes as $attribute_name => $display_name ) { 216 263 $val = $processor->get_attribute( $attribute_name ); 217 264 /* … … 222 269 $val = ''; 223 270 } 224 $output .= str_repeat( $indent, $tag_indent + 1 ) . "{$ attribute_name}=\"{$val}\"\n";271 $output .= str_repeat( $indent, $tag_indent + 1 ) . "{$display_name}=\"{$val}\"\n"; 225 272 } 226 273 } … … 232 279 } 233 280 234 if ( ' TEMPLATE' === $token_name ) {281 if ( 'html' === $namespace && 'TEMPLATE' === $token_name ) { 235 282 $output .= str_repeat( $indent, $indent_level ) . "content\n"; 236 283 ++$indent_level; … … 243 290 break; 244 291 292 case '#cdata-section': 245 293 case '#text': 294 $text_content = $processor->get_modifiable_text(); 295 if ( '' === $text_content ) { 296 break; 297 } 246 298 $was_text = true; 247 299 if ( '' === $text_node ) { 248 300 $text_node .= str_repeat( $indent, $indent_level ) . '"'; 249 301 } 250 $text_node .= $ processor->get_modifiable_text();302 $text_node .= $text_content; 251 303 break; 252 304 -
trunk/tests/phpunit/tests/html-api/wpHtmlSupportRequiredOpenElements.php
r58779 r58867 1 <?php2 /**3 * Unit tests for the HTML API indicating that changes are needed to the4 * WP_HTML_Open_Elements class before specific features are added to the API.5 *6 * Note! Duplication of test cases and the helper function in this file are intentional.7 * This test file exists to warn developers of related areas of code that need to update8 * together when adding support for new elements to the HTML Processor. For example,9 * when adding support for the BUTTON element it's necessary to update multiple methods10 * in the class governing the stack of open elements as well as the HTML Processor class11 * itself. This is because each element might bring with it semantic rules that impact12 * the way the document should be parsed. BUTTON creates a kind of boundary in the13 * DOM tree and implicitly closes existing open BUTTON elements.14 *15 * Without these tests a developer needs to investigate all possible places they16 * might need to update when adding support for more elements and risks overlooking17 * important parts that, in the absence of the related support, will lead to errors.18 *19 * @package WordPress20 * @subpackage HTML-API21 *22 * @since 6.4.023 *24 * @group html-api25 *26 * @coversDefaultClass WP_HTML_Processor27 */28 class Tests_HtmlApi_WpHtmlSupportRequiredOpenElements extends WP_UnitTestCase {29 /**30 * Fails to assert if the HTML Processor handles the given tag.31 *32 * This test helper is used throughout this test file for one purpose only: to33 * fail a test if the HTML Processor handles the given tag. 
In other words, it34 * ensures that the HTML Processor aborts when encountering the given tag.35 *36 * This is used to ensure that when support for a new tag is added to the37 * HTML Processor it receives full support and not partial support, which38 * could lead to a variety of issues.39 *40 * Do not remove this helper function as it provides semantic meaning to the41 * assertions in the tests in this file and its behavior is incredibly specific42 * and limited and doesn't warrant adding a new abstraction into WP_UnitTestCase.43 *44 * @param string $tag_name the HTML Processor should abort when encountering this tag, e.g. "BUTTON".45 */46 private function ensure_support_is_added_everywhere( $tag_name ) {47 $processor = WP_HTML_Processor::create_fragment( "<$tag_name>" );48 49 $this->assertFalse( $processor->step(), "Must support terminating elements in specific scope check before adding support for the {$tag_name} element." );50 }51 52 /**53 * The check for whether an element is in a scope depends on54 * looking for a number of terminating elements in the stack of open55 * elements. 
Until the listed elements are supported in the HTML56 * processor, there are no terminating elements and there's no57 * point in taking the time to look for them.58 *59 * @since 6.4.060 *61 * @ticket 5851762 */63 public function test_has_element_in_scope_needs_support() {64 // MathML Elements: MI, MO, MN, MS, MTEXT, ANNOTATION-XML.65 $this->ensure_support_is_added_everywhere( 'MATH' );66 67 /*68 * SVG elements: note that TITLE is both an HTML element and an SVG element69 * so care must be taken when adding support for either one.70 *71 * FOREIGNOBJECT, DESC, TITLE.72 */73 $this->ensure_support_is_added_everywhere( 'SVG' );74 }75 76 /**77 * The check for whether an element is in list item scope depends on78 * the elements for any scope, plus UL and OL.79 *80 * The method for asserting list item scope doesn't currently exist81 * because the LI element isn't yet supported and the LI element is82 * the only element that needs to know about list item scope.83 *84 * @since 6.4.085 *86 * @ticket 5851787 *88 * @covers WP_HTML_Open_Elements::has_element_in_list_item_scope89 */90 public function test_has_element_in_list_item_scope_needs_support() {91 // MathML Elements: MI, MO, MN, MS, MTEXT, ANNOTATION-XML.92 $this->ensure_support_is_added_everywhere( 'MATH' );93 94 /*95 * SVG elements: note that TITLE is both an HTML element and an SVG element96 * so care must be taken when adding support for either one.97 *98 * FOREIGNOBJECT, DESC, TITLE.99 */100 $this->ensure_support_is_added_everywhere( 'SVG' );101 }102 103 /**104 * The check for whether an element is in BUTTON scope depends on105 * the elements for any scope, plus BUTTON.106 *107 * @since 6.4.0108 *109 * @ticket 58517110 *111 * @covers WP_HTML_Open_Elements::has_element_in_button_scope112 */113 public function test_has_element_in_button_scope_needs_support() {114 // MathML Elements: MI, MO, MN, MS, MTEXT, ANNOTATION-XML.115 $this->ensure_support_is_added_everywhere( 'MATH' );116 117 /*118 * SVG elements: note that 
TITLE is both an HTML element and an SVG element119 * so care must be taken when adding support for either one.120 *121 * FOREIGNOBJECT, DESC, TITLE.122 */123 $this->ensure_support_is_added_everywhere( 'SVG' );124 }125 126 /**127 * The optimization maintaining a flag for "P is in BUTTON scope" requires128 * updating that flag every time an element is popped from the stack of129 * open elements.130 *131 * @since 6.4.0132 *133 * @ticket 58517134 *135 * @covers WP_HTML_Open_Elements::after_element_pop136 */137 public function test_after_element_pop_must_maintain_p_in_button_scope_flag() {138 // MathML Elements: MI, MO, MN, MS, MTEXT, ANNOTATION-XML.139 $this->ensure_support_is_added_everywhere( 'MATH' );140 141 /*142 * SVG elements: note that TITLE is both an HTML element and an SVG element143 * so care must be taken when adding support for either one.144 *145 * FOREIGNOBJECT, DESC, TITLE.146 */147 $this->ensure_support_is_added_everywhere( 'SVG' );148 }149 150 /**151 * The optimization maintaining a flag for "P is in BUTTON scope" requires152 * updating that flag every time an element is pushed onto the stack of153 * open elements.154 *155 * @since 6.4.0156 *157 * @ticket 58517158 *159 * @covers WP_HTML_Open_Elements::after_element_push160 */161 public function test_after_element_push_must_maintain_p_in_button_scope_flag() {162 // MathML Elements: MI, MO, MN, MS, MTEXT, ANNOTATION-XML.163 $this->ensure_support_is_added_everywhere( 'MATH' );164 165 /*166 * SVG elements: note that TITLE is both an HTML element and an SVG element167 * so care must be taken when adding support for either one.168 *169 * FOREIGNOBJECT, DESC, TITLE.170 */171 $this->ensure_support_is_added_everywhere( 'SVG' );172 }173 174 /**175 * The check for whether an element is in TABLE scope depends on176 * the HTML, TABLE, and TEMPLATE elements.177 *178 * @since 6.4.0179 *180 * @ticket 58517181 *182 * @covers WP_HTML_Open_Elements::has_element_in_table_scope183 */184 public function 
test_has_element_in_table_scope_needs_support() {185 // MathML Elements: MI, MO, MN, MS, MTEXT, ANNOTATION-XML.186 $this->ensure_support_is_added_everywhere( 'MATH' );187 188 /*189 * SVG elements: note that TITLE is both an HTML element and an SVG element190 * so care must be taken when adding support for either one.191 *192 * FOREIGNOBJECT, DESC, TITLE.193 */194 $this->ensure_support_is_added_everywhere( 'SVG' );195 }196 197 /**198 * The check for whether an element is in SELECT scope depends on199 * the OPTGROUP and OPTION elements.200 *201 * @since 6.4.0202 *203 * @ticket 58517204 *205 * @covers WP_HTML_Open_Elements::has_element_in_select_scope206 */207 public function test_has_element_in_select_scope_needs_support() {208 // MathML Elements: MI, MO, MN, MS, MTEXT, ANNOTATION-XML.209 $this->ensure_support_is_added_everywhere( 'MATH' );210 211 /*212 * SVG elements: note that TITLE is both an HTML element and an SVG element213 * so care must be taken when adding support for either one.214 *215 * FOREIGNOBJECT, DESC, TITLE.216 */217 $this->ensure_support_is_added_everywhere( 'SVG' );218 }219 } -
trunk/tests/phpunit/tests/html-api/wpHtmlTagProcessor-token-scanning.php
r58779 r58867 514 514 515 515 /** 516 * Ensures that basic CDATA sections inside foreign content are detected. 517 * 518 * @ticket 61576 519 */ 520 public function test_basic_cdata_in_foreign_content() { 521 $processor = new WP_HTML_Tag_Processor( '<svg><![CDATA[this is >> real CDATA]]></svg>' ); 522 $processor->next_token(); 523 524 // Artificially change namespace; this should be done in the HTML Processor. 525 $processor->change_parsing_namespace( 'svg' ); 526 $processor->next_token(); 527 528 $this->assertSame( 529 '#cdata-section', 530 $processor->get_token_name(), 531 "Should have found a CDATA section but found {$processor->get_token_name()} instead." 532 ); 533 534 $this->assertNull( 535 $processor->get_tag(), 536 'Should not have been able to query tag name on non-element token.' 537 ); 538 539 $this->assertNull( 540 $processor->get_attribute( 'type' ), 541 'Should not have been able to query attributes on non-element token.' 542 ); 543 544 $this->assertSame( 545 'this is >> real CDATA', 546 $processor->get_modifiable_text(), 547 'Found incorrect modifiable text.' 548 ); 549 } 550 551 /** 552 * Ensures that empty CDATA sections inside foreign content are detected. 553 * 554 * @ticket 61576 555 */ 556 public function test_empty_cdata_in_foreign_content() { 557 $processor = new WP_HTML_Tag_Processor( '<svg><![CDATA[]]></svg>' ); 558 $processor->next_token(); 559 560 // Artificially change namespace; this should be done in the HTML Processor. 561 $processor->change_parsing_namespace( 'svg' ); 562 $processor->next_token(); 563 564 $this->assertSame( 565 '#cdata-section', 566 $processor->get_token_name(), 567 "Should have found a CDATA section but found {$processor->get_token_name()} instead." 568 ); 569 570 $this->assertEmpty( 571 $processor->get_modifiable_text(), 572 'Found non-empty modifiable text.' 573 ); 574 } 575 576 /** 516 577 * Ensures that normative Processing Instruction nodes are properly parsed. 517 578 *
Note: See TracChangeset
for help on using the changeset viewer.