| 227 | * A collection of methods for parsing and executing shortcodes in content. |
| 228 | */ |
class Shortcode_Parser {
	/** @var string The text being parsed; shortcodes are replaced in place as they are processed. */
	private $content;

	/** @var int One of the SHORTCODE_PARSE_STATE_* constants. */
	private $state;

	/** @var int Byte offset into $content of the character currently being examined. */
	private $cursor_position;

	/** @var array Shortcodes that were open when a nested shortcode started (innermost last). */
	private $stack;

	/** @var array Tag names of the shortcodes that should be recognized. */
	private $tagnames;

	/** @var array|null Data about the shortcode currently being read, or null when none is open. */
	private $current_shortcode;

	const SHORTCODE_PARSE_STATE_DEFAULT = 0;
	const SHORTCODE_PARSE_STATE_IN_TAG = 1;
	const SHORTCODE_PARSE_STATE_IN_CONTENT = 2;
	const SHORTCODE_PARSE_STATE_IN_QUOTED_STRING = 3;

	private $DEBUG = false;

	/**
	 * @param string $content  The content to search for shortcodes.
	 * @param array  $tagnames The shortcode tag names to recognize.
	 */
	public function __construct( $content, $tagnames ) {
		$this->content = $content;
		$this->tagnames = $tagnames;
	}

	/**
	 * Parse shortcodes in content and replace them with the output that their handler functions generate.
	 *
	 * The content is scanned byte by byte with a small state machine. Nested shortcodes are
	 * processed innermost-first; their output becomes part of the enclosing shortcode's inner content.
	 * An opening tag that is still unterminated when the content ends (no closing "]") is left
	 * in the content as literal text.
	 *
	 * @return string The content with all recognized shortcodes replaced.
	 */
	public function parse() {
		$this->stack             = array();
		$this->current_shortcode = null;
		$this->state             = self::SHORTCODE_PARSE_STATE_DEFAULT;
		$this->cursor_position   = 0;

		// Without any tag names the alternation below would be empty and would match a bare "[".
		if ( empty( $this->tagnames ) ) {
			return $this->content;
		}

		// Quote with the delimiter so that a "/" in a tag name cannot terminate the pattern early.
		$quoted_tagnames = array();

		foreach ( $this->tagnames as $tagname ) {
			$quoted_tagnames[] = preg_quote( $tagname, '/' );
		}

		/**
		 * A regular expression that checks whether a string appears to begin with a tag for
		 * a registered shortcode.
		 */
		$registered_shortcode_regex = '/^(?P<extra_opening_bracket>\\[?)(?P<opening_bracket>\\[)(?P<tag_slug>' . join( '|', $quoted_tagnames ) . ')(?![\\w-])/u';

		// Save some parsing time by starting a few characters before the first bracket.
		$this->forward_cursor_to_next_bracket();

		// The helper may return -1 because it expects a following increment. There is none before
		// the first iteration, and a negative offset would make substr() read the END of the content.
		if ( $this->cursor_position < 0 ) {
			$this->cursor_position = 0;
		}

		$is_escaped = false;
		$delimiter  = null;

		$this->debug( 'Parsing content: ' . $this->content );

		while ( $this->cursor_position < strlen( $this->content ) ) {
			$char = substr( $this->content, $this->cursor_position, 1 );

			$this->debug( 'In position ' . $this->cursor_position . ' with state ' . $this->state . ', looking at character "' . $char . '"' );

			$found_escape_character = false;

			switch ( $this->state ) {
				case self::SHORTCODE_PARSE_STATE_DEFAULT:
				case self::SHORTCODE_PARSE_STATE_IN_CONTENT:
					if (
						! $is_escaped
						&& '[' === $char
						&& preg_match( $registered_shortcode_regex, substr( $this->content, $this->cursor_position ), $m )
					) {
						// We have found the beginning of a shortcode. Anything already open becomes its parent.
						if ( $this->current_shortcode ) {
							$this->stack[] = $this->current_shortcode;
						}

						$this->current_shortcode = array(
							'full_tag'              => $m[0],
							'extra_opening_bracket' => $m['extra_opening_bracket'],
							'tag_slug'              => $m['tag_slug'],
							'atts_and_values'       => '',
							'self_closing_slash'    => '',
							'inner_content'         => '',
							'extra_closing_bracket' => '',
							'cursor_position'       => $this->cursor_position,
						);

						// Land on the last character of the slug, as if we had just processed it;
						// the increment at the bottom of the loop moves past it.
						$this->cursor_position += strlen( $m[0] ) - 1;

						$this->debug( 'Found "' . $m[0] . '"; cursor is now at ' . $this->cursor_position );
						$this->debug( $this->current_shortcode );

						$this->state = self::SHORTCODE_PARSE_STATE_IN_TAG;
						break;
					}

					if ( self::SHORTCODE_PARSE_STATE_IN_CONTENT !== $this->state ) {
						// Outside of any shortcode, the only thing to track is escaping.
						$found_escape_character = ( ! $is_escaped && '\\' === $char );
						break;
					}

					// Everything read while inside a shortcode belongs to its full tag.
					$this->current_shortcode['full_tag'] .= $char;

					if ( $is_escaped || '[' !== $char ) {
						// Ordinary (or escaped) character: it is inner content.
						$this->current_shortcode['inner_content'] .= $char;
						$found_escape_character = ( ! $is_escaped && '\\' === $char );
						break;
					}

					// An unescaped "[" that does not open a registered shortcode: it may start a closing tag.
					if ( $this->closing_tag_follows_cursor( $this->current_shortcode['tag_slug'] ) ) {
						// The end of this shortcode.
						$rest_of_closing_tag = '/' . $this->current_shortcode['tag_slug'] . ']';

						$this->current_shortcode['full_tag'] .= $rest_of_closing_tag;

						// Move the cursor to the end of the closing tag.
						$this->cursor_position += strlen( $rest_of_closing_tag );

						$this->reconcile_extra_brackets();
						$this->process_current_shortcode();
						break;
					}

					$this->debug( 'The closing tag was not for the currently open shortcode.' );

					$stack_index = $this->find_stack_index_closed_at_cursor();

					if ( $stack_index < 0 ) {
						// Not a closing tag for anything that is open; it is just content.
						$this->current_shortcode['inner_content'] .= $char;
						break;
					}

					$this->debug( 'The closing tag was for this shortcode:', $this->stack[ $stack_index ] );

					// The bracket was appended to the full tag above on the assumption that it
					// belonged to the current shortcode. It does not, so remove it.
					$this->current_shortcode['full_tag'] = substr( $this->current_shortcode['full_tag'], 0, -1 );

					// The current shortcode (and others above the match on the stack) turned out to be
					// self-closing: process them with no inner content. If more than one level has to
					// be unwound, the remaining level is unwound when the closing tag is reached again.
					do {
						$this->debug( 'Inner content was:', $this->current_shortcode['inner_content'], 'Full tag was:', $this->current_shortcode['full_tag'] );

						$this->discard_inner_content();
						$this->process_current_shortcode(); // Pops the next shortcode off the stack.
					} while ( count( $this->stack ) > $stack_index + 1 );

					// process_current_shortcode() left the cursor on the last byte of the output.
					// Fall through to the normal increment so scanning resumes with the first byte
					// AFTER the output; that text now belongs to the enclosing shortcode.
					break;

				case self::SHORTCODE_PARSE_STATE_IN_TAG:
					$this->current_shortcode['full_tag'] .= $char;

					if ( ! $is_escaped && '/' === $char && ']' === substr( $this->content, $this->cursor_position + 1, 1 ) ) {
						// "/]" ends a self-closing shortcode.
						$this->current_shortcode['self_closing_slash'] = '/';
						$this->current_shortcode['full_tag']          .= ']';
						$this->cursor_position++;

						$this->reconcile_extra_brackets();
						$this->process_current_shortcode();
						break;
					}

					if ( ! $is_escaped && ']' === $char ) {
						if ( $this->current_shortcode['extra_opening_bracket'] ) {
							// This makes the assumption that this shortcode is closed as soon as the double brackets are found:
							//
							// [[my-shortcode]][/my-shortcode]]
							//
							// But in theory, this could just be a shortcode with the content "]".
							if ( ']' === substr( $this->content, $this->cursor_position + 1, 1 ) ) {
								$this->current_shortcode['extra_closing_bracket'] = ']';
								$this->current_shortcode['full_tag']             .= ']';
								$this->cursor_position++;

								$this->process_current_shortcode();
								break;
							}

							$this->debug( 'Extra closing bracket not found; the character was ' . substr( $this->content, $this->cursor_position + 1, 1 ) );
						}

						$closing_tag = '[/' . $this->current_shortcode['tag_slug'] . ']';

						if ( false === strpos( $this->content, $closing_tag, $this->cursor_position ) ) {
							// If there's no closing tag, it's a self-enclosed shortcode, and we're done with it.
							$this->process_current_shortcode();
						} else {
							$this->debug( 'Expecting to find a closing tag for ' . $this->current_shortcode['tag_slug'] );
							$this->state = self::SHORTCODE_PARSE_STATE_IN_CONTENT;
						}

						break;
					}

					// Part of the attribute string.
					$this->current_shortcode['atts_and_values'] .= $char;

					if ( ! $is_escaped ) {
						if ( '\\' === $char ) {
							$found_escape_character = true;
						} elseif ( '"' === $char || "'" === $char ) {
							$this->state = self::SHORTCODE_PARSE_STATE_IN_QUOTED_STRING;
							$delimiter   = $char;
						}
					}

					break;

				case self::SHORTCODE_PARSE_STATE_IN_QUOTED_STRING:
					$this->current_shortcode['full_tag']        .= $char;
					$this->current_shortcode['atts_and_values'] .= $char;

					// An escaped character is taken literally; otherwise look for an escape or the end of the string.
					if ( ! $is_escaped ) {
						if ( '\\' === $char ) {
							$found_escape_character = true;
						} elseif ( $char === $delimiter ) {
							$this->state = self::SHORTCODE_PARSE_STATE_IN_TAG;
							$delimiter   = null;
						}
					}

					break;
			}

			// The next character is escaped only if this one was an (unescaped) escape character.
			$is_escaped = $found_escape_character;

			$this->cursor_position++;

			$this->debug( 'Cursor position is ' . $this->cursor_position . '; strlen is ' . strlen( $this->content ) );
		}

		if (
			$this->current_shortcode
			&& (
				self::SHORTCODE_PARSE_STATE_IN_TAG === $this->state
				|| self::SHORTCODE_PARSE_STATE_IN_QUOTED_STRING === $this->state
			)
		) {
			/*
			 * The content ended in the middle of an opening tag, e.g.
			 *
			 *     This is my content [footag foo=" [bartag]
			 *     This is my content [footag foo="abc" bar="def" [bartag]
			 *
			 * This is treated as malformed: the text is left untouched rather than handing
			 * everything up to the end of the content to the shortcode handler.
			 */
			$this->abandon_current_shortcode();
		}

		if ( $this->current_shortcode ) {
			$this->debug( 'There are still pending shortcodes to process.', $this->current_shortcode, $this->stack );
		}

		/*
		 * If we end with shortcodes still open, then there was a situation like this:
		 *
		 * [footag] [bartag] [baztag] [footag]content[/footag]
		 *
		 * i.e., a scenario where the parser was unsure whether the first [footag] was self-closing or not.
		 *
		 * By this point, $content will be in this format:
		 *
		 * [footag] bartag-output baztag-output footag-content-output
		 *
		 * so we need to back up and process the still-stored shortcodes as unclosed.
		 */
		while ( $this->current_shortcode ) {
			// What we thought was part of this tag was just regular content.
			$this->discard_inner_content();

			$this->process_current_shortcode(); // This sets $current_shortcode, so we don't need to do it.
		}

		return $this->content;
	}

	/**
	 * Whether the text right after the cursor (which sits on a "[") is "/{$tag_slug}]".
	 *
	 * @param string $tag_slug The shortcode tag name to test for.
	 * @return bool
	 */
	private function closing_tag_follows_cursor( $tag_slug ) {
		$rest_of_closing_tag = '/' . $tag_slug . ']';

		return $rest_of_closing_tag === substr( $this->content, $this->cursor_position + 1, strlen( $rest_of_closing_tag ) );
	}

	/**
	 * Find the innermost stacked shortcode whose closing tag starts at the cursor.
	 *
	 * @return int The index into $this->stack, or -1 if the closing tag matches none of them.
	 */
	private function find_stack_index_closed_at_cursor() {
		for ( $stack_index = count( $this->stack ) - 1; $stack_index >= 0; $stack_index-- ) {
			if ( $this->closing_tag_follows_cursor( $this->stack[ $stack_index ]['tag_slug'] ) ) {
				return $stack_index;
			}
		}

		return -1;
	}

	/**
	 * Settle the "[[tag]]" escape syntax once the end of the current shortcode has been reached.
	 *
	 * If the shortcode started with an extra opening bracket and an extra closing bracket follows
	 * the cursor, the closing bracket is consumed. If there is no extra closing bracket, the extra
	 * opening bracket is ignored, which means the shortcode starts one character later.
	 */
	private function reconcile_extra_brackets() {
		if ( ! $this->current_shortcode['extra_opening_bracket'] ) {
			return;
		}

		if ( ']' === substr( $this->content, $this->cursor_position + 1, 1 ) ) {
			$this->current_shortcode['full_tag']             .= ']';
			$this->current_shortcode['extra_closing_bracket'] = ']';
			$this->cursor_position++;
			return;
		}

		$this->current_shortcode['full_tag']              = substr( $this->current_shortcode['full_tag'], 1 );
		$this->current_shortcode['extra_opening_bracket'] = '';
		$this->current_shortcode['cursor_position']      += 1;
	}

	/**
	 * Turn the current shortcode into one without inner content by trimming the collected inner
	 * content off the end of its full tag.
	 */
	private function discard_inner_content() {
		$inner_length = strlen( $this->current_shortcode['inner_content'] );

		// substr( $s, 0, -0 ) is substr( $s, 0, 0 ), i.e. an empty string, so only trim when
		// there is something to trim.
		if ( $inner_length > 0 ) {
			$this->current_shortcode['full_tag'] = substr( $this->current_shortcode['full_tag'], 0, -1 * $inner_length );
		}

		$this->current_shortcode['inner_content'] = '';
	}

	/**
	 * Drop the current shortcode without running its handler and without touching the content,
	 * making the enclosing shortcode (if any) current again.
	 */
	private function abandon_current_shortcode() {
		if ( ! empty( $this->stack ) ) {
			$this->current_shortcode = array_pop( $this->stack );
			$this->state             = self::SHORTCODE_PARSE_STATE_IN_CONTENT;
		} else {
			$this->current_shortcode = null;
			$this->state             = self::SHORTCODE_PARSE_STATE_DEFAULT;
		}
	}

	/**
	 * Create an argument to pass to do_shortcode_tag. The format of this argument was determined
	 * by the capture groups of the regular expression that used to be used to parse shortcodes out of content.
	 *
	 * @param array $shortcode An associative array comprising data about a shortcode in the text.
	 * @return array A numerically-indexed array of the shortcode data ready for do_shortcode_tag().
	 */
	private function shortcode_argument( $shortcode ) {
		return array(
			$shortcode['full_tag'],
			$shortcode['extra_opening_bracket'],
			$shortcode['tag_slug'],
			$shortcode['atts_and_values'],
			$shortcode['self_closing_slash'],
			$shortcode['inner_content'],
			$shortcode['extra_closing_bracket'],
		);
	}

	/**
	 * The current shortcode is complete and can be processed.
	 * Process it and modify the enclosing shortcode as if the content was passed in
	 * with this shortcode already converted into HTML.
	 *
	 * On return the cursor sits on the last byte of the inserted output (or further ahead, near
	 * the next bracket, when no shortcode is open), because the parsing loop increments it
	 * before reading the next character.
	 */
	private function process_current_shortcode() {
		$this->debug( 'Content is: ' . $this->content );
		$this->debug( $this->current_shortcode );

		$shortcode_output = do_shortcode_tag( $this->shortcode_argument( $this->current_shortcode ) );

		$start = $this->current_shortcode['cursor_position'];
		$end   = $start + strlen( $this->current_shortcode['full_tag'] );

		// Replace based on position rather than find and replace, since this content is possible:
		//
		// Test 123 [some-shortcode] To use my shortcode, type [[some-shortcode]].
		$this->content = substr( $this->content, 0, $start ) . $shortcode_output . substr( $this->content, $end );

		// Update the cursor position to the end of this shortcode's output.
		// The -1 is because the position is incremented after this gets called to move it to the next character.
		$this->cursor_position = $start + strlen( $shortcode_output ) - 1;

		if ( ! empty( $this->stack ) ) {
			// For any enclosing shortcode, its inner content needs to include the full output of this shortcode.
			$this->current_shortcode = array_pop( $this->stack );

			$this->current_shortcode['inner_content'] .= $shortcode_output;
			$this->current_shortcode['full_tag']      .= $shortcode_output;

			$this->state = self::SHORTCODE_PARSE_STATE_IN_CONTENT;
		} else {
			$this->current_shortcode = null;

			$this->state = self::SHORTCODE_PARSE_STATE_DEFAULT;

			// In the default state, we can skip over any content that couldn't be a shortcode,
			// so let's move forward near the next bracket.
			$this->forward_cursor_to_next_bracket();
		}

		$this->debug( 'Content is: ' . $this->content );
	}

	/**
	 * Move the cursor to two characters before the next "[" at or after the cursor, if there is one.
	 *
	 * The result can be -1: callers are expected to increment the cursor before reading from it.
	 * The cursor is left unchanged when no bracket is found.
	 */
	private function forward_cursor_to_next_bracket() {
		/*
		 * The max() here is because $cursor_position can be -1 if a shortcode
		 * at the beginning of the content didn't have any output and reset the
		 * cursor back to the beginning. It's -1 instead of zero because it will
		 * be incremented later in the loop to set it to zero for the next iteration.
		 */
		$next_bracket_location = strpos( $this->content, '[', max( 0, $this->cursor_position ) );

		if ( false === $next_bracket_location ) {
			return;
		}

		$this->debug( 'Current cursor position: ' . $this->cursor_position );

		/*
		 * Again, the -1 is because this will be incremented before it is used,
		 * and we really want it to have a minimum value of zero.
		 */
		$this->cursor_position = max( -1, $next_bracket_location - 2 );

		$this->debug( 'Jumped ahead to position ' . $this->cursor_position );
	}

	/**
	 * Outputs debug data. Useful when running unit tests to see how content is being parsed.
	 *
	 * Nothing is logged unless both WP_DEBUG and $this->DEBUG are truthy.
	 *
	 * @param mixed One or more variables of any type.
	 */
	private function debug( /* ... */ ) {
		if ( ! ( defined( 'WP_DEBUG' ) && WP_DEBUG && $this->DEBUG ) ) {
			return;
		}

		foreach ( func_get_args() as $arg ) {
			if ( 'string' === gettype( $arg ) ) {
				error_log( 'Shortcode_Parser debug: ' . $arg );
			} else {
				error_log( 'Shortcode_Parser debug: ' . var_export( $arg, true ) );
			}
		}
	}

}
| 670 | |
| 671 | /** |