| 227 | * A collection of methods for parsing and executing shortcodes in content. |
| 228 | */ |
class Shortcode_Parser {
	/**
	 * The content being parsed.
	 *
	 * Shortcode tags are replaced in place as they are processed, so once
	 * parse() has finished this holds the final output.
	 *
	 * @var string $content
	 */
	private $content;

	/**
	 * The current state of the parser.
	 *
	 * @var int $state One of the SHORTCODE_PARSE_STATE_* constants listed below.
	 */
	private $state;

	/**
	 * The current position (byte offset) of the parsing cursor in $content.
	 *
	 * @var int $cursor_position
	 */
	private $cursor_position;

	/**
	 * The stack of unprocessed shortcodes.
	 *
	 * As shortcodes are opened, they are placed on the stack, and as they're
	 * closed, they're processed and removed.
	 *
	 * @var array $stack
	 */
	private $stack;

	/**
	 * A list of all registered shortcode tag names.
	 *
	 * @var array $tagnames
	 */
	private $tagnames;

	/**
	 * The shortcode currently being parsed.
	 *
	 * As a shortcode is being parsed, it is stored here. If a new shortcode is
	 * found before parsing is complete, this shortcode is moved to the stack,
	 * and the new shortcode is stored here.
	 *
	 * Invariant relied on throughout: once the opening tag is complete,
	 * $full_tag is always the opening tag followed by exactly $inner_content
	 * (plus, at the very end, the closing tag and optional extra bracket).
	 *
	 * @var array|null $current_shortcode {
	 *     @type string $full_tag              The content that makes up this tag, from the opening bracket
	 *                                         to the closing bracket.
	 *     @type string $extra_opening_bracket Either the character '[' or an empty string,
	 *                                         depending on whether there was an extra opening
	 *                                         bracket for this shortcode. Essentially a flag.
	 *     @type string $tag_slug              The shortcode tag slug.
	 *     @type string $atts_and_values       The (unparsed) part of the shortcode tag that contains
	 *                                         attributes and their optional values.
	 *     @type string $self_closing_slash    Like $extra_opening_bracket, this is either the
	 *                                         slash used to self-close the shortcode tag, or an
	 *                                         empty string.
	 *     @type string $inner_content         The content enclosed by the shortcode so far, with any
	 *                                         nested shortcodes already replaced by their output.
	 *     @type string $extra_closing_bracket Either the character ']' or an empty string,
	 *                                         depending on whether there was an extra closing
	 *                                         bracket for this shortcode.
	 *     @type int    $cursor_position       The cursor position where this shortcode began.
	 * }
	 */
	private $current_shortcode;

	/**
	 * The default parsing state -- the cursor is not in a shortcode tag or
	 * shortcode content or a quoted string in a shortcode attribute value.
	 *
	 * @var int $SHORTCODE_PARSE_STATE_DEFAULT
	 */
	const SHORTCODE_PARSE_STATE_DEFAULT = 0;

	/**
	 * The cursor is inside the shortcode tag, past the shortcode tag slug.
	 *
	 * @var int $SHORTCODE_PARSE_STATE_IN_TAG
	 */
	const SHORTCODE_PARSE_STATE_IN_TAG = 1;

	/**
	 * The cursor is in the content of the shortcode -- past the opening tag
	 * but not yet to the closing tag.
	 *
	 * @var int $SHORTCODE_PARSE_STATE_IN_CONTENT
	 */
	const SHORTCODE_PARSE_STATE_IN_CONTENT = 2;

	/**
	 * The cursor is inside of a quoted string in the shortcode tag.
	 *
	 * @var int $SHORTCODE_PARSE_STATE_IN_QUOTED_STRING
	 */
	const SHORTCODE_PARSE_STATE_IN_QUOTED_STRING = 3;

	/**
	 * Store the content and tag names for later use.
	 *
	 * @param string $content  The HTML/text content to parse for shortcodes.
	 * @param array  $tagnames An array of string shortcode tag names.
	 */
	public function __construct( $content, $tagnames ) {
		$this->content  = $content;
		$this->tagnames = $tagnames;
	}

	/**
	 * Parse shortcodes in content and replace them with the output that their
	 * handler functions generate.
	 *
	 * The content is scanned once, character by character, with a small state
	 * machine (see the SHORTCODE_PARSE_STATE_* constants). Each completed
	 * shortcode is handed to do_shortcode_tag() and its text is replaced by
	 * the returned output in place.
	 *
	 * If no usable tag names were supplied, the content is returned untouched.
	 * A tag that is still open when the content ends (no closing "]") is
	 * treated as malformed and left in the content as literal text.
	 *
	 * @return string The content with shortcodes replaced by their output.
	 */
	public function parse() {
		$this->stack             = array();
		$this->current_shortcode = null;
		$this->state             = self::SHORTCODE_PARSE_STATE_DEFAULT;

		/*
		 * Quote each tag name for use in the pattern below. The delimiter is
		 * passed explicitly so that a "/" in a tag name cannot terminate the
		 * pattern, and empty names are dropped because an empty alternative
		 * would make every "[" look like the start of a shortcode.
		 */
		$quoted_tagnames = array();

		foreach ( (array) $this->tagnames as $tagname ) {
			$tagname = (string) $tagname;

			if ( '' !== $tagname ) {
				$quoted_tagnames[] = preg_quote( $tagname, '/' );
			}
		}

		if ( empty( $quoted_tagnames ) ) {
			// Nothing is registered, so nothing in the content can be a shortcode.
			return $this->content;
		}

		/*
		 * A regular expression that checks whether a string appears to begin
		 * with a tag for a registered shortcode.
		 */
		$registered_shortcode_regex = '/^(?P<extra_opening_bracket>\\[?)(?P<opening_bracket>\\[)(?P<tag_slug>' . join( '|', $quoted_tagnames ) . ')(?![\\w-])/u';

		/*
		 * Save some parsing time by starting at the first bracket. The helper
		 * searches *after* the cursor and leaves the cursor one character
		 * before the bracket, hence the -1 before and the increment after.
		 */
		$this->cursor_position = -1;
		$this->forward_cursor_to_next_bracket();
		$this->cursor_position++;

		$is_escaped = false;
		$delimiter  = null;

		// The content changes length as shortcodes are replaced, so its length is re-read on every pass.
		while ( $this->cursor_position < strlen( $this->content ) ) {
			$char = substr( $this->content, $this->cursor_position, 1 );

			$found_escape_character = false;

			switch ( $this->state ) {
				case self::SHORTCODE_PARSE_STATE_DEFAULT:
				case self::SHORTCODE_PARSE_STATE_IN_CONTENT:
					if (
						! $is_escaped
						&& '[' === $char
						&& preg_match( $registered_shortcode_regex, substr( $this->content, $this->cursor_position ), $m )
					) {
						// We have found the beginning of a shortcode; any shortcode in progress becomes its parent.
						if ( $this->current_shortcode ) {
							$this->stack[] = $this->current_shortcode;
						}

						$this->current_shortcode = array(
							'full_tag'              => $m[0],
							'extra_opening_bracket' => $m['extra_opening_bracket'],
							'tag_slug'              => $m['tag_slug'],
							'atts_and_values'       => '',
							'self_closing_slash'    => '',
							'inner_content'         => '',
							'extra_closing_bracket' => '',
							'cursor_position'       => $this->cursor_position,
						);

						// Land on the last character of the slug, as if it had just been processed.
						$this->cursor_position += strlen( $m[0] ) - 1;

						$this->state = self::SHORTCODE_PARSE_STATE_IN_TAG;
					} elseif ( self::SHORTCODE_PARSE_STATE_IN_CONTENT === $this->state ) {
						$this->current_shortcode['full_tag'] .= $char;

						$rest_of_closing_tag = '/' . $this->current_shortcode['tag_slug'] . ']';

						if ( '[' !== $char ) {
							// Ordinary content.
							$this->current_shortcode['inner_content'] .= $char;
						} elseif ( $this->cursor_is_followed_by( $rest_of_closing_tag ) ) {
							// The end of this shortcode.
							$this->current_shortcode['full_tag'] .= $rest_of_closing_tag;

							// Move the cursor to the end of the closing tag.
							$this->cursor_position += strlen( $rest_of_closing_tag );

							$this->resolve_extra_brackets();
							$this->process_current_shortcode();
						} else {
							$stack_index = $this->find_enclosing_shortcode_closed_at_cursor();

							if ( $stack_index < 0 ) {
								// Not a closing tag for anything that is open, so it is just a bracket in the content.
								$this->current_shortcode['inner_content'] .= $char;
							} else {
								/*
								 * We already saved the bracket as part of the full tag, expecting that the
								 * closing tag would be for the current shortcode. It's not, so remove it.
								 */
								$this->current_shortcode['full_tag'] = substr( $this->current_shortcode['full_tag'], 0, -1 );

								/*
								 * The "current" shortcode and every shortcode above the matched one on the
								 * stack turn out to have no closing tag of their own, so close each of them
								 * out as a shortcode without content. Each call pops one stack entry, so the
								 * loop ends exactly when the matched shortcode has become the current one.
								 */
								do {
									$this->discard_inner_content();
									$this->process_current_shortcode();
								} while ( count( $this->stack ) > $stack_index );

								/*
								 * Processing left the cursor just before the next bracket of the rewritten
								 * content, with everything up to there already recorded on the (new)
								 * current shortcode. The normal increment below resumes parsing on that
								 * bracket, where the closing tag will now match the current shortcode.
								 */
							}
						}
					}

					break;
				case self::SHORTCODE_PARSE_STATE_IN_TAG:
					$this->current_shortcode['full_tag'] .= $char;

					if ( '/' === $char && $this->cursor_is_followed_by( ']' ) ) {
						// The shortcode is over: "/]" self-closes the tag.
						$this->current_shortcode['self_closing_slash'] = '/';
						$this->current_shortcode['full_tag']          .= ']';
						$this->cursor_position++;

						$this->resolve_extra_brackets();
						$this->process_current_shortcode();
					} elseif ( ']' === $char ) {
						if ( $this->current_shortcode['extra_opening_bracket'] && $this->cursor_is_followed_by( ']' ) ) {
							/*
							 * This makes the assumption that this shortcode is closed as soon as the double brackets are found:
							 *
							 * [[my-shortcode]][/my-shortcode]]
							 *
							 * But in theory, this could just be a shortcode with the content "]".
							 */
							$this->current_shortcode['extra_closing_bracket'] = ']';
							$this->current_shortcode['full_tag']             .= ']';
							$this->cursor_position++;

							$this->process_current_shortcode();
						} elseif ( false === strpos( $this->content, '[/' . $this->current_shortcode['tag_slug'] . ']', $this->cursor_position ) ) {
							// If there's no closing tag, it's a self-enclosed shortcode, and we're done with it.
							$this->process_current_shortcode();
						} else {
							// A closing tag exists somewhere ahead, so start collecting content.
							$this->state = self::SHORTCODE_PARSE_STATE_IN_CONTENT;

							$this->absorb_content_up_to_next_bracket();
						}
					} else {
						$this->current_shortcode['atts_and_values'] .= $char;

						if ( '"' === $char || "'" === $char ) {
							$this->state = self::SHORTCODE_PARSE_STATE_IN_QUOTED_STRING;
							$delimiter   = $char;
						}
					}

					break;
				case self::SHORTCODE_PARSE_STATE_IN_QUOTED_STRING:
					$this->current_shortcode['full_tag']        .= $char;
					$this->current_shortcode['atts_and_values'] .= $char;

					// An escaped character is taken literally; otherwise look for an escape or the closing quote.
					if ( ! $is_escaped ) {
						if ( '\\' === $char ) {
							// The next character is escaped.
							$found_escape_character = true;
						} elseif ( $char === $delimiter ) {
							$this->state = self::SHORTCODE_PARSE_STATE_IN_TAG;
							$delimiter   = null;
						}
					}

					break;
			}

			// The next character is escaped only if this one was an (unescaped) escape character.
			$is_escaped = $found_escape_character;

			$this->cursor_position++;
		}

		if (
			self::SHORTCODE_PARSE_STATE_IN_TAG === $this->state
			|| self::SHORTCODE_PARSE_STATE_IN_QUOTED_STRING === $this->state
		) {
			/*
			 * The content ended inside a tag that was never closed, e.g.
			 *
			 *     This is my content [footag foo="abc" bar="def
			 *
			 * That is malformed, so it is not executed: the text is left in the content
			 * as-is and parsing falls back to whatever shortcode enclosed it.
			 *
			 * Note that anything swallowed by an unterminated quoted string, such as the
			 * [bartag] in `[footag foo=" [bartag]`, is not re-scanned for shortcodes.
			 */
			$this->current_shortcode = empty( $this->stack ) ? null : array_pop( $this->stack );
		}

		/*
		 * If we end with shortcodes still open, then there was a situation like this:
		 *
		 * [footag] [bartag] [baztag] [footag]content[/footag]
		 *
		 * i.e., a scenario where the parser was unsure whether the first [footag] was self-closing or not.
		 *
		 * By this point, $content will be in this format:
		 *
		 * [footag] bartag-output baztag-output footag-content-output
		 *
		 * so we need to back up and process the still-stored shortcodes as unclosed.
		 *
		 * An extreme version of this would look like:
		 *
		 * [footag] [footag] [footag] [footag] [footag] [footag] [footag] [footag] ... [footag][/footag]
		 *
		 * where the last tag would be the only one processed normally above and there would be n-1 [footag]s still on the stack.
		 */
		while ( $this->current_shortcode ) {
			// What we thought was part of this tag was just regular content.
			$this->discard_inner_content();

			// This sets $current_shortcode to the next stack entry (or null), so the loop terminates.
			$this->process_current_shortcode();
		}

		return $this->content;
	}

	/**
	 * Create an argument to pass to do_shortcode_tag.
	 *
	 * The format of this argument was determined by the capture groups of the
	 * regular expression that used to be used to parse shortcodes out of content.
	 *
	 * @param array $shortcode An associative array comprising data about a shortcode in the text.
	 * @return array A numerically-indexed array of the shortcode data ready for do_shortcode_tag().
	 */
	private function shortcode_argument( $shortcode ) {
		return array(
			$shortcode['full_tag'],
			$shortcode['extra_opening_bracket'],
			$shortcode['tag_slug'],
			$shortcode['atts_and_values'],
			$shortcode['self_closing_slash'],
			$shortcode['inner_content'],
			$shortcode['extra_closing_bracket'],
		);
	}

	/**
	 * Check whether the content directly after the cursor is exactly $text.
	 *
	 * @param string $text The text expected to start one character after the cursor.
	 * @return bool Whether $text immediately follows the character under the cursor.
	 */
	private function cursor_is_followed_by( $text ) {
		return $text === substr( $this->content, $this->cursor_position + 1, strlen( $text ) );
	}

	/**
	 * Find the open (stacked) shortcode whose closing tag starts at the cursor.
	 *
	 * The cursor is expected to be on a "[". The stack is searched from the
	 * innermost shortcode outwards, so the nearest enclosing match wins.
	 *
	 * @return int The stack index of the shortcode being closed, or -1 if the
	 *             bracket does not start a closing tag for any stacked shortcode.
	 */
	private function find_enclosing_shortcode_closed_at_cursor() {
		for ( $stack_index = count( $this->stack ) - 1; $stack_index >= 0; $stack_index-- ) {
			if ( $this->cursor_is_followed_by( '/' . $this->stack[ $stack_index ]['tag_slug'] . ']' ) ) {
				return $stack_index;
			}
		}

		return -1;
	}

	/**
	 * Settle whether the current shortcode really has an extra pair of brackets.
	 *
	 * Called with the cursor on the final "]" of the shortcode. If the tag began
	 * with an extra "[" and an extra "]" follows, the closing bracket is consumed
	 * and recorded. If no extra "]" follows, the extra "[" was just literal text:
	 * it is removed from the shortcode, which therefore starts one character
	 * later than first thought.
	 */
	private function resolve_extra_brackets() {
		if ( ! $this->current_shortcode['extra_opening_bracket'] ) {
			return;
		}

		if ( $this->cursor_is_followed_by( ']' ) ) {
			$this->current_shortcode['full_tag']             .= ']';
			$this->current_shortcode['extra_closing_bracket'] = ']';
			$this->cursor_position++;

			return;
		}

		$this->current_shortcode['full_tag']              = substr( $this->current_shortcode['full_tag'], 1 );
		$this->current_shortcode['extra_opening_bracket'] = '';
		$this->current_shortcode['cursor_position']      += 1;

		/*
		 * The dropped bracket stays in the content as a literal "[". If this shortcode
		 * is nested, that character belongs to the enclosing shortcode's content, and
		 * must be recorded there so the enclosing tag's length stays in step with
		 * the content it will eventually replace.
		 */
		if ( ! empty( $this->stack ) ) {
			$parent_index = count( $this->stack ) - 1;

			$this->stack[ $parent_index ]['inner_content'] .= '[';
			$this->stack[ $parent_index ]['full_tag']      .= '[';
		}
	}

	/**
	 * Reduce the current shortcode to just its opening tag.
	 *
	 * Used when a shortcode that was collecting content turns out not to have a
	 * closing tag of its own: what was recorded as its inner content is really
	 * just content that follows it.
	 */
	private function discard_inner_content() {
		$inner_content_length = strlen( $this->current_shortcode['inner_content'] );

		// A zero length must be skipped: substr( $tag, 0, -0 ) would return an empty string, not the whole tag.
		if ( $inner_content_length > 0 ) {
			$this->current_shortcode['full_tag'] = substr( $this->current_shortcode['full_tag'], 0, -1 * $inner_content_length );
		}

		$this->current_shortcode['inner_content'] = '';
	}

	/**
	 * Fast-forward over plain content inside the current shortcode.
	 *
	 * Moves the cursor to just before the next bracket and records everything
	 * that was skipped as part of the current shortcode's inner content.
	 * The character under the cursor on entry is assumed to be recorded already.
	 */
	private function absorb_content_up_to_next_bracket() {
		$previous_cursor_position = $this->cursor_position;

		$this->forward_cursor_to_next_bracket();

		if ( $this->cursor_position > $previous_cursor_position ) {
			// The +1 is because the character at $previous_cursor_position has already been recorded.
			$skipped_content = substr( $this->content, $previous_cursor_position + 1, $this->cursor_position - $previous_cursor_position );

			$this->current_shortcode['inner_content'] .= $skipped_content;
			$this->current_shortcode['full_tag']      .= $skipped_content;
		}
	}

	/**
	 * Process the current shortcode.
	 *
	 * The current shortcode is complete and can be processed. Its text in
	 * $content is replaced by the output of do_shortcode_tag(), the cursor is
	 * moved to the end of that output, and the enclosing shortcode (if any) is
	 * restored from the stack as if the content had been passed in with this
	 * shortcode already converted into its output.
	 *
	 * do_shortcode_tag() is not defined in this class; it is presumably
	 * WordPress's shortcode callback dispatcher -- confirm against the caller.
	 */
	private function process_current_shortcode() {
		$shortcode = $this->current_shortcode;

		// Cast so that a non-string return (e.g. false) is measured and spliced consistently.
		$shortcode_output = (string) do_shortcode_tag( $this->shortcode_argument( $shortcode ) );

		/*
		 * Replace based on position rather than find and replace, since this content is possible:
		 *
		 * Test 123 [some-shortcode] To use my shortcode, type [[some-shortcode]].
		 */
		$this->content =
			substr( $this->content, 0, $shortcode['cursor_position'] )
			. $shortcode_output
			. substr( $this->content, $shortcode['cursor_position'] + strlen( $shortcode['full_tag'] ) );

		/*
		 * Update the cursor position to the end of this shortcode's output.
		 * The -1 is because the position is incremented after this gets called to move it to the next character.
		 */
		$this->cursor_position = $shortcode['cursor_position'] + strlen( $shortcode_output ) - 1;

		if ( ! empty( $this->stack ) ) {
			// For any enclosing shortcode, its inner content needs to include the full output of this shortcode.
			$this->current_shortcode = array_pop( $this->stack );

			$this->current_shortcode['inner_content'] .= $shortcode_output;
			$this->current_shortcode['full_tag']      .= $shortcode_output;

			$this->state = self::SHORTCODE_PARSE_STATE_IN_CONTENT;

			$this->absorb_content_up_to_next_bracket();
		} else {
			$this->current_shortcode = null;

			$this->state = self::SHORTCODE_PARSE_STATE_DEFAULT;

			// In the default state, we can skip over any content that couldn't be a shortcode, so let's move forward near the next bracket.
			$this->forward_cursor_to_next_bracket();
		}
	}

	/**
	 * Moves the parsing cursor to the next possible location that might
	 * include a shortcode.
	 *
	 * The search starts at the character *after* the cursor, because the
	 * character under the cursor has already been consumed (it may be the last
	 * character of a shortcode's output, which must not be parsed again).
	 *
	 * The cursor ends up directly before the next bracket, or directly before
	 * the end of the content if there is no next bracket, because callers
	 * increment it before it is used again. It can therefore be -1 when the
	 * next bracket is the very first character of the content.
	 */
	private function forward_cursor_to_next_bracket() {
		// Clamp the offset into the string: below zero or past the end is not a valid strpos() offset.
		$search_from = min( strlen( $this->content ), max( 0, $this->cursor_position + 1 ) );

		$next_bracket_location = strpos( $this->content, '[', $search_from );

		if ( false === $next_bracket_location ) {
			// There is no next bracket, so fast-forward to the end.
			$next_bracket_location = strlen( $this->content );
		}

		$this->cursor_position = $next_bracket_location - 1;
	}
}
| 741 | |
| 742 | /** |