| | 227 | * A collection of methods for parsing and executing shortcodes in content. |
| | 228 | */ |
class Shortcode_Parser {
	/**
	 * The content being parsed. It is rewritten in place as each shortcode
	 * is replaced by the output of do_shortcode_tag().
	 *
	 * @var string
	 */
	private $content;

	/**
	 * The current parser state; one of the SHORTCODE_PARSE_STATE_* constants.
	 *
	 * @var int
	 */
	private $state;

	/**
	 * Character offset (as used by the mb_* functions) of the character being examined.
	 *
	 * @var int
	 */
	private $cursor_position;

	/**
	 * Shortcodes that were opened before the current one and are waiting for it to finish.
	 *
	 * @var array
	 */
	private $stack;

	/**
	 * The tag names that should be recognized as shortcodes.
	 *
	 * @var array
	 */
	private $tagnames;

	/**
	 * Data about the shortcode currently being parsed, or null when not inside one.
	 *
	 * @var array|null
	 */
	private $current_shortcode;

	const SHORTCODE_PARSE_STATE_DEFAULT          = 0;
	const SHORTCODE_PARSE_STATE_IN_TAG           = 1;
	const SHORTCODE_PARSE_STATE_IN_CONTENT       = 2;
	const SHORTCODE_PARSE_STATE_IN_QUOTED_STRING = 3;

	/**
	 * Set to true (together with WP_DEBUG) to have debug() write to the error log.
	 *
	 * @var bool
	 */
	private $DEBUG = false;

	/**
	 * @param string $content  The content to search for shortcodes.
	 * @param array  $tagnames The shortcode tag names to look for.
	 */
	public function __construct( $content, $tagnames ) {
		$this->content  = $content;
		$this->tagnames = $tagnames;
	}

	/**
	 * Parse shortcodes in content and replace them with the output that their handler functions generate.
	 *
	 * @return string The content with every recognized shortcode replaced by the result of do_shortcode_tag().
	 */
	public function parse() {
		$this->stack             = array();
		$this->current_shortcode = null;
		$this->state             = self::SHORTCODE_PARSE_STATE_DEFAULT;
		$this->cursor_position   = 0;

		// Without any tag names the alternation below would be empty and would match
		// a bare "[" as a shortcode with an empty slug, so there is nothing to parse.
		if ( empty( $this->tagnames ) ) {
			return $this->content;
		}

		// Quote each tag name for use inside a "/"-delimited pattern.
		$quoted_tagnames = array();

		foreach ( $this->tagnames as $tagname ) {
			$quoted_tagnames[] = preg_quote( $tagname, '/' );
		}

		/**
		 * A regular expression that checks whether a string appears to begin with a tag for
		 * a registered shortcode.
		 */
		$registered_shortcode_regex = '/^(?P<extra_opening_bracket>\\[?)(?P<opening_bracket>\\[)(?P<tag_slug>' . join( '|', $quoted_tagnames ) . ')(?![\\w-])/u';

		// Save some parsing time by starting a few characters before the first bracket.
		$this->forward_cursor_to_next_bracket();

		// The helper may return -1 because it expects the loop to increment the cursor
		// afterwards. That increment doesn't happen here, and a negative offset would make
		// mb_substr() read from the END of the content, so clamp it.
		$this->cursor_position = max( 0, $this->cursor_position );

		$is_escaped = false;
		$delimiter  = null;

		$this->debug( 'Parsing content: ' . $this->content );

		while ( $this->cursor_position < mb_strlen( $this->content ) ) {
			$char = mb_substr( $this->content, $this->cursor_position, 1 );

			$this->debug( 'In position ' . $this->cursor_position . ' with state ' . $this->state . ', looking at character "' . $char . '"' );

			$found_escape_character = false;

			switch ( $this->state ) {
				case self::SHORTCODE_PARSE_STATE_DEFAULT:
				case self::SHORTCODE_PARSE_STATE_IN_CONTENT:
					if (
						! $is_escaped
						&& '[' === $char
						&& preg_match( $registered_shortcode_regex, mb_substr( $this->content, $this->cursor_position ), $m )
					) {
						// A shortcode opening inside another shortcode's content: park the outer one.
						if ( $this->current_shortcode ) {
							$this->stack[] = $this->current_shortcode;
						}

						// We have found the beginning of a shortcode.
						$this->current_shortcode = array(
							'full_tag'              => $m[0],
							'extra_opening_bracket' => $m['extra_opening_bracket'],
							'tag_slug'              => $m['tag_slug'],
							'atts_and_values'       => '',
							'self_closing_slash'    => '',
							'inner_content'         => '',
							'extra_closing_bracket' => '',
							'cursor_position'       => $this->cursor_position,
						);

						$this->cursor_position += mb_strlen( $m[0] );

						$this->debug( 'Found "' . $m[0] . '", moving to position ' . $this->cursor_position );
						$this->debug( $this->current_shortcode );

						// Move back one so it's as if we just processed the last character of the shortcode slug.
						$this->cursor_position--;

						$this->state = self::SHORTCODE_PARSE_STATE_IN_TAG;
					} else {
						if ( self::SHORTCODE_PARSE_STATE_IN_CONTENT === $this->state ) {
							$this->current_shortcode['full_tag'] .= $char;
						}

						if ( ! $is_escaped && '\\' === $char ) {
							// The next character is escaped.
							$found_escape_character = true;

							if ( self::SHORTCODE_PARSE_STATE_IN_CONTENT === $this->state ) {
								$this->current_shortcode['inner_content'] .= $char;
							}
						} elseif ( $is_escaped ) {
							if ( self::SHORTCODE_PARSE_STATE_IN_CONTENT === $this->state ) {
								$this->current_shortcode['inner_content'] .= $char;
							}
						} elseif ( self::SHORTCODE_PARSE_STATE_IN_CONTENT === $this->state && '[' === $char ) {
							// Check whether it's a closing tag of any currently open shortcode.
							$rest_of_closing_tag = '/' . $this->current_shortcode['tag_slug'] . ']';

							if ( $rest_of_closing_tag === mb_substr( $this->content, $this->cursor_position + 1, mb_strlen( $rest_of_closing_tag ) ) ) {
								// The end of this shortcode.
								$this->current_shortcode['full_tag'] .= $rest_of_closing_tag;

								// Move the cursor to the end of the closing tag.
								$this->cursor_position += mb_strlen( $rest_of_closing_tag );

								$this->resolve_extra_opening_bracket();

								$this->process_current_shortcode();
							} else {
								$this->debug( 'The closing tag was not for the currently open shortcode.' );

								for ( $stack_index = count( $this->stack ) - 1; $stack_index >= 0; $stack_index-- ) {
									$rest_of_closing_tag = '/' . $this->stack[ $stack_index ]['tag_slug'] . ']';

									if ( $rest_of_closing_tag === mb_substr( $this->content, $this->cursor_position + 1, mb_strlen( $rest_of_closing_tag ) ) ) {
										// Yes, it closes this one.

										// We already saved the bracket as part of the full tag, expecting that the
										// closing tag would be for the current shortcode. It's not, so remove it.
										$this->current_shortcode['full_tag'] = mb_substr( $this->current_shortcode['full_tag'], 0, -1 );

										$this->debug( 'The closing tag was for this shortcode:', $this->stack[ $stack_index ] );

										// This means that the "current" shortcode and any others above this one on the
										// stack need to be closed out, because they are self-closing.
										do {
											$this->debug( 'Inner content was:', $this->current_shortcode['inner_content'], 'Full tag was:', $this->current_shortcode['full_tag'] );

											$this->discard_inner_content_of_current_shortcode();

											$this->process_current_shortcode(); // This sets $current_shortcode using the top stack item, so we don't need to do it.
										} while ( count( $this->stack ) > $stack_index + 1 );

										// At this point, the shortcode that is being closed right now is $this->current_shortcode.
										// The easiest way to process this without duplicating code is to resume parsing with the
										// new stack and current shortcode, so the section above will get triggered once the
										// closing tag is reached again, since it will be for the current shortcode.
										//
										// process_current_shortcode() leaves the cursor one character BEFORE the next
										// unprocessed character because it expects the increment at the bottom of the loop.
										// `continue` skips that increment, so do it here; otherwise the preceding character
										// would be appended to the enclosing shortcode a second time.
										$this->cursor_position++;

										$this->debug( 'Restarting this iteration of the parsing loop with new stack structure.' );

										// Levels: 1 = for, 2 = switch, 3 = the main while loop.
										continue 3;
									}
								}

								// It wasn't a closing tag for any open shortcode, so it's just content.
								$this->current_shortcode['inner_content'] .= $char;
							}
						} elseif ( self::SHORTCODE_PARSE_STATE_IN_CONTENT === $this->state ) {
							$this->current_shortcode['inner_content'] .= $char;
						}
					}

					break;
				case self::SHORTCODE_PARSE_STATE_IN_TAG:
					$this->current_shortcode['full_tag'] .= $char;

					if ( ! $is_escaped && '/' === $char && mb_substr( $this->content, $this->cursor_position + 1, 1 ) === ']' ) {
						// The shortcode is over.
						$this->current_shortcode['self_closing_slash'] = '/';
						$this->current_shortcode['full_tag']          .= ']';
						$this->cursor_position++;

						// If the shortcode had an extra opening bracket but doesn't have an extra
						// closing bracket, ignore the extra opening bracket.
						$this->resolve_extra_opening_bracket();

						$this->process_current_shortcode();

						break;
					} elseif ( ! $is_escaped && ']' === $char ) {
						if ( $this->current_shortcode['extra_opening_bracket'] ) {
							// This makes the assumption that this shortcode is closed as soon as the double brackets are found:
							//
							// [[my-shortcode]][/my-shortcode]]
							//
							// But in theory, this could just be a shortcode with the content "]".

							if ( ']' === mb_substr( $this->content, $this->cursor_position + 1, 1 ) ) {
								$this->current_shortcode['extra_closing_bracket'] = ']';
								$this->current_shortcode['full_tag']             .= ']';
								$this->cursor_position++;

								$this->process_current_shortcode();
								break;
							} else {
								// There was not an extra closing bracket.
								$this->debug( 'Extra closing bracket not found; the character was ' . mb_substr( $this->content, $this->cursor_position + 1, 1 ) );
							}
						}

						// Search from the cursor onward using the offset argument instead of copying the rest of the content.
						if ( false === mb_strpos( $this->content, '[/' . $this->current_shortcode['tag_slug'] . ']', $this->cursor_position ) ) {
							// If there's no closing tag, it's a self-enclosed shortcode, and we're done with it.
							$this->process_current_shortcode();
						} else {
							$this->debug( 'Expecting to find a closing tag for ' . $this->current_shortcode['tag_slug'] );
							$this->state = self::SHORTCODE_PARSE_STATE_IN_CONTENT;
						}
					} else {
						$this->current_shortcode['atts_and_values'] .= $char;

						if ( ! $is_escaped && '\\' === $char ) {
							$found_escape_character = true;
						} elseif ( ! $is_escaped && ( '"' === $char || "'" === $char ) ) {
							$this->state = self::SHORTCODE_PARSE_STATE_IN_QUOTED_STRING;
							$delimiter   = $char;
						}
					}

					break;
				case self::SHORTCODE_PARSE_STATE_IN_QUOTED_STRING:
					$this->current_shortcode['full_tag']        .= $char;
					$this->current_shortcode['atts_and_values'] .= $char;

					// An escaped character is taken literally, so only unescaped ones matter here.
					if ( ! $is_escaped ) {
						if ( '\\' === $char ) {
							// The next character is escaped.
							$found_escape_character = true;
						} elseif ( $char === $delimiter ) {
							$this->state = self::SHORTCODE_PARSE_STATE_IN_TAG;
							$delimiter   = null;
						}
					}

					break;
			}

			// The next character is escaped only if this one was an (unescaped) escape character.
			$is_escaped = $found_escape_character;

			$this->cursor_position++;

			$this->debug( 'Cursor position is ' . $this->cursor_position . '; strlen is ' . mb_strlen( $this->content ) );
		}

		if ( self::SHORTCODE_PARSE_STATE_IN_QUOTED_STRING === $this->state ) {
			// example: This is my content [footag foo=" [bartag]
			// Should it be reprocessed in order to convert [bartag] or is this considered malformed?
		}

		if ( self::SHORTCODE_PARSE_STATE_IN_TAG === $this->state ) {
			// example: This is my content [footag foo="abc" bar="def" [bartag]
			// Should it be reprocessed in order to convert [bartag] or is this considered malformed?
		}

		if ( $this->current_shortcode ) {
			$this->debug( 'There are still pending shortcodes to process.', $this->current_shortcode, $this->stack );

			/*
			 * If we end with shortcodes still on the stack, then there was a situation like this:
			 *
			 * [footag] [bartag] [baztag] [footag]content[/footag]
			 *
			 * i.e., a scenario where the parser was unsure whether the first [footag] was self-closing or not.
			 *
			 * By this point, $content will be in this format:
			 *
			 * [footag] bartag-output baztag-output footag-content-output
			 *
			 * so we need to back up and process the still-stored shortcodes as unclosed.
			 *
			 * An extreme version of this would look like:
			 *
			 * [footag] [footag] [footag] [footag] [footag] [footag] [footag] [footag] ... [footag][/footag]
			 *
			 * where the last tag would be the only one processed normally above and there would be n-1 [footag]s still on the stack.
			 */
			while ( $this->current_shortcode ) {
				// What we thought was part of this tag was just regular content.
				$this->discard_inner_content_of_current_shortcode();

				$this->process_current_shortcode(); // This sets $current_shortcode, so we don't need to do it.
			}
		}

		return $this->content;
	}

	/**
	 * Settle whether the current shortcode really is wrapped in double brackets, now that its end has been reached.
	 *
	 * If the shortcode was opened with an extra bracket and the character after the cursor is "]",
	 * that bracket is consumed as the extra closing bracket. If there is no extra closing bracket,
	 * the extra opening bracket is dropped from the shortcode and the shortcode's start position
	 * moves one character to the right. Does nothing when there was no extra opening bracket.
	 */
	private function resolve_extra_opening_bracket() {
		if ( ! $this->current_shortcode['extra_opening_bracket'] ) {
			return;
		}

		if ( ']' === mb_substr( $this->content, $this->cursor_position + 1, 1 ) ) {
			$this->current_shortcode['full_tag']             .= ']';
			$this->current_shortcode['extra_closing_bracket'] = ']';
			$this->cursor_position++;
		} else {
			$this->current_shortcode['full_tag']              = mb_substr( $this->current_shortcode['full_tag'], 1 );
			$this->current_shortcode['extra_opening_bracket'] = '';

			// We initially thought it had an extra opening bracket, but it doesn't,
			// so it started one character later than we thought.
			$this->current_shortcode['cursor_position'] += 1;
		}
	}

	/**
	 * Turn the current shortcode into one without inner content by removing the
	 * accumulated inner content from the end of its full tag.
	 *
	 * The length is checked first because mb_substr( $tag, 0, -0 ) is mb_substr( $tag, 0, 0 ),
	 * which returns an empty string rather than the whole tag.
	 */
	private function discard_inner_content_of_current_shortcode() {
		$inner_content_length = mb_strlen( $this->current_shortcode['inner_content'] );

		if ( $inner_content_length > 0 ) {
			$this->current_shortcode['full_tag'] = mb_substr( $this->current_shortcode['full_tag'], 0, -1 * $inner_content_length );
		}

		// And there is no inner content.
		$this->current_shortcode['inner_content'] = '';
	}

	/**
	 * Create an argument to pass to do_shortcode_tag. The format of this argument was determined
	 * by the capture groups of the regular expression that used to be used to parse shortcodes out of content.
	 *
	 * @param array $shortcode An associative array comprising data about a shortcode in the text.
	 * @return array A numerically-indexed array of the shortcode data ready for do_shortcode_tag().
	 */
	private function shortcode_argument( $shortcode ) {
		return array(
			$shortcode['full_tag'],
			$shortcode['extra_opening_bracket'],
			$shortcode['tag_slug'],
			$shortcode['atts_and_values'],
			$shortcode['self_closing_slash'],
			$shortcode['inner_content'],
			$shortcode['extra_closing_bracket'],
		);
	}

	/**
	 * The current shortcode is complete and can be processed.
	 * Process it and modify the enclosing shortcode as if the content was passed in
	 * with this shortcode already converted into HTML.
	 *
	 * Afterwards the cursor is left one character before the first character that follows
	 * the shortcode's output, because callers are expected to increment it before it is used
	 * again. The enclosing shortcode (if any) becomes the current one and the state becomes
	 * IN_CONTENT; otherwise the current shortcode is cleared and the state becomes DEFAULT.
	 */
	private function process_current_shortcode() {
		$this->debug( 'Content is: ' . $this->content );

		$this->debug( $this->current_shortcode );

		$argument_for_do_shortcode_tag = $this->shortcode_argument( $this->current_shortcode );

		$shortcode_output = do_shortcode_tag( $argument_for_do_shortcode_tag );

		// Replace based on position rather than find and replace, since this content is possible:
		//
		// Test 123 [some-shortcode] To use my shortcode, type [[some-shortcode]].
		$this->content =
			mb_substr( $this->content, 0, $this->current_shortcode['cursor_position'] )
			. $shortcode_output
			. mb_substr( $this->content, $this->current_shortcode['cursor_position'] + mb_strlen( $this->current_shortcode['full_tag'] ) )
		;

		// Update the cursor position to the end of this shortcode's output.
		// The -1 is because the position is incremented after this gets called to move it to the next character.
		$this->cursor_position = $this->current_shortcode['cursor_position'] + mb_strlen( $shortcode_output ) - 1;

		// For any enclosing shortcode, its inner content needs to include the full output of this shortcode.
		if ( ! empty( $this->stack ) ) {
			$this->current_shortcode = array_pop( $this->stack );

			$this->current_shortcode['inner_content'] .= $shortcode_output;
			$this->current_shortcode['full_tag']      .= $shortcode_output;

			$this->state = self::SHORTCODE_PARSE_STATE_IN_CONTENT;
		} else {
			$this->current_shortcode = null;

			$this->state = self::SHORTCODE_PARSE_STATE_DEFAULT;

			// In the default state, we can skip over any content that couldn't be a shortcode,
			// so let's move forward near the next bracket.
			$this->forward_cursor_to_next_bracket();
		}

		$this->debug( 'Content is: ' . $this->content );
	}

	/**
	 * Skip the cursor ahead to just before the next "[" in the content, or to the end of
	 * the content when no bracket remains.
	 *
	 * Only meant to be used in the default state (outside of any shortcode), where text
	 * without a bracket cannot start a shortcode. The cursor is left two characters before
	 * the bracket: one for the increment callers perform before the cursor is used, and one
	 * so that a character preceding the bracket (such as an escaping backslash) is still examined.
	 */
	private function forward_cursor_to_next_bracket() {
		/*
		 * The max() here is because $cursor_position can be -1 if a shortcode
		 * at the beginning of the content didn't have any output and reset the
		 * cursor back to the beginning. It's -1 instead of zero because it will
		 * be incremented later in the loop to set it to zero for the next iteration.
		 */
		$next_bracket_location = mb_strpos( $this->content, '[', max( 0, $this->cursor_position ) );

		if ( false === $next_bracket_location ) {
			// No bracket is left, so nothing further can be a shortcode; there is no need
			// to walk the remaining content one character at a time.
			$this->cursor_position = mb_strlen( $this->content );

			$this->debug( 'No more brackets; jumped to the end at position ' . $this->cursor_position );

			return;
		}

		$this->debug( 'Current cursor position: ' . $this->cursor_position );

		/*
		 * Again, the -1 is because this will be incremented before it is used,
		 * and we really want it to have a minimum value of zero.
		 */
		$this->cursor_position = max( -1, $next_bracket_location - 2 );

		$this->debug( 'Jumped ahead to position ' . $this->cursor_position );
	}

	/**
	 * Outputs debug data. Useful when running unit tests to see how content is being parsed.
	 * Only writes to the error log when both WP_DEBUG and $this->DEBUG are enabled.
	 *
	 * @param mixed One or more variables of any type.
	 */
	private function debug( /* ... */ ) {
		if ( defined( 'WP_DEBUG' ) && WP_DEBUG && $this->DEBUG ) {
			foreach ( func_get_args() as $arg ) {
				if ( 'string' === gettype( $arg ) ) {
					error_log( 'Shortcode_Parser debug: ' . $arg );
				} else {
					error_log( 'Shortcode_Parser debug: ' . var_export( $arg, true ) );
				}
			}
		}
	}

}
| | 670 | |
| | 671 | /** |