| 227 | * A collection of methods for parsing and executing shortcodes in content. |
| 228 | */ |
class Shortcode_Parser {
	/**
	 * The content being parsed. Shortcodes are replaced in place as they are processed.
	 *
	 * @var string
	 */
	private $content;

	/**
	 * The current parser state: one of the SHORTCODE_PARSE_STATE_* constants.
	 *
	 * @var int
	 */
	private $state;

	/**
	 * Character (not byte) offset into $content of the character being examined.
	 *
	 * @var int
	 */
	private $cursor_position;

	/**
	 * Shortcodes that were opened before the current one and have not been processed yet.
	 *
	 * @var array
	 */
	private $stack;

	/**
	 * The shortcode tag names the parser looks for.
	 *
	 * @var array
	 */
	private $tagnames;

	/**
	 * Data about the shortcode currently being read, or null when the cursor is outside of any shortcode.
	 *
	 * @var array|null
	 */
	private $current_shortcode = null;

	const SHORTCODE_PARSE_STATE_DEFAULT          = 0;
	const SHORTCODE_PARSE_STATE_IN_TAG           = 1;
	const SHORTCODE_PARSE_STATE_IN_CONTENT       = 2;
	const SHORTCODE_PARSE_STATE_IN_QUOTED_STRING = 3;

	/**
	 * When true (and WP_DEBUG is enabled), debug() writes the parser's progress to the error log.
	 *
	 * @var bool
	 */
	private $DEBUG = false;

	/**
	 * @param string $content  The content to search for shortcodes.
	 * @param array  $tagnames The shortcode tag names to look for.
	 */
	public function __construct( $content, $tagnames ) {
		$this->content  = $content;
		$this->tagnames = $tagnames;
	}

	/**
	 * Parse shortcodes in content and replace them with the output that their handler functions generate.
	 *
	 * The content is walked one character at a time. Each completed shortcode is replaced in
	 * $this->content immediately, and the cursor jumps past the inserted output.
	 *
	 * @return string The content with every recognized shortcode replaced by its output.
	 */
	public function parse() {
		$this->stack             = array();
		$this->current_shortcode = null;
		$this->cursor_position   = 0;
		$this->state             = self::SHORTCODE_PARSE_STATE_DEFAULT;

		// With no tag names the alternation below would be empty and would match an empty
		// tag slug after any "[", so there is nothing that can legitimately be parsed.
		if ( empty( $this->tagnames ) ) {
			return $this->content;
		}

		// Quote with the pattern delimiter so that a tag name containing "/" cannot end the pattern early.
		$quoted_tagnames = array();

		foreach ( $this->tagnames as $tagname ) {
			$quoted_tagnames[] = preg_quote( $tagname, '/' );
		}

		/**
		 * A regular expression that checks whether a string appears to begin with a tag for
		 * a registered shortcode.
		 */
		$registered_shortcode_regex = '/^(?P<extra_opening_bracket>\\[?)(?P<opening_bracket>\\[)(?P<tag_slug>' . join( '|', $quoted_tagnames ) . ')(?![\\w-])/u';

		$is_escaped = false;
		$delimiter  = null;

		$this->debug( 'Parsing content: ' . $this->content );

		// The length is re-read on every pass because processing a shortcode changes the content.
		while ( $this->cursor_position < mb_strlen( $this->content ) ) {
			$char = mb_substr( $this->content, $this->cursor_position, 1 );

			$this->debug( 'In position ' . $this->cursor_position . ' with state ' . $this->state . ', looking at character "' . $char . '"' );

			$found_escape_character = false;

			switch ( $this->state ) {
				case self::SHORTCODE_PARSE_STATE_DEFAULT:
				case self::SHORTCODE_PARSE_STATE_IN_CONTENT:
					if (
						! $is_escaped
						&& '[' === $char
						&& preg_match( $registered_shortcode_regex, mb_substr( $this->content, $this->cursor_position ), $m )
					) {
						// We have found the beginning of a shortcode. Any shortcode that is
						// already open becomes its parent and waits on the stack.
						if ( $this->current_shortcode ) {
							$this->stack[] = $this->current_shortcode;
						}

						$this->current_shortcode = array(
							'full_tag'              => $m[0],
							'extra_opening_bracket' => $m['extra_opening_bracket'],
							'tag_slug'              => $m['tag_slug'],
							'atts_and_values'       => '',
							'self_closing_slash'    => '',
							'inner_content'         => '',
							'extra_closing_bracket' => '',
							'cursor_position'       => $this->cursor_position,
						);

						$this->cursor_position += mb_strlen( $m[0] );

						$this->debug( 'Found "' . $m[0] . '", moving to position ' . $this->cursor_position );
						$this->debug( $this->current_shortcode );

						// Move back one so it's as if we just processed the last character of the shortcode slug.
						$this->cursor_position--;

						$this->state = self::SHORTCODE_PARSE_STATE_IN_TAG;
						break;
					}

					// Only the IN_CONTENT state has an open shortcode to record characters against.
					// Writing to $this->current_shortcode in the default state would turn null into
					// a bogus, truthy array that later gets processed as if it were a shortcode.
					$in_content = ( self::SHORTCODE_PARSE_STATE_IN_CONTENT === $this->state );

					if ( $in_content ) {
						$this->current_shortcode['full_tag'] .= $char;
					}

					if ( ! $is_escaped && '\\' === $char ) {
						// The next character is escaped.
						$found_escape_character = true;

						if ( $in_content ) {
							$this->current_shortcode['inner_content'] .= $char;
						}

						break;
					}

					if ( ! $in_content ) {
						break;
					}

					if ( $is_escaped || '[' !== $char ) {
						// An ordinary (or escaped) character of the open shortcode's content.
						$this->current_shortcode['inner_content'] .= $char;
						break;
					}

					// An unescaped "[" that does not start a registered shortcode: it may be a closing tag.
					$this->handle_bracket_in_content();

					break;
				case self::SHORTCODE_PARSE_STATE_IN_TAG:
					$this->current_shortcode['full_tag'] .= $char;

					if ( ! $is_escaped && '/' === $char && ']' === mb_substr( $this->content, $this->cursor_position + 1, 1 ) ) {
						// The shortcode is over.
						$this->current_shortcode['self_closing_slash'] = '/';
						$this->current_shortcode['full_tag']          .= ']';
						$this->cursor_position++;

						$this->resolve_extra_opening_bracket();
						$this->process_current_shortcode();

						break;
					}

					if ( ! $is_escaped && ']' === $char ) {
						if ( $this->current_shortcode['extra_opening_bracket'] ) {
							// This makes the assumption that this shortcode is closed as soon as the double brackets are found:
							//
							// [[my-shortcode]][/my-shortcode]]
							//
							// But in theory, this could just be a shortcode with the content "]".

							if ( ']' === mb_substr( $this->content, $this->cursor_position + 1, 1 ) ) {
								$this->current_shortcode['extra_closing_bracket'] = ']';
								$this->current_shortcode['full_tag']             .= ']';
								$this->cursor_position++;

								$this->process_current_shortcode();
								break;
							}

							// There was not an extra closing bracket.
							$this->debug( 'Extra closing bracket not found; the character was ' . mb_substr( $this->content, $this->cursor_position + 1, 1 ) );
						}

						$closing_tag = '[/' . $this->current_shortcode['tag_slug'] . ']';

						if ( false === mb_strpos( $this->content, $closing_tag, $this->cursor_position ) ) {
							// If there's no closing tag, it's a self-enclosed shortcode, and we're done with it.
							$this->process_current_shortcode();
						} else {
							$this->debug( 'Expecting to find a closing tag for ' . $this->current_shortcode['tag_slug'] );
							$this->state = self::SHORTCODE_PARSE_STATE_IN_CONTENT;
						}

						break;
					}

					$this->current_shortcode['atts_and_values'] .= $char;

					if ( ! $is_escaped && '\\' === $char ) {
						$found_escape_character = true;
					} elseif ( ! $is_escaped && ( '"' === $char || "'" === $char ) ) {
						$this->state = self::SHORTCODE_PARSE_STATE_IN_QUOTED_STRING;
						$delimiter   = $char;
					}

					break;
				case self::SHORTCODE_PARSE_STATE_IN_QUOTED_STRING:
					$this->current_shortcode['full_tag']        .= $char;
					$this->current_shortcode['atts_and_values'] .= $char;

					// An escaped character is taken literally, so only unescaped ones can change anything.
					if ( ! $is_escaped ) {
						if ( '\\' === $char ) {
							// The next character is escaped.
							$found_escape_character = true;
						} elseif ( $char === $delimiter ) {
							$this->state = self::SHORTCODE_PARSE_STATE_IN_TAG;
							$delimiter   = null;
						}
					}

					break;
			}

			// The next character is escaped only if this one was an unescaped escape character.
			$is_escaped = $found_escape_character;

			$this->cursor_position++;

			$this->debug( 'Cursor position is ' . $this->cursor_position . '; strlen is ' . mb_strlen( $this->content ) );
		}

		/*
		 * Open question: the content can end while still inside a tag or a quoted attribute value:
		 *
		 *     This is my content [footag foo=" [bartag]
		 *     This is my content [footag foo="abc" bar="def" [bartag]
		 *
		 * Should it be reprocessed in order to convert [bartag] or is this considered malformed?
		 * For now the pending shortcode is simply processed below like any other unclosed one.
		 */

		if ( $this->current_shortcode ) {
			$this->debug( 'There are still pending shortcodes to process.', $this->current_shortcode, $this->stack );

			/*
			 * If we end with shortcodes still on the stack, then there was a situation like this:
			 *
			 * [footag] [bartag] [baztag] [footag]content[/footag]
			 *
			 * i.e., a scenario where the parser was unsure whether the first [footag] was self-closing or not.
			 *
			 * By this point, $content will be in this format:
			 *
			 * [footag] bartag-output baztag-output footag-content-output
			 *
			 * so we need to back up and process the still-stored shortcodes as unclosed.
			 *
			 * An extreme version of this would look like:
			 *
			 * [footag] [footag] [footag] [footag] [footag] [footag] [footag] [footag] ... [footag][/footag]
			 *
			 * where the last tag would be the only one processed normally above and there would be n-1 [footag]s still on the stack.
			 */
			while ( $this->current_shortcode ) {
				// Processing replaces $current_shortcode with the next stack item (or null), so this terminates.
				$this->process_current_shortcode_as_self_closing();
			}
		}

		return $this->content;
	}

	/**
	 * Handle an unescaped "[" found in the content of the open shortcode when that bracket
	 * does not begin a registered shortcode. By the time this is called, the bracket has
	 * already been appended to the open shortcode's full tag.
	 *
	 * The bracket is either the start of the open shortcode's closing tag, the start of the
	 * closing tag of a shortcode waiting on the stack, or just a literal character.
	 */
	private function handle_bracket_in_content() {
		$current_slug = $this->current_shortcode['tag_slug'];

		if ( $this->is_closing_tag_at_cursor( $current_slug ) ) {
			// The end of this shortcode.
			$rest_of_closing_tag = '/' . $current_slug . ']';

			$this->current_shortcode['full_tag'] .= $rest_of_closing_tag;

			// Move the cursor to the end of the closing tag.
			$this->cursor_position += mb_strlen( $rest_of_closing_tag );

			$this->resolve_extra_opening_bracket();
			$this->process_current_shortcode();

			return;
		}

		$this->debug( 'The closing tag was not for the currently open shortcode.' );

		// Look for the innermost enclosing shortcode that this tag closes.
		for ( $stack_index = count( $this->stack ) - 1; $stack_index >= 0; $stack_index-- ) {
			if ( ! $this->is_closing_tag_at_cursor( $this->stack[ $stack_index ]['tag_slug'] ) ) {
				continue;
			}

			// We already saved the bracket as part of the full tag, expecting that the closing tag
			// would be for the current shortcode. It's not, so remove it.
			$this->current_shortcode['full_tag'] = mb_substr( $this->current_shortcode['full_tag'], 0, -1 );

			$this->debug( 'The closing tag was for this shortcode:', $this->stack[ $stack_index ] );

			// The "current" shortcode and the ones above the match on the stack turned out to be
			// self-closing, so close them out. Any shortcode this loop leaves open above the match
			// is closed the same way when the scan reaches this closing tag again.
			do {
				$this->debug( 'Inner content was:', $this->current_shortcode['inner_content'], 'Full tag was:', $this->current_shortcode['full_tag'] );

				$this->process_current_shortcode_as_self_closing();
			} while ( count( $this->stack ) > $stack_index + 1 );

			// process_current_shortcode() left the cursor on the last character of the inserted
			// output, expecting the main loop to advance it. Return normally so that happens:
			// scanning resumes right after the output, the text that used to be the self-closed
			// shortcode's content is read as content of the enclosing shortcode, and the closing
			// tag is then matched against the new current shortcode.
			$this->debug( 'Resuming the parsing loop with the new stack structure.' );

			return;
		}

		// Not a closing tag for anything that is open; the bracket is plain content.
		$this->current_shortcode['inner_content'] .= '[';
	}

	/**
	 * Check whether the text right after the cursor completes a closing tag for the given slug.
	 * The cursor itself is expected to be on the "[" of the candidate closing tag.
	 *
	 * @param string $tag_slug The shortcode tag name to test for.
	 * @return bool True when "/{$tag_slug}]" immediately follows the cursor.
	 */
	private function is_closing_tag_at_cursor( $tag_slug ) {
		$rest_of_closing_tag = '/' . $tag_slug . ']';

		return $rest_of_closing_tag === mb_substr( $this->content, $this->cursor_position + 1, mb_strlen( $rest_of_closing_tag ) );
	}

	/**
	 * Once the end of the current shortcode has been reached, settle whether its extra opening
	 * bracket is matched by an extra closing bracket right after the cursor.
	 *
	 * If it is, the closing bracket is consumed. If it is not, the extra opening bracket is
	 * treated as not belonging to the shortcode at all.
	 */
	private function resolve_extra_opening_bracket() {
		if ( ! $this->current_shortcode['extra_opening_bracket'] ) {
			return;
		}

		if ( ']' === mb_substr( $this->content, $this->cursor_position + 1, 1 ) ) {
			$this->current_shortcode['full_tag']             .= ']';
			$this->current_shortcode['extra_closing_bracket'] = ']';
			$this->cursor_position++;

			return;
		}

		// If there was an extra opening bracket but not an extra closing bracket,
		// ignore the extra opening bracket.
		$this->current_shortcode['full_tag']              = mb_substr( $this->current_shortcode['full_tag'], 1 );
		$this->current_shortcode['extra_opening_bracket'] = '';

		// We initially thought it had an extra opening bracket, but it doesn't,
		// so it started one character later than we thought.
		$this->current_shortcode['cursor_position'] += 1;
	}

	/**
	 * Process the current shortcode as one without a closing tag: what was collected as its
	 * inner content was just regular content that follows the tag.
	 */
	private function process_current_shortcode_as_self_closing() {
		$inner_length = mb_strlen( $this->current_shortcode['inner_content'] );

		// A negative length of zero would make mb_substr() return an empty string and wipe out
		// the whole tag, so only trim when there is something to trim.
		if ( $inner_length > 0 ) {
			$this->current_shortcode['full_tag'] = mb_substr( $this->current_shortcode['full_tag'], 0, -1 * $inner_length );
		}

		// And there is no inner content.
		$this->current_shortcode['inner_content'] = '';

		$this->process_current_shortcode();
	}

	/**
	 * Create an argument to pass to do_shortcode_tag. The format of this argument was determined
	 * by the capture groups of the regular expression that used to be used to parse shortcodes out of content.
	 *
	 * @param array $shortcode An associative array comprising data about a shortcode in the text.
	 * @return array A numerically-indexed array of the shortcode data ready for do_shortcode_tag().
	 */
	private function shortcode_argument( $shortcode ) {
		return array(
			$shortcode['full_tag'],
			$shortcode['extra_opening_bracket'],
			$shortcode['tag_slug'],
			$shortcode['atts_and_values'],
			$shortcode['self_closing_slash'],
			$shortcode['inner_content'],
			$shortcode['extra_closing_bracket'],
		);
	}

	/**
	 * The current shortcode is complete and can be processed.
	 *
	 * Its text in $content is replaced with the output of do_shortcode_tag(), and the cursor is
	 * placed on the last character of that output (the caller's loop is expected to advance it
	 * by one). If a shortcode is waiting on the stack, it becomes the current shortcode again
	 * and the output is appended to its inner content and full tag, as if the content had been
	 * passed in with this shortcode already converted. Otherwise the parser returns to the
	 * default state.
	 */
	private function process_current_shortcode() {
		$this->debug( 'Content is: ' . $this->content );
		$this->debug( $this->current_shortcode );

		$shortcode_output = do_shortcode_tag( $this->shortcode_argument( $this->current_shortcode ) );

		$start = $this->current_shortcode['cursor_position'];
		$end   = $start + mb_strlen( $this->current_shortcode['full_tag'] );

		// Replace based on position rather than find and replace, since this content is possible:
		//
		// Test 123 [some-shortcode] To use my shortcode, type [[some-shortcode]].
		$this->content = mb_substr( $this->content, 0, $start ) . $shortcode_output . mb_substr( $this->content, $end );

		// Update the cursor position to the end of this shortcode's output.
		// The -1 is because the position is incremented after this gets called to move it to the next character.
		$this->cursor_position = $start + mb_strlen( $shortcode_output ) - 1;

		if ( empty( $this->stack ) ) {
			$this->current_shortcode = null;
			$this->state             = self::SHORTCODE_PARSE_STATE_DEFAULT;
		} else {
			// For any enclosing shortcode, its inner content needs to include the full output of this shortcode.
			$this->current_shortcode = array_pop( $this->stack );

			$this->current_shortcode['inner_content'] .= $shortcode_output;
			$this->current_shortcode['full_tag']      .= $shortcode_output;

			$this->state = self::SHORTCODE_PARSE_STATE_IN_CONTENT;
		}

		$this->debug( 'Content is: ' . $this->content );
	}

	/**
	 * Outputs debug data. Useful when running unit tests to see how content is being parsed.
	 *
	 * Nothing is logged unless WP_DEBUG is defined and truthy and $this->DEBUG is true.
	 * Strings are logged as they are; any other value is logged through var_export().
	 *
	 * @param mixed One or more variables of any type.
	 */
	private function debug( /* ... */ ) {
		if ( ! defined( 'WP_DEBUG' ) || ! WP_DEBUG || ! $this->DEBUG ) {
			return;
		}

		foreach ( func_get_args() as $arg ) {
			if ( 'string' === gettype( $arg ) ) {
				error_log( 'Shortcode_Parser debug: ' . $arg );
			} else {
				error_log( 'Shortcode_Parser debug: ' . var_export( $arg, true ) );
			}
		}
	}

}
| 637 | |
| 638 | /** |