| 395 | * Add chat detection support to the `get_content_chat()` chat parser |
| 396 | * |
| 397 | * @since 3.6.0 |
| 398 | * |
| 399 | * @global array $_wp_chat_parsers |
| 400 | * @param string $name Unique identifier for chat format. Example: IRC |
| 401 | * @param string $newline_regex RegEx to match the start of a new line, typically when a new "username:" appears |
| 402 | * The parser will handle up to 3 matched expressions |
| 403 | * $matches[0] = the string before the user's message starts |
| 404 | * $matches[1] = the time of the message, if present |
| 405 | * $matches[2] = the author/username |
| 406 | * OR |
| 407 | * $matches[0] = the string before the user's message starts |
| 408 | * $matches[1] = the author/username |
| 409 | * @param string $delimiter_regex RegEx to determine where to split the username syntax from the chat message |
| 410 | */ |
function add_chat_detection_format( $name, $newline_regex, $delimiter_regex ) {
	global $_wp_chat_parsers;

	// An unset or empty registry is treated as an empty list of formats.
	$registered = empty( $_wp_chat_parsers ) ? array() : $_wp_chat_parsers;

	// Array union keeps the left-hand operand's keys and order, so the new format
	// ends up first and wins over any previously registered format of the same name.
	$format = array(
		$name => array( $newline_regex, $delimiter_regex ),
	);

	$_wp_chat_parsers = $format + $registered;
}
// Register the built-in transcript formats. Each registration is placed ahead of
// the earlier ones, so "Skype" (timestamp + author) is tried before the plainer
// "IM" (author only) pattern when content is inspected.
add_chat_detection_format(
	'IM',
	'#^([^:]+):#',
	'#[:]#'
);
add_chat_detection_format(
	'Skype',
	'#^(\[.+?\])\s([^:]+):#',
	'#[:]#'
);
| 421 | |
/**
 * Deliberately interpret passed content as a chat transcript that is optionally
 * followed by commentary
 *
 * If the content does not contain username syntax, assume that it does not contain
 * chat logs and return an empty array.
 *
 * A blank line ends the current stanza. A blank line that is followed by a line
 * without the format's delimiter ends the transcript; everything from there on is
 * treated as commentary and is not parsed.
 *
 * @since 3.6.0
 *
 * Example:
 *
 * One stanza of chat:
 * Scott: Hey, let's chat!
 * Helen: No.
 *
 * $stanzas = array(
 *     array(
 *         array(
 *             'time' => '',
 *             'author' => 'Scott',
 *             'message' => "Hey, let's chat!"
 *         ),
 *         array(
 *             'time' => '',
 *             'author' => 'Helen',
 *             'message' => 'No.'
 *         )
 *     )
 * )
 *
 * @global array $_wp_chat_parsers Registered formats, name => array( newline regex, delimiter regex ).
 * @param string $content A string which might contain chat data. Passed by reference; only
 *                        modified when $remove is true.
 * @param boolean $remove Whether to remove the found data from the passed content. When true,
 *                        $content is replaced by the commentary that follows the transcript
 *                        (an empty string when the whole content is a transcript).
 * @return array A chat log as structured data: a list of stanzas, each a list of
 *               array( 'time', 'author', 'message' ) entries.
 */
function get_content_chat( &$content, $remove = false ) {
	global $_wp_chat_parsers;

	$trimmed = trim( (string) $content );
	if ( '' === $trimmed || empty( $_wp_chat_parsers ) || ! is_array( $_wp_chat_parsers ) )
		return array();

	// Pick the first registered format whose "new line" pattern matches the content.
	$matched_parser = false;
	foreach ( $_wp_chat_parsers as $parser ) {
		// Skip malformed registrations explicitly rather than silencing them with "@".
		if ( ! is_array( $parser ) || ! isset( $parser[0], $parser[1] ) )
			continue;

		if ( preg_match( $parser[0], $trimmed ) ) {
			$matched_parser = $parser;
			break;
		}
	}

	if ( false === $matched_parser )
		return array();

	list( $newline_regex, $delimiter_regex ) = $matched_parser;

	$stanzas = array();
	$lines = explode( "\n", make_clickable( $trimmed ) );

	// Index of the first line that is not part of the transcript. It stays at the
	// line count (i.e. "nothing left") unless commentary is detected below.
	$rest_index = count( $lines );

	$author = $time = '';
	$data = array();
	$stanza = array();

	foreach ( $lines as $index => $line ) {
		$line = trim( $line );

		// Neighbours are trimmed too, so "\r" or whitespace-only lines count as blank
		// exactly like the current line does.
		$next_line = isset( $lines[ $index + 1 ] ) ? trim( $lines[ $index + 1 ] ) : '';
		$prev_line = isset( $lines[ $index - 1 ] ) ? trim( $lines[ $index - 1 ] ) : '';

		if ( '' === $line ) {
			// A blank line closes the message and the stanza collected so far.
			if ( '' !== $author ) {
				$stanza[] = array(
					'time' => $time,
					'author' => $author,
					'message' => trim( join( ' ', $data ) )
				);
			}

			// Consecutive blank lines must not produce empty stanzas.
			if ( ! empty( $stanza ) )
				$stanzas[] = $stanza;

			$stanza = array();
			$author = $time = '';
			$data = array();

			// Text after a blank line that lacks the delimiter is commentary: stop parsing.
			if ( '' !== $next_line && ! preg_match( $delimiter_regex, $next_line ) ) {
				$rest_index = $index;
				break;
			}

			continue;
		}

		$matches = array();
		$matched = preg_match( $newline_regex, $line, $matches );

		// Only inspect the capture groups when the pattern actually matched.
		$starts_message = false;
		$has_time = false;
		$author_match = '';
		if ( $matched ) {
			// Two captures mean "time, author"; a single capture is the author alone.
			$has_time = isset( $matches[2] ) && '' !== $matches[2];
			if ( $has_time )
				$author_match = $matches[2];
			elseif ( isset( $matches[1] ) )
				$author_match = $matches[1];

			if ( '' !== $author_match ) {
				$author_has_ws = (bool) preg_match( '#\s#', $author_match );
				// allow script-like stanzas: an author containing whitespace only counts
				// when the line stands alone between blank lines
				$is_isolated = '' === $next_line && '' === $prev_line;
				// assume username syntax if a time is present or no whitespace is present
				$starts_message = $has_time || ! $author_has_ws || $is_isolated;
			}
		}

		if ( $starts_message ) {
			// A new speaker: flush the previous speaker's message first.
			if ( '' !== $author ) {
				$stanza[] = array(
					'time' => $time,
					'author' => $author,
					'message' => trim( join( ' ', $data ) )
				);
				$data = array();
			}

			$time = $has_time ? $matches[1] : '';
			$author = $author_match;

			// Strip only the leading "author:" prefix so the same text later in the
			// message is preserved.
			$prefix = $matches[0];
			if ( '' !== $prefix && 0 === strpos( $line, $prefix ) )
				$message = (string) substr( $line, strlen( $prefix ) );
			else
				$message = str_replace( $prefix, '', $line );

			$data[] = trim( $message );
		} else {
			// Continuation of the current message ($line is known to be non-blank here).
			$data[] = $line;
		}
	}

	// Flush whatever is still pending once the lines run out.
	if ( '' !== $author ) {
		$stanza[] = array(
			'time' => $time,
			'author' => $author,
			'message' => trim( join( ' ', $data ) )
		);
	}

	if ( ! empty( $stanza ) )
		$stanzas[] = $stanza;

	// Leave only the commentary (if any) in the caller's content.
	if ( $remove )
		$content = trim( join( "\n", array_slice( $lines, $rest_index ) ) );

	return $stanzas;
}
| 548 | |
/**
 * Retrieve structured chat data from the current or passed post
 *
 * Returns an empty array when the post cannot be found or when its content
 * does not parse as a chat transcript.
 *
 * @since 3.6.0
 *
 * @param int $id Optional. Post ID. Defaults to the current post.
 * @return array Chat stanzas as returned by get_content_chat().
 */
function get_the_chat( $id = 0 ) {
	$post = empty( $id ) ? get_post() : get_post( $id );
	if ( empty( $post ) )
		return array();

	// Work on a copy of the current post, as before, but only once we know there
	// is an object to clone; cloning a missing post is a fatal error.
	if ( empty( $id ) )
		$post = clone $post;

	// get_content_chat() takes its content by reference, so it needs a real variable.
	$content = get_paged_content( $post->post_content );
	$data = get_content_chat( $content );

	return empty( $data ) ? array() : $data;
}
| 568 | |
| 569 | /** |