| | 600 | * Removes otherwise valid UTF-8 characters that break XML output. |
| | 601 | * |
| | 602 | * When outputting user supplied content in an XML context we should strip these control and other unwanted characters - they are unprintable and just break feed parsers. |
| | 603 | * |
| | 604 | * @since 3.6.0 |
| | 605 | * |
| | 606 | * @param string $string User supplied content that may contain disallowed characters. |
| | 607 | * @return string Filtered string with space in place of removed characters. |
| | 608 | */ |
| | 609 | |
function strip_for_xml( $string ) {
	// Replaces every run of characters that XML 1.0 does not permit with a
	// single space. Content is returned untouched when the site charset is
	// not UTF-8, because the pattern below is only meaningful for UTF-8 text.

	// Cache the charset check in a static so get_option() is consulted at
	// most once per request, no matter how many strings are filtered.
	static $is_utf8;
	if ( ! isset( $is_utf8 ) ) {
		$is_utf8 = in_array( get_option( 'blog_charset' ), array( 'utf8', 'utf-8', 'UTF8', 'UTF-8' ), true );
	}
	if ( ! $is_utf8 ) {
		return $string;
	}

	// Whitelist of the XML 1.0 "Char" production: tab, LF, CR, U+0020-U+D7FF,
	// U+E000-U+FFFD and the supplementary planes U+10000-U+10FFFF. The last
	// range keeps emoji and other astral characters, which are legal in XML.
	$filtered = preg_replace(
		'/[^\x{0009}\x{000a}\x{000d}\x{0020}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]+/u',
		' ',
		$string
	);
	if ( null !== $filtered ) {
		return $filtered;
	}

	// preg_replace() returns null on error, which with the /u modifier
	// includes a subject that is not well-formed UTF-8. Rather than lose the
	// content entirely, fall back to a byte-wise strip of the ASCII control
	// characters; those byte values never occur inside a multi-byte UTF-8
	// sequence, so this cannot corrupt otherwise valid text.
	$filtered = preg_replace( '/[\x00-\x08\x0B\x0C\x0E-\x1F]+/', ' ', $string );

	return null === $filtered ? $string : $filtered;
}
| | 622 | |
| | 623 | /** |