WordPress.org

Make WordPress Core


Ignore:
Timestamp:
06/17/2020 03:22:49 PM (5 months ago)
Author:
swissspidy
Message:

Sitemaps: Add XML sitemaps functionality to WordPress.

While web crawlers are able to discover pages from links within the site and from other sites, XML sitemaps supplement this approach by allowing crawlers to quickly and comprehensively identify all URLs included in the sitemap and learn other signals about those URLs using the associated metadata.

See https://make.wordpress.org/core/2020/06/10/merge-announcement-extensible-core-sitemaps/ for more details.

This feature exposes the sitemap index via /wp-sitemap.xml and exposes a variety of new filters and hooks for developers to modify the behavior. Users can disable sitemaps completely by turning off search engine visibility in WordPress admin.

This change also introduces a new esc_xml() function to escape strings for output in XML, as well as XML support to wp_kses_normalize_entities().

Props Adrian McShane, afragen, adamsilverstein, casiepa, flixos90, garrett-eclipse, joemcgill, kburgoine, kraftbj, milana_cap, pacifika, pbiron, pfefferle, Ruxandra Gradina, swissspidy, szepeviktor, tangrufus, tweetythierry.
Fixes #50117.
See #3670. See #19998.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/wp-includes/kses.php

    r47892 r48072  
    4848// Ensure that these variables are added to the global namespace
    4949// (e.g. if using namespaces / autoload in the current PHP environment).
    50 global $allowedposttags, $allowedtags, $allowedentitynames;
     50global $allowedposttags, $allowedtags, $allowedentitynames, $allowedxmlentitynames;
    5151
    5252if ( ! CUSTOM_TAGS ) {
     
    705705    );
    706706
     707    /**
     708     * @var string[] $allowedxmlentitynames Array of KSES allowed XML entity names.
     709     * @since 5.5.0
     710     */
     711    $allowedxmlnamedentities = array(
     712        'amp',
     713        'lt',
     714        'gt',
     715        'apos',
     716        'quot',
     717    );
     718
    707719    $allowedposttags = array_map( '_wp_add_global_attributes', $allowedposttags );
    708720} else {
     
    17461758 * `AT&amp;T`, `&#00058;` to `&#058;`, `&#XYZZY;` to `&amp;#XYZZY;` and so on.
    17471759 *
     1760 * When `$context` is set to 'xml', HTML entities are converted to their code points.  For
     1761 * example, `AT&T&hellip;&#XYZZY;` is converted to `AT&amp;T…&amp;#XYZZY;`.
     1762 *
    17481763 * @since 1.0.0
    1749  *
    1750  * @param string $string Content to normalize entities.
     1764 * @since 5.5.0 Added `$context` parameter.
     1765 *
     1766 * @param string $string  Content to normalize entities.
     1767 * @param string $context Context for normalization. Can be either 'html' or 'xml'.
     1768 *                        Default 'html'.
    17511769 * @return string Content with normalized entities.
    17521770 */
    1753 function wp_kses_normalize_entities( $string ) {
     1771function wp_kses_normalize_entities( $string, $context = 'html' ) {
    17541772    // Disarm all entities by converting & to &amp;
    17551773    $string = str_replace( '&', '&amp;', $string );
    17561774
    17571775    // Change back the allowed entities in our entity whitelist.
    1758     $string = preg_replace_callback( '/&([A-Za-z]{2,8}[0-9]{0,2});/', 'wp_kses_named_entities', $string );
     1776    if ( 'xml' === $context ) {
     1777        $string = preg_replace_callback( '/&([A-Za-z]{2,8}[0-9]{0,2});/', 'wp_kses_xml_named_entities', $string );
     1778    } else {
     1779        $string = preg_replace_callback( '/&([A-Za-z]{2,8}[0-9]{0,2});/', 'wp_kses_named_entities', $string );
     1780    }
    17591781    $string = preg_replace_callback( '/&#(0*[0-9]{1,7});/', 'wp_kses_normalize_entities2', $string );
    17601782    $string = preg_replace_callback( '/&#[Xx](0*[0-9A-Fa-f]{1,6});/', 'wp_kses_normalize_entities3', $string );
     
    17851807    $i = $matches[1];
    17861808    return ( ! in_array( $i, $allowedentitynames, true ) ) ? "&amp;$i;" : "&$i;";
     1809}
     1810
     1811/**
     1812 * Callback for `wp_kses_normalize_entities()` regular expression.
     1813 *
     1814 * This function only accepts valid named entity references, which are finite,
     1815 * case-sensitive, and highly scrutinized by XML validators.  HTML named entity
     1816 * references are converted to their code points.
     1817 *
     1818 * @since 5.5.0
     1819 *
     1820 * @global array $allowedentitynames
     1821 * @global array $allowedxmlnamedentities
     1822 *
     1823 * @param array $matches preg_replace_callback() matches array.
     1824 * @return string Correctly encoded entity.
     1825 */
     1826function wp_kses_xml_named_entities( $matches ) {
     1827    global $allowedentitynames, $allowedxmlnamedentities;
     1828
     1829    if ( empty( $matches[1] ) ) {
     1830        return '';
     1831    }
     1832
     1833    $i = $matches[1];
     1834
     1835    if ( in_array( $i, $allowedxmlnamedentities, true ) ) {
     1836        return "&$i;";
     1837    } elseif ( in_array( $i, $allowedentitynames, true ) ) {
     1838        return html_entity_decode( "&$i;", ENT_HTML5 );
     1839    }
     1840
     1841    return "&amp;$i;";
    17871842}
    17881843
Note: See TracChangeset for help on using the changeset viewer.