From 845cdc30c6c1b7c3e77f40cfdebb099b85b0a3a1 Mon Sep 17 00:00:00 2001
From: Paul Biron <paul@sparrowhawkcomputing.com>
Date: Tue, 25 Aug 2020 15:08:38 -0600
Subject: [PATCH] nsure sitemap URLs with paged > 1 work as expected.

Introduces the get_sitemap_url() function which is sort of equivalent to get_permalink() but for sitemaps.  That new function is used in redirect_canonical().
---
 src/wp-includes/canonical.php              | 15 ++--
 src/wp-includes/sitemaps.php               | 36 +++++++++
 tests/phpunit/tests/canonical/sitemaps.php | 37 +++++++++
 tests/phpunit/tests/sitemaps/functions.php | 88 ++++++++++++++++++++++
 4 files changed, 169 insertions(+), 7 deletions(-)

diff --git a/src/wp-includes/canonical.php b/src/wp-includes/canonical.php
index 5a63de3481..31f128e033 100644
--- a/src/wp-includes/canonical.php
+++ b/src/wp-includes/canonical.php
@@ -410,8 +410,11 @@ function redirect_canonical( $requested_url = null, $do_redirect = true ) {
 			$redirect['query'] = remove_query_arg( 'page', $redirect['query'] );
 		}
 
-		// Paging and feeds.
-		if ( get_query_var( 'paged' ) || is_feed() || get_query_var( 'cpage' ) ) {
+		if ( get_query_var( 'sitemap' ) ) {
+			$redirect_url      = get_sitemap_url( get_query_var( 'sitemap' ), get_query_var( 'sitemap-subtype' ), get_query_var( 'paged' ) );
+			$redirect['query'] = remove_query_arg( array( 'sitemap', 'sitemap-subtype', 'paged' ), $redirect['query'] );
+		} elseif ( get_query_var( 'paged' ) || is_feed() || get_query_var( 'cpage' ) ) {
+			// Paging and feeds.
 			$paged = get_query_var( 'paged' );
 			$feed  = get_query_var( 'feed' );
 			$cpage = get_query_var( 'cpage' );
@@ -508,12 +511,10 @@ function redirect_canonical( $requested_url = null, $do_redirect = true ) {
 				$redirect['path'] = trailingslashit( $redirect['path'] ) . $addl_path;
 			}
 
-			// Remove trailing slash for sitemaps requests.
-			if ( ! empty( get_query_var( 'sitemap' ) ) ) {
-				$redirect['path'] = untrailingslashit( $redirect['path'] );
-			}
-
 			$redirect_url = $redirect['scheme'] . '://' . $redirect['host'] . $redirect['path'];
+		} elseif ( get_query_var( 'sitemap' ) ) {
+			$redirect_url      = get_sitemap_url( get_query_var( 'sitemap' ), get_query_var( 'sitemap-subtype' ), get_query_var( 'paged' ) );
+			$redirect['query'] = remove_query_arg( array( 'sitemap', 'sitemap-subtype', 'paged' ), $redirect['query'] );
 		}
 
 		if ( 'wp-register.php' === basename( $redirect['path'] ) ) {
diff --git a/src/wp-includes/sitemaps.php b/src/wp-includes/sitemaps.php
index 62752dada9..1dfcd1d69c 100644
--- a/src/wp-includes/sitemaps.php
+++ b/src/wp-includes/sitemaps.php
@@ -87,3 +87,39 @@ function wp_sitemaps_get_max_urls( $object_type ) {
 	 */
 	return apply_filters( 'wp_sitemaps_max_urls', 2000, $object_type );
 }
+
+/**
+ * Retrieves the full URL for a sitemap.
+ *
+ * @since 5.5.1
+ *
+ * @param string $name         The sitemap name.
+ * @param string $subtype_name The sitemap subtype name.  Default empty string.
+ * @param int    $page         The page of the sitemap.  Default 1.
+ * @return string|false The sitemap URL or false if the sitemap doesn't exist.
+ */
+function get_sitemap_url( $name, $subtype_name = '', $page = 1 ) {
+	$sitemaps = wp_sitemaps_get_server();
+	if ( ! $sitemaps ) {
+		return false;
+	}
+
+	if ( 'index' === $name ) {
+		return $sitemaps->index->get_index_url();
+	}
+
+	$provider = $sitemaps->registry->get_provider( $name );
+	if ( ! $provider ) {
+		return false;
+	}
+
+	if ( $subtype_name && ! in_array( $subtype_name, array_keys( $provider->get_object_subtypes() ), true ) ) {
+		return false;
+	}
+
+	$page = absint( $page );
+	if ( 0 >= $page ) {
+		$page = 1;
+	}
+	return $provider->get_sitemap_url( $subtype_name, $page );
+}
diff --git a/tests/phpunit/tests/canonical/sitemaps.php b/tests/phpunit/tests/canonical/sitemaps.php
index dd0bdfbdb6..1e0c86fad4 100644
--- a/tests/phpunit/tests/canonical/sitemaps.php
+++ b/tests/phpunit/tests/canonical/sitemaps.php
@@ -40,4 +40,41 @@ class Tests_Canonical_Sitemaps extends WP_Canonical_UnitTestCase {
 		$this->assertCanonical( '/wp-sitemap.xsl/', '/wp-sitemap.xsl' );
 	}
 
+	/**
+	 * Ensure sitemaps redirects work as expected.
+	 *
+	 * @dataProvider sitemaps_canonical_redirects_provider
+	 * @ticket 50910
+	 */
+	public function test_sitemaps_canonical_redirects( $test_url, $expected ) {
+		$this->assertCanonical( $test_url, $expected, 50910 );
+	}
+
+	/**
+	 * Data provider for test_sitemaps_canonical_redirects.
+	 *
+	 * @return array[] {
+	 *     Data to test with.
+	 *
+	 *     @type string $0 The test URL.
+	 *     @type string $1 The expected canonical URL.
+	 * }
+	 */
+	public function sitemaps_canonical_redirects_provider() {
+		return array(
+			// Ugly/incorrect versions redirect correctly.
+			array( '/?sitemap=index', '/wp-sitemap.xml' ),
+			array( '/wp-sitemap.xml/', '/wp-sitemap.xml' ),
+			array( '/?sitemap=posts&sitemap-subtype=post', '/wp-sitemap-posts-post-1.xml' ),
+			array( '/?sitemap=posts&sitemap-subtype=post&paged=2', '/wp-sitemap-posts-post-2.xml' ),
+			array( '/?sitemap=taxonomies&sitemap-subtype=category', '/wp-sitemap-taxonomies-category-1.xml' ),
+			array( '/?sitemap=taxonomies&sitemap-subtype=category&paged=2', '/wp-sitemap-taxonomies-category-2.xml' ),
+
+			// Pretty versions don't redirect incorrectly.
+			array( '/wp-sitemap-posts-post-1.xml', '/wp-sitemap-posts-post-1.xml' ),
+			array( '/wp-sitemap-posts-post-2.xml', '/wp-sitemap-posts-post-2.xml' ),
+			array( '/wp-sitemap-taxonomies-category-1.xml', '/wp-sitemap-taxonomies-category-1.xml' ),
+			array( '/wp-sitemap-taxonomies-category-2.xml', '/wp-sitemap-taxonomies-category-2.xml' ),
+		);
+	}
 }
diff --git a/tests/phpunit/tests/sitemaps/functions.php b/tests/phpunit/tests/sitemaps/functions.php
index 3fe904a6ae..7ab5c5490f 100644
--- a/tests/phpunit/tests/sitemaps/functions.php
+++ b/tests/phpunit/tests/sitemaps/functions.php
@@ -58,4 +58,92 @@ class Test_Sitemaps_Functions extends WP_UnitTestCase {
 			$this->assertTrue( is_a( $sitemaps[ $name ], $provider ), "Default $name sitemap is not a $provider object." );
 		}
 	}
+
+	/**
+	 * Test get_sitemap_url() with ugly permalinks.
+	 *
+	 * @dataProvider ugly_permalinks_provider
+	 */
+	public function test_get_sitemap_url_ugly_permalinks( $name, $subtype_name, $page, $expected ) {
+		$actual = get_sitemap_url( $name, $subtype_name, $page );
+
+		$this->assertSame( $expected, $actual );
+	}
+
+	/**
+	 * Test get_sitemap_url() with pretty permalinks.
+	 *
+	 * @dataProvider pretty_permalinks_provider
+	 */
+	public function test_get_sitemap_url_pretty_permalinks( $name, $subtype_name, $page, $expected ) {
+		$this->set_permalink_structure( '/%postname%/' );
+
+		$actual = get_sitemap_url( $name, $subtype_name, $page );
+
+		$this->assertSame( $expected, $actual );
+	}
+
+	/**
+	 * Data provider for test_get_sitemap_url_ugly_permalinks.
+	 *
+	 * @return array[] {
+	 *     Data to test with.
+	 *
+	 *     @type string       $0 Sitemap name.
+	 *     @type string       $1 Sitemap subtype name.
+	 *     @type int          $3 Sitemap page.
+	 *     @type string|false $4 Sitemap URL.
+	 * }
+	 */
+	function ugly_permalinks_provider() {
+		return array(
+			array( 'posts', 'post', 1, home_url( '/?sitemap=posts&sitemap-subtype=post&paged=1' ) ),
+			array( 'posts', 'post', 0, home_url( '/?sitemap=posts&sitemap-subtype=post&paged=1' ) ),
+			array( 'posts', 'page', 1, home_url( '/?sitemap=posts&sitemap-subtype=page&paged=1' ) ),
+			array( 'posts', 'page', 5, home_url( '/?sitemap=posts&sitemap-subtype=page&paged=5' ) ),
+			// post_type doesn't exist.
+			array( 'posts', 'foo', 5, false ),
+			array( 'taxonomies', 'category', 1, home_url( '/?sitemap=taxonomies&sitemap-subtype=category&paged=1' ) ),
+			array( 'taxonomies', 'post_tag', 1, home_url( '/?sitemap=taxonomies&sitemap-subtype=post_tag&paged=1' ) ),
+			array( 'taxonomies', 'post_tag', -1, home_url( '/?sitemap=taxonomies&sitemap-subtype=post_tag&paged=1' ) ),
+			// negative paged, gets converted to it's absolute value.
+			array( 'users', '', 4, home_url( '/?sitemap=users&paged=4' ) ),
+			// users provider doesn't allow subtypes.
+			array( 'users', 'foo', 4, false ),
+			// provider doesn't exist.
+			array( 'foo', '', 4, false ),
+		);
+	}
+
+	/**
+	 * Data provider for test_get_sitemap_url_pretty_permalinks
+	 *
+	 * @return array[] {
+	 *     Data to test with.
+	 *
+	 *     @type string       $0 Sitemap name.
+	 *     @type string       $1 Sitemap subtype name.
+	 *     @type int          $3 Sitemap page.
+	 *     @type string|false $4 Sitemap URL.
+	 * }
+	 */
+	function pretty_permalinks_provider() {
+		return array(
+			array( 'posts', 'post', 1, home_url( '/wp-sitemap-posts-post-1.xml' ) ),
+			array( 'posts', 'post', 0, home_url( '/wp-sitemap-posts-post-1.xml' ) ),
+			array( 'posts', 'page', 1, home_url( '/wp-sitemap-posts-page-1.xml' ) ),
+			array( 'posts', 'page', 5, home_url( '/wp-sitemap-posts-page-5.xml' ) ),
+			// post_type doesn't exist.
+			array( 'posts', 'foo', 5, false ),
+			array( 'taxonomies', 'category', 1, home_url( '/wp-sitemap-taxonomies-category-1.xml' ) ),
+			array( 'taxonomies', 'post_tag', 1, home_url( '/wp-sitemap-taxonomies-post_tag-1.xml' ) ),
+			// negative paged, gets converted to it's absolute value.
+			array( 'taxonomies', 'post_tag', -1, home_url( '/wp-sitemap-taxonomies-post_tag-1.xml' ) ),
+			array( 'users', '', 4, home_url( '/wp-sitemap-users-4.xml' ) ),
+			// users provider doesn't allow subtypes.
+			array( 'users', 'foo', 4, false ),
+			// provider doesn't exist.
+			array( 'foo', '', 4, false ),
+		);
+	}
 }
-- 
2.26.2.windows.1

