Make WordPress Core

Changeset 25632


Ignore:
Timestamp:
09/27/2013 05:33:36 PM (11 years ago)
Author:
nacin
Message:

Order search results by relevance, rather than by date.

The ordering logic is as follows:

  • Full sentence matches in post titles.
  • All search terms in post titles.
  • Any search terms in post titles.
  • Full sentence matches in post content.

Each section and any remaining posts are then sorted by date.

Introduces some filters:

  • wp_search_stopwords, to filter stop words ignored in WHERE.
  • posts_search_orderby, to filter the ORDER BY when ordering search results.

props azaozz, wonderboymusic.
fixes #7394.

Location:
trunk/src/wp-includes
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/wp-includes/deprecated.php

    r24436 r25632  
    33183318
    33193319/**
     3320 * Formerly used internally to tidy up the search terms.
     3321 *
     3322 * @access private
     3323 * @since 2.9.0
     3324 * @deprecated 3.7.0
     3325 */
     3326function _search_terms_tidy( $t ) {
     3327    _deprecated_function( __FUNCTION__, '3.5', '' );
     3328    return trim( $t, "\"'\n\r " );
     3329}
     3330
     3331/**
    33203332 * Check if the installed version of GD supports particular image type
    33213333 *
  • trunk/src/wp-includes/functions.php

    r25437 r25632  
    37043704
    37053705/**
    3706  * Used internally to tidy up the search terms.
    3707  *
    3708  * @access private
    3709  * @since 2.9.0
    3710  *
    3711  * @param string $t
    3712  * @return string
    3713  */
    3714 function _search_terms_tidy($t) {
    3715     return trim($t, "\"'\n\r ");
    3716 }
    3717 
    3718 /**
    37193706 * Returns true.
    37203707 *
  • trunk/src/wp-includes/query.php

    r25605 r25632  
    12891289     */
    12901290     var $thumbnails_cached = false;
     1291
     1292    /**
     1293     * Cached list of search stopwords.
     1294     *
     1295     * @since 3.7.0
     1296     * @var array
     1297     */
     1298    private $stopwords;
    12911299
    12921300    /**
     
    14711479        if ( '' !== $qv['menu_order'] ) $qv['menu_order'] = absint($qv['menu_order']);
    14721480
     1481        // Fairly insane upper bound for search string lengths.
     1482        if ( ! empty( $qv['s'] ) && strlen( $qv['s'] ) > 1600 )
     1483            $qv['s'] = '';
     1484
    14731485        // Compat. Map subpost to attachment.
    14741486        if ( '' != $qv['subpost'] )
     
    18961908
    18971909    /**
     1910     * Generate SQL for the WHERE clause based on passed search terms.
     1911     *
     1912     * @since 3.7.0
     1913     *
     1914     * @global wpdb $wpdb
     1915     * @param array $q Query variables.
     1916     */
     1917    protected function parse_search( &$q ) {
     1918        global $wpdb;
     1919
     1920        $search = '';
     1921
     1922        // added slashes screw with quote grouping when done early, so done later
     1923        $q['s'] = stripslashes( $q['s'] );
     1924        if ( empty( $_GET['s'] ) && $this->is_main_query() )
     1925            $q['s'] = urldecode( $q['s'] );
     1926        // there are no line breaks in <input /> fields
     1927        $q['s'] = str_replace( array( "\r", "\n" ), '', $q['s'] );
     1928        $q['search_terms_count'] = 1;
     1929        if ( ! empty( $q['sentence'] ) ) {
     1930            $q['search_terms'] = array( $q['s'] );
     1931        } else {
     1932            if ( preg_match_all( '/".*?("|$)|((?<=[\t ",+])|^)[^\t ",+]+/', $q['s'], $matches ) ) {
     1933                $q['search_terms_count'] = count( $matches[0] );
     1934                $q['search_terms'] = $this->parse_search_terms( $matches[0] );
     1935                // if the search string has only short terms or stopwords, or is 10+ terms long, match it as sentence
     1936                if ( empty( $q['search_terms'] ) || count( $q['search_terms'] ) > 9 )
     1937                    $q['search_terms'] = array( $q['s'] );
     1938            } else {
     1939                $q['search_terms'] = array( $q['s'] );
     1940            }
     1941        }
     1942
     1943        $n = ! empty( $q['exact'] ) ? '' : '%';
     1944        $searchand = '';
     1945        $q['search_orderby_title'] = array();
     1946        foreach ( $q['search_terms'] as $term ) {
     1947            $term = like_escape( esc_sql( $term ) );
     1948            if ( $n )
     1949                $q['search_orderby_title'][] = "$wpdb->posts.post_title LIKE '%$term%'";
     1950
     1951            $search .= "{$searchand}(($wpdb->posts.post_title LIKE '{$n}{$term}{$n}') OR ($wpdb->posts.post_content LIKE '{$n}{$term}{$n}'))";
     1952            $searchand = ' AND ';
     1953        }
     1954
     1955        if ( ! empty( $search ) ) {
     1956            $search = " AND ({$search}) ";
     1957            if ( ! is_user_logged_in() )
     1958                $search .= " AND ($wpdb->posts.post_password = '') ";
     1959        }
     1960
     1961        /**
     1962         * Filter the search SQL that is used in the WHERE clause of WP_Query.
     1963         *
     1964         * @since 3.0.0
     1965         *
     1966         * @param string   $search Search SQL for WHERE clause.
     1967         * @param WP_Query $this   The current WP_Query object.
     1968         */
     1969        return apply_filters_ref_array( 'posts_search', array( $search, &$this ) );
     1970    }
     1971
     1972    /**
     1973     * Check if the terms are suitable for searching.
     1974     *
     1975     * Uses an array of stopwords (terms) that are excluded from the separate
     1976     * term matching when searching for posts. The list of English stopwords
     1977     * approximates the lists used by search engines, and is translatable.
     1978     *
     1979     * @since 3.7.0
     1980     *
     1981     * @param array $terms Terms to check.
     1982     * @return array Terms that are not stopwords.
     1983     */
     1984    protected function parse_search_terms( $terms ) {
     1985        $strtolower = function_exists( 'mb_strtolower' ) ? 'mb_strtolower' : 'strtolower';
     1986        $checked = array();
     1987
     1988        $stopwords = $this->get_search_stopwords();
     1989
     1990        foreach ( $terms as $term ) {
     1991            // keep before/after spaces when term is for exact match
     1992            if ( preg_match( '/^".+"$/', $term ) )
     1993                $term = trim( $term, "\"'" );
     1994            else
     1995                $term = trim( $term, "\"' " );
     1996
     1997            // \p{L} matches any single Unicode letter (CJK characters included), so one-letter terms are skipped
     1998            if ( ! $term || preg_match( '/^\p{L}$/u', $term ) )
     1999                continue;
     2000
     2001            if ( in_array( call_user_func( $strtolower, $term ), $stopwords, true ) )
     2002                continue;
     2003
     2004            $checked[] = $term;
     2005        }
     2006
     2007        return $checked;
     2008    }
     2009
     2010    /**
     2011     * Retrieve stopwords used when parsing search terms.
     2012     *
     2013     * @since 3.7.0
     2014     *
     2015     * @return array Stopwords.
     2016     */
     2017    protected function get_search_stopwords() {
     2018        if ( isset( $this->stopwords ) )
     2019            return $this->stopwords;
     2020
     2021        /* translators: This is a comma-separated list of very common words that should be excluded from a search,
     2022         * like a, an, and the. These are usually called "stopwords". You should not simply translate these individual
     2023         * words into your language. Instead, look for and provide commonly accepted stopwords in your language.
     2024         */
     2025        $words = explode( ',', _x( 'about,an,are,as,at,be,by,com,for,from,how,in,is,it,of,on,or,that,the,this,to,was,what,when,where,who,will,with,www',
     2026            'Comma-separated list of search stopwords in your language' ) );
     2027
     2028        foreach( $words as $word ) {
     2029            $word = trim( $word, "\r\n\t " );
     2030            if ( $word )
     2031                $stopwords[] = $word;
     2032        }
     2033
     2034        /**
     2035         * Filter stopwords used when parsing search terms.
     2036         *
     2037         * @since 3.7.0
     2038         *
     2039         * @param array $stopwords Stopwords.
     2040         */
     2041        $this->stopwords = apply_filters( 'wp_search_stopwords', $stopwords );
     2042        return $this->stopwords;
     2043    }
     2044
     2045    /**
     2046     * Generate SQL for the ORDER BY condition based on passed search terms.
     2047     *
     2048     * @global wpdb $wpdb
     2049     * @param array $q Query variables.
     2050     * @return string ORDER BY clause.
     2051     */
     2052    protected function parse_search_order( &$q ) {
     2053        global $wpdb;
     2054
     2055        $search_orderby = '';
     2056
     2057        if ( $q['search_terms_count'] > 1 ) {
     2058            $num_terms = count( $q['search_orderby_title'] );
     2059            $search_orderby_s = like_escape( esc_sql( $q['s'] ) );
     2060
     2061            $search_orderby = '(CASE ';
     2062            // sentence match in 'post_title'
     2063            $search_orderby .= "WHEN $wpdb->posts.post_title LIKE '%{$search_orderby_s}%' THEN 1 ";
     2064
     2065            // sanity limit, sort as sentence when more than 6 terms
     2066            // (few searches are longer than 6 terms and most titles are not)
     2067            if ( $num_terms < 7 ) {
     2068                // all words in title
     2069                $search_orderby .= 'WHEN ' . implode( ' AND ', $q['search_orderby_title'] ) . ' THEN 2 ';
     2070                // any word in title, not needed when $num_terms == 1
     2071                if ( $num_terms > 1 )
     2072                    $search_orderby .= 'WHEN ' . implode( ' OR ', $q['search_orderby_title'] ) . ' THEN 3 ';
     2073            }
     2074
     2075            // sentence match in 'post_content'
     2076            $search_orderby .= "WHEN $wpdb->posts.post_content LIKE '%{$search_orderby_s}%' THEN 4 ";
     2077            $search_orderby .= 'ELSE 5 END)';
     2078        } else {
     2079            // single word or sentence search
     2080            $search_orderby = reset( $q['search_orderby_title'] ) . ' DESC';
     2081        }
     2082
     2083        return $search_orderby;
     2084    }
     2085
     2086    /**
    18982087     * Sets the 404 property and saves whether query is feed.
    18992088     *
     
    22362425        }
    22372426
    2238         // If a search pattern is specified, load the posts that match
    2239         if ( !empty($q['s']) ) {
    2240             // added slashes screw with quote grouping when done early, so done later
    2241             $q['s'] = stripslashes($q['s']);
    2242             if ( empty( $_GET['s'] ) && $this->is_main_query() )
    2243                 $q['s'] = urldecode($q['s']);
    2244             if ( !empty($q['sentence']) ) {
    2245                 $q['search_terms'] = array($q['s']);
    2246             } else {
    2247                 preg_match_all('/".*?("|$)|((?<=[\r\n\t ",+])|^)[^\r\n\t ",+]+/', $q['s'], $matches);
    2248                 $q['search_terms'] = array_map('_search_terms_tidy', $matches[0]);
    2249             }
    2250             $n = !empty($q['exact']) ? '' : '%';
    2251             $searchand = '';
    2252             foreach( (array) $q['search_terms'] as $term ) {
    2253                 $term = esc_sql( like_escape( $term ) );
    2254                 $search .= "{$searchand}(($wpdb->posts.post_title LIKE '{$n}{$term}{$n}') OR ($wpdb->posts.post_content LIKE '{$n}{$term}{$n}'))";
    2255                 $searchand = ' AND ';
    2256             }
    2257 
    2258             if ( !empty($search) ) {
    2259                 $search = " AND ({$search}) ";
    2260                 if ( !is_user_logged_in() )
    2261                     $search .= " AND ($wpdb->posts.post_password = '') ";
    2262             }
    2263         }
    2264 
    2265         // Allow plugins to contextually add/remove/modify the search section of the database query
    2266         $search = apply_filters_ref_array('posts_search', array( $search, &$this ) );
     2427        // If a search pattern is specified, load the posts that match.
     2428        if ( ! empty( $q['s'] ) )
     2429            $search = $this->parse_search( $q );
    22672430
    22682431        // Taxonomies
     
    24622625        }
    24632626
     2627        // Order search results by relevance only when another "orderby" is not specified in the query.
     2628        if ( ! empty( $q['s'] ) ) {
     2629            $search_orderby = '';
     2630            if ( ! empty( $q['search_orderby_title'] ) && empty( $q['orderby'] ) )
     2631                $search_orderby = $this->parse_search_order( $q );
     2632
     2633            /**
     2634             * Filter the ORDER BY used when ordering search results.
     2635             *
     2636             * @since 3.7.0
     2637             *
     2638             * @param string   $search_orderby The ORDER BY clause.
     2639             * @param WP_Query $this           The current WP_Query instance.
     2640             */
     2641            $search_orderby = apply_filters( 'posts_search_orderby', $search_orderby, $this );
     2642            if ( $search_orderby )
     2643                $orderby = $orderby ? $search_orderby . ', ' . $orderby : $search_orderby;
     2644        }
     2645
    24642646        if ( is_array( $post_type ) && count( $post_type ) > 1 ) {
    24652647            $post_type_cap = 'multiple_post_type';
Note: See TracChangeset for help on using the changeset viewer.