WordPress.org

Make WordPress Core

Changeset 25632


Ignore:
Timestamp:
09/27/13 17:33:36 (7 months ago)
Author:
nacin
Message:

Order search results by relevance, rather than by date.

The ordering logic is as follows:

  • Full sentence matches in post titles.
  • All search terms in post titles.
  • Any search terms in post titles.
  • Full sentence matches in post content.

Each section and any remaining posts are then sorted by date.

Introduces some filters:

  • wp_search_stopwords, to filter stop words ignored in WHERE.
  • posts_search_orderby, to filter the ORDER BY when ordering search results.

props azaozz, wonderboymusic.
fixes #7394.

Location:
trunk/src/wp-includes
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/wp-includes/deprecated.php

    r24436 r25632  
    33183318 
    /**
     * Formerly used internally to tidy up the search terms.
     *
     * Strips double quotes, single quotes, newlines, carriage returns and
     * spaces from both ends of the term.
     *
     * @access private
     * @since 2.9.0
     * @deprecated 3.7.0
     *
     * @param string $t Search term.
     * @return string The trimmed search term.
     */
    function _search_terms_tidy( $t ) {
        // Report the same version as the @deprecated tag above. No replacement
        // exists, so none is passed (an empty string would be reported as one).
        _deprecated_function( __FUNCTION__, '3.7' );
        return trim( $t, "\"'\n\r " );
    }
     3330 
     3331/** 
    33203332 * Check if the installed version of GD supports particular image type 
    33213333 * 
  • trunk/src/wp-includes/functions.php

    r25437 r25632  
    37043704 
    /**
     * Tidies a single search term for internal use.
     *
     * Removes double quotes, single quotes, newlines, carriage returns and
     * spaces from both ends of the term; characters inside the term are kept.
     *
     * @access private
     * @since 2.9.0
     *
     * @param string $t Raw search term.
     * @return string The term with the surrounding characters stripped.
     */
    function _search_terms_tidy($t) {
        $strip_chars = "\"'\n\r ";

        return trim($t, $strip_chars);
    }
    3717  
    3718 /** 
    37193706 * Returns true. 
    37203707 * 
  • trunk/src/wp-includes/query.php

    r25605 r25632  
    12891289     */ 
    12901290     var $thumbnails_cached = false; 
     1291 
     1292    /** 
     1293     * Cached list of search stopwords. 
     1294     * 
     1295     * @since 3.7.0 
     1296     * @var array 
     1297     */ 
     1298    private $stopwords; 
    12911299 
    12921300    /** 
     
    14711479        if ( '' !== $qv['menu_order'] ) $qv['menu_order'] = absint($qv['menu_order']); 
    14721480 
     1481        // Fairly insane upper bound for search string lengths. 
     1482        if ( ! empty( $qv['s'] ) && strlen( $qv['s'] ) > 1600 ) 
     1483            $qv['s'] = ''; 
     1484 
    14731485        // Compat. Map subpost to attachment. 
    14741486        if ( '' != $qv['subpost'] ) 
     
    18961908 
    /**
     * Build the search part of the WHERE clause from the 's' query variable.
     *
     * The search string is unslashed, URL-decoded when it did not arrive via
     * $_GET on the main query, and stripped of line breaks. Unless 'sentence'
     * is set it is split into terms; every term must then match either the
     * post title or the post content. As a side effect this fills in
     * $q['search_terms'], $q['search_terms_count'] and
     * $q['search_orderby_title'].
     *
     * @since 3.7.0
     *
     * @global wpdb $wpdb
     * @param array $q Query variables, passed by reference.
     * @return string Search SQL for the WHERE clause, after the 'posts_search' filter has run.
     */
    protected function parse_search( &$q ) {
        global $wpdb;

        // Slashes added early would break quote grouping, so they are removed here.
        $q['s'] = stripslashes( $q['s'] );
        if ( empty( $_GET['s'] ) && $this->is_main_query() ) {
            $q['s'] = urldecode( $q['s'] );
        }

        // An <input /> field cannot contain line breaks.
        $q['s'] = str_replace( array( "\r", "\n" ), '', $q['s'] );

        // Start from "the whole string is one term" and refine below.
        $q['search_terms_count'] = 1;
        $q['search_terms']       = array( $q['s'] );

        if ( empty( $q['sentence'] ) && preg_match_all( '/".*?("|$)|((?<=[\t ",+])|^)[^\t ",+]+/', $q['s'], $matches ) ) {
            $q['search_terms_count'] = count( $matches[0] );
            $usable_terms            = $this->parse_search_terms( $matches[0] );

            // Only short terms/stopwords, or 10+ terms: keep matching the string as a sentence.
            if ( ! empty( $usable_terms ) && count( $usable_terms ) <= 9 ) {
                $q['search_terms'] = $usable_terms;
            } else {
                $q['search_terms'] = array( $q['s'] );
            }
        }

        $wildcard                  = ! empty( $q['exact'] ) ? '' : '%';
        $conditions                = array();
        $q['search_orderby_title'] = array();

        foreach ( $q['search_terms'] as $term ) {
            $term = like_escape( esc_sql( $term ) );

            // Title conditions for relevance ordering are only collected for non-exact searches.
            if ( $wildcard ) {
                $q['search_orderby_title'][] = "$wpdb->posts.post_title LIKE '%$term%'";
            }

            $conditions[] = "(($wpdb->posts.post_title LIKE '{$wildcard}{$term}{$wildcard}') OR ($wpdb->posts.post_content LIKE '{$wildcard}{$term}{$wildcard}'))";
        }

        $search = implode( ' AND ', $conditions );

        if ( ! empty( $search ) ) {
            $search = " AND ({$search}) ";

            // Visitors who are not logged in never get password-protected posts in search results.
            if ( ! is_user_logged_in() ) {
                $search .= " AND ($wpdb->posts.post_password = '') ";
            }
        }

        /**
         * Filter the search SQL that is used in the WHERE clause of WP_Query.
         *
         * @since 3.0.0
         *
         * @param string   $search Search SQL for WHERE clause.
         * @param WP_Query $this   The current WP_Query object.
         */
        return apply_filters_ref_array( 'posts_search', array( $search, &$this ) );
    }
     1971 
     1972    /** 
     1973     * Check if the terms are suitable for searching. 
     1974     * 
     1975     * Uses an array of stopwords (terms) that are excluded from the separate 
     1976     * term matching when searching for posts. The list of English stopwords is 
     1977     * the approximate search engines list, and is translatable. 
     1978     * 
     1979     * @since 3.7.0 
     1980     * 
     1981     * @param array Terms to check. 
     1982     * @return array Terms that are not stopwords. 
     1983     */ 
     1984    protected function parse_search_terms( $terms ) { 
     1985        $strtolower = function_exists( 'mb_strtolower' ) ? 'mb_strtolower' : 'strtolower'; 
     1986        $checked = array(); 
     1987 
     1988        $stopwords = $this->get_search_stopwords(); 
     1989 
     1990        foreach ( $terms as $term ) { 
     1991            // keep before/after spaces when term is for exact match 
     1992            if ( preg_match( '/^".+"$/', $term ) ) 
     1993                $term = trim( $term, "\"'" ); 
     1994            else 
     1995                $term = trim( $term, "\"' " ); 
     1996 
     1997            // \p{L} matches a single letter that is not a Chinese, Japanese, etc. char 
     1998            if ( ! $term || preg_match( '/^\p{L}$/u', $term ) ) 
     1999                continue; 
     2000 
     2001            if ( in_array( call_user_func( $strtolower, $term ), $stopwords, true ) ) 
     2002                continue; 
     2003 
     2004            $checked[] = $term; 
     2005        } 
     2006 
     2007        return $checked; 
     2008    } 
     2009 
     2010    /** 
     2011     * Retrieve stopwords used when parsing search terms. 
     2012     * 
     2013     * @since 3.7.0 
     2014     * 
     2015     * @return array Stopwords. 
     2016     */ 
     2017    protected function get_search_stopwords() { 
     2018        if ( isset( $this->stopwords ) ) 
     2019            return $this->stopwords; 
     2020 
     2021        /* translators: This is a comma-separated list of very common words that should be excluded from a search, 
     2022         * like a, an, and the. These are usually called "stopwords". You should not simply translate these individual 
     2023         * words into your language. Instead, look for and provide commonly accepted stopwords in your language. 
     2024         */ 
     2025        $words = explode( ',', _x( 'about,an,are,as,at,be,by,com,for,from,how,in,is,it,of,on,or,that,the,this,to,was,what,when,where,who,will,with,www', 
     2026            'Comma-separated list of search stopwords in your language' ) ); 
     2027 
     2028        foreach( $words as $word ) { 
     2029            $word = trim( $word, "\r\n\t " ); 
     2030            if ( $word ) 
     2031                $stopwords[] = $word; 
     2032        } 
     2033 
     2034        /** 
     2035         * Filter stopwords used when parsing search terms. 
     2036         * 
     2037         * @since 3.7.0 
     2038         * 
     2039         * @param array $stopwords Stopwords. 
     2040         */ 
     2041        $this->stopwords = apply_filters( 'wp_search_stopwords', $stopwords ); 
     2042        return $this->stopwords; 
     2043    } 
     2044 
     2045    /** 
     2046     * Generate SQL for the ORDER BY condition based on passed search terms. 
     2047     * 
     2048     * @global wpdb $wpdb 
     2049     * @param array $q Query variables. 
     2050     * @return string ORDER BY clause. 
     2051     */ 
     2052    protected function parse_search_order( &$q ) { 
     2053        global $wpdb; 
     2054 
     2055        $search_orderby = ''; 
     2056 
     2057        if ( $q['search_terms_count'] > 1 ) { 
     2058            $num_terms = count( $q['search_orderby_title'] ); 
     2059            $search_orderby_s = like_escape( esc_sql( $q['s'] ) ); 
     2060 
     2061            $search_orderby = '(CASE '; 
     2062            // sentence match in 'post_title' 
     2063            $search_orderby .= "WHEN $wpdb->posts.post_title LIKE '%{$search_orderby_s}%' THEN 1 "; 
     2064 
     2065            // sanity limit, sort as sentence when more than 6 terms 
     2066            // (few searches are longer than 6 terms and most titles are not) 
     2067            if ( $num_terms < 7 ) { 
     2068                // all words in title 
     2069                $search_orderby .= 'WHEN ' . implode( ' AND ', $q['search_orderby_title'] ) . ' THEN 2 '; 
     2070                // any word in title, not needed when $num_terms == 1 
     2071                if ( $num_terms > 1 ) 
     2072                    $search_orderby .= 'WHEN ' . implode( ' OR ', $q['search_orderby_title'] ) . ' THEN 3 '; 
     2073            } 
     2074 
     2075            // sentence match in 'post_content' 
     2076            $search_orderby .= "WHEN $wpdb->posts.post_content LIKE '%{$search_orderby_s}%' THEN 4 "; 
     2077            $search_orderby .= 'ELSE 5 END)'; 
     2078        } else { 
     2079            // single word or sentence search 
     2080            $search_orderby = reset( $q['search_orderby_title'] ) . ' DESC'; 
     2081        } 
     2082 
     2083        return $search_orderby; 
     2084    } 
     2085 
     2086    /** 
    18982087     * Sets the 404 property and saves whether query is feed. 
    18992088     * 
     
    22362425        } 
    22372426 
    2238         // If a search pattern is specified, load the posts that match 
    2239         if ( !empty($q['s']) ) { 
    2240             // added slashes screw with quote grouping when done early, so done later 
    2241             $q['s'] = stripslashes($q['s']); 
    2242             if ( empty( $_GET['s'] ) && $this->is_main_query() ) 
    2243                 $q['s'] = urldecode($q['s']); 
    2244             if ( !empty($q['sentence']) ) { 
    2245                 $q['search_terms'] = array($q['s']); 
    2246             } else { 
    2247                 preg_match_all('/".*?("|$)|((?<=[\r\n\t ",+])|^)[^\r\n\t ",+]+/', $q['s'], $matches); 
    2248                 $q['search_terms'] = array_map('_search_terms_tidy', $matches[0]); 
    2249             } 
    2250             $n = !empty($q['exact']) ? '' : '%'; 
    2251             $searchand = ''; 
    2252             foreach( (array) $q['search_terms'] as $term ) { 
    2253                 $term = esc_sql( like_escape( $term ) ); 
    2254                 $search .= "{$searchand}(($wpdb->posts.post_title LIKE '{$n}{$term}{$n}') OR ($wpdb->posts.post_content LIKE '{$n}{$term}{$n}'))"; 
    2255                 $searchand = ' AND '; 
    2256             } 
    2257  
    2258             if ( !empty($search) ) { 
    2259                 $search = " AND ({$search}) "; 
    2260                 if ( !is_user_logged_in() ) 
    2261                     $search .= " AND ($wpdb->posts.post_password = '') "; 
    2262             } 
    2263         } 
    2264  
    2265         // Allow plugins to contextually add/remove/modify the search section of the database query 
    2266         $search = apply_filters_ref_array('posts_search', array( $search, &$this ) ); 
     2427        // If a search pattern is specified, load the posts that match. 
     2428        if ( ! empty( $q['s'] ) ) 
     2429            $search = $this->parse_search( $q ); 
    22672430 
    22682431        // Taxonomies 
     
    24622625        } 
    24632626 
     2627        // Order search results by relevance only when another "orderby" is not specified in the query. 
     2628        if ( ! empty( $q['s'] ) ) { 
     2629            $search_orderby = ''; 
     2630            if ( ! empty( $q['search_orderby_title'] ) && empty( $q['orderby'] ) ) 
     2631                $search_orderby = $this->parse_search_order( $q ); 
     2632 
     2633            /** 
     2634             * Filter the ORDER BY used when ordering search results. 
     2635             * 
     2636             * @since 3.7.0 
     2637             * 
     2638             * @param string   $search_orderby The ORDER BY clause. 
     2639             * @param WP_Query $this           The current WP_Query instance. 
     2640             */ 
     2641            $search_orderby = apply_filters( 'posts_search_orderby', $search_orderby, $this ); 
     2642            if ( $search_orderby ) 
     2643                $orderby = $orderby ? $search_orderby . ', ' . $orderby : $search_orderby; 
     2644        } 
     2645 
    24642646        if ( is_array( $post_type ) && count( $post_type ) > 1 ) { 
    24652647            $post_type_cap = 'multiple_post_type'; 
Note: See TracChangeset for help on using the changeset viewer.