Make WordPress Core

Ticket #22363: 22363.6.patch

File 22363.6.patch, 4.9 KB (added by p_enrique, 12 years ago)

Better check for PCRE UTF-8, updated comment block

  • src/wp-includes/default-filters.php

     
    177177add_filter( 'tiny_mce_before_init',     '_mce_set_direction'                  );
    178178add_filter( 'pre_kses',                 'wp_pre_kses_less_than'               );
    179179add_filter( 'sanitize_title',           'sanitize_title_with_dashes',   10, 3 );
     180add_filter( 'sanitize_file_name',               'remove_accents'                                          );
    180181add_action( 'check_comment_flood',      'check_comment_flood_db',       10, 3 );
    181182add_filter( 'comment_flood_filter',     'wp_throttle_comment_flood',    10, 3 );
    182183add_filter( 'pre_comment_content',      'wp_rel_nofollow',              15    );
  • src/wp-includes/formatting.php

     
    826826}
    827827
    828828/**
    829  * Sanitizes a filename, replacing whitespace with dashes.
     829 * Sanitizes a filename, replacing whitespace and extra characters with dashes.
    830830 *
    831  * Removes special characters that are illegal in filenames on certain
    832  * operating systems and special characters requiring special escaping
    833  * to manipulate at the command line. Replaces spaces and consecutive
    834  * dashes with a single dash. Trims period, dash and underscore from beginning
    835  * and end of filename.
     831 * Replaces all non-alphabetical, non-decimal characters (including
     832 * spaces) with dashes. Strips HTML tags. Munges extraneous file extensions
     833 * with underscores.
     834 * If the PCRE UTF-8 extension is available:
     835 *  - Accepts and returns UTF-8 filenames
     836 *  - Converts HTML entities and strips them if they are not alphanumerical
     837 * Converts the filenames to lowercase when possible.
    836838 *
    837839 * @since 2.1.0
    838840 *
     
    841843 */
    842844function sanitize_file_name( $filename ) {
    843845        $filename_raw = $filename;
     846       
     847        $filename = wp_strip_all_tags( $filename );
     848       
     849        // Check if PCRE UTF-8 extension is compiled and working.
     850        static $pcre_utf8 = null;
     851        if ( is_null( $pcre_utf8 ) )
     852                $pcre_utf8 = ( 1 === @preg_match( '`[\p{L}]`u', "\xc3\xa0" ) ); // Try to match "latin small letter a with grave". Returns (int) 1 or (boolean) false.
     853        $utf8_modifier = $pcre_utf8 ? 'u' : '';
     854       
     855        if ( $pcre_utf8 ) {
     856                $filename = html_entity_decode( $filename, ENT_NOQUOTES, 'UTF-8' );
     857                $filename = preg_replace( '`(?!\.)[^\p{L}\p{Nd}]+`u', '-', $filename ); // Convert everything except letters, decimal numbers, and "." (dot) to dashes
     858                if ( ! $filename ) // Invalid UTF-8 string
     859                        return '';
     860        }
     861
     862        // With PCRE UTF-8, these characters will have been stripped already, but there might be some added by a filter
    844863        $special_chars = array("?", "[", "]", "/", "\\", "=", "<", ">", ":", ";", ",", "'", "\"", "&", "$", "#", "*", "(", ")", "|", "~", "`", "!", "{", "}", chr(0));
    845864        $special_chars = apply_filters('sanitize_file_name_chars', $special_chars, $filename_raw);
    846         $filename = str_replace($special_chars, '', $filename);
    847         $filename = preg_replace('/[\s-]+/', '-', $filename);
    848         $filename = trim($filename, '.-_');
     865       
     866        $strip_characters = preg_quote( implode( '', $special_chars ), '`' ); // Quote the special characters
     867        $filename = preg_replace( "`[$strip_characters]`$utf8_modifier", '-', $filename );  // Convert them to dashes
     868        $filename = preg_replace( "`[\s-]+`$utf8_modifier", '-', $filename ); // Check whitespace and multiple dashes
     869        $filename = preg_replace( "`-\.`$utf8_modifier", '.', $filename );  // Trim dashes before a dot
     870        $filename = trim( $filename, '.-_' );
     871       
     872        if ( function_exists( 'mb_strtolower' ) )
     873                $filename = mb_strtolower( $filename, mb_detect_encoding( $filename ) );
     874        else if ( ! seems_utf8( $filename ) )
     875                $filename = strtolower( $filename );
    849876
     877        // Apply filters before the allowed extensions check, since they might modify the filename
     878        $filename = apply_filters('sanitize_file_name', $filename, $filename_raw);
     879       
    850880        // Split the filename into a base and extension[s]
    851881        $parts = explode('.', $filename);
    852 
     882       
    853883        // Return if only one extension
    854884        if ( count($parts) <= 2 )
    855                 return apply_filters('sanitize_file_name', $filename, $filename_raw);
     885                return $filename;
    856886
    857887        // Process multiple extensions
    858888        $filename = array_shift($parts);
     
    864894        foreach ( (array) $parts as $part) {
    865895                $filename .= '.' . $part;
    866896
    867                 if ( preg_match("/^[a-zA-Z]{2,5}\d?$/", $part) ) {
     897                if ( preg_match("`^[a-zA-Z]{2,5}\d?$`$utf8_modifier", $part) ) {
    868898                        $allowed = false;
    869899                        foreach ( $mimes as $ext_preg => $mime_match ) {
    870                                 $ext_preg = '!^(' . $ext_preg . ')$!i';
     900                                $ext_preg = "`^($ext_preg)$`i$utf8_modifier";
    871901                                if ( preg_match( $ext_preg, $part ) ) {
    872902                                        $allowed = true;
    873903                                        break;
     
    879909        }
    880910        $filename .= '.' . $extension;
    881911
    882         return apply_filters('sanitize_file_name', $filename, $filename_raw);
     912        return $filename;
    883913}
    884914
    885915/**