Make WordPress Core

Ticket #22363: 22363.5.patch

File 22363.5.patch, 3.8 KB (added by p_enrique, 12 years ago)

Use `preg_replace` with the UTF-8 (`u`) modifier when PCRE supports it to strip special characters, decode HTML entities, and apply `remove_accents` and `mb_strtolower` to the file name.

  • src/wp-includes/default-filters.php

     
    177177add_filter( 'tiny_mce_before_init',     '_mce_set_direction'                  );
    178178add_filter( 'pre_kses',                 'wp_pre_kses_less_than'               );
    179179add_filter( 'sanitize_title',           'sanitize_title_with_dashes',   10, 3 );
     180add_filter( 'sanitize_file_name',               'remove_accents'                                          );
    180181add_action( 'check_comment_flood',      'check_comment_flood_db',       10, 3 );
    181182add_filter( 'comment_flood_filter',     'wp_throttle_comment_flood',    10, 3 );
    182183add_filter( 'pre_comment_content',      'wp_rel_nofollow',              15    );
  • src/wp-includes/formatting.php

     
    841841 */
    842842function sanitize_file_name( $filename ) {
    843843        $filename_raw = $filename;
     844       
     845        $filename = wp_strip_all_tags( $filename );
     846       
     847        $pcre_utf8 = ( false != @preg_match( '`[\p{L}]`u', 'A' ) ); // Try to detect if PCRE UTF-8 extension is compiled and working. Should return '1'.
     848        $utf8_modifier = $pcre_utf8 ? 'u' : '';
     849       
     850        if ( $pcre_utf8 ) {
     851                $filename = html_entity_decode( $filename, ENT_NOQUOTES, 'UTF-8' );
     852                $filename = preg_replace( '`(?!\.)[^\p{L}\p{Nd}]+`u', '-', $filename ); // Convert everything except letters, decimal numbers, and "." (dot) to dashes
     853                if ( ! $filename ) // Invalid UTF-8 string
     854                        return '';
     855        }
     856
     857        // With PCRE UTF-8, these characters will have been stripped already, but there might be some added by a filter
    844858        $special_chars = array("?", "[", "]", "/", "\\", "=", "<", ">", ":", ";", ",", "'", "\"", "&", "$", "#", "*", "(", ")", "|", "~", "`", "!", "{", "}", chr(0));
    845859        $special_chars = apply_filters('sanitize_file_name_chars', $special_chars, $filename_raw);
    846         $filename = str_replace($special_chars, '', $filename);
    847         $filename = preg_replace('/[\s-]+/', '-', $filename);
    848         $filename = trim($filename, '.-_');
     860       
     861        $strip_characters = preg_quote( implode( '', $special_chars ), '`' ); // Quote the special characters
     862        $filename = preg_replace( "`[$strip_characters]`$utf8_modifier", '-', $filename );  // Convert them to dashes
     863        $filename = preg_replace( "`[\s-]+`$utf8_modifier", '-', $filename ); // Check whitespace and multiple dashes
     864        $filename = preg_replace( "`-\.`$utf8_modifier", '.', $filename );  // Trim dashes before a dot
     865        $filename = trim( $filename, '.-_' );
     866       
     867        if ( function_exists( 'mb_strtolower' ) )
     868                $filename = mb_strtolower( $filename, mb_detect_encoding( $filename ) );
    849869
     870        // Apply filters before the allowed extensions check, since they might modify the filename
     871        $filename = apply_filters('sanitize_file_name', $filename, $filename_raw);
     872       
    850873        // Split the filename into a base and extension[s]
    851874        $parts = explode('.', $filename);
    852 
     875       
    853876        // Return if only one extension
    854877        if ( count($parts) <= 2 )
    855                 return apply_filters('sanitize_file_name', $filename, $filename_raw);
     878                return $filename;
    856879
    857880        // Process multiple extensions
    858881        $filename = array_shift($parts);
     
    864887        foreach ( (array) $parts as $part) {
    865888                $filename .= '.' . $part;
    866889
    867                 if ( preg_match("/^[a-zA-Z]{2,5}\d?$/", $part) ) {
     890                if ( preg_match("`^[a-zA-Z]{2,5}\d?$`$utf8_modifier", $part) ) {
    868891                        $allowed = false;
    869892                        foreach ( $mimes as $ext_preg => $mime_match ) {
    870                                 $ext_preg = '!^(' . $ext_preg . ')$!i';
     893                                $ext_preg = "`^($ext_preg)$`i$utf8_modifier";
    871894                                if ( preg_match( $ext_preg, $part ) ) {
    872895                                        $allowed = true;
    873896                                        break;
     
    879902        }
    880903        $filename .= '.' . $extension;
    881904
    882         return apply_filters('sanitize_file_name', $filename, $filename_raw);
     905        return $filename;
    883906}
    884907
    885908/**