Make WordPress Core

Changeset 10391


Ignore:
Timestamp:
01/21/2009 06:50:51 PM (16 years ago)
Author:
ryan
Message:

Refactor filters to avoid potential XSS attacks, props sambauers and DD32. fixes #8767 for 2.7

Location:
branches/2.7/wp-includes
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • branches/2.7/wp-includes/compat.php

    r7140 r10391  
    9797}
    9898
     99if ( !function_exists( 'htmlspecialchars_decode' ) ) {
     100    // Added in PHP 5.1.0
     101    // Error checks from PEAR::PHP_Compat
     102    function htmlspecialchars_decode( $str, $quote_style = ENT_COMPAT )
     103    {
     104        if ( !is_scalar( $string ) ) {
     105            trigger_error( 'htmlspecialchars_decode() expects parameter 1 to be string, ' . gettype( $string ) . ' given', E_USER_WARNING );
     106            return;
     107        }
     108
     109        if ( !is_int( $quote_style ) && $quote_style !== null ) {
     110            trigger_error( 'htmlspecialchars_decode() expects parameter 2 to be integer, ' . gettype( $quote_style ) . ' given', E_USER_WARNING );
     111            return;
     112        }
     113
     114        return wp_specialchars_decode( $str, $quote_style );
     115    }
     116}
     117
    99118?>
  • branches/2.7/wp-includes/formatting.php

    r10371 r10391  
    187187 * Converts a number of special characters into their HTML entities.
    188188 *
    189  * Differs from htmlspecialchars as existing HTML entities will not be encoded.
    190  * Specifically changes: & to &#038;, < to &lt; and > to &gt;.
    191  *
    192  * $quotes can be set to 'single' to encode ' to &#039;, 'double' to encode " to
    193  * &quot;, or '1' to do both. Default is 0 where no quotes are encoded.
     189 * Specifically deals with: &, <, >, ", and '.
     190 *
     191 * $quote_style can be set to ENT_COMPAT to encode " to
     192 * &quot;, or ENT_QUOTES to do both. Default is ENT_NOQUOTES where no quotes are encoded.
    194193 *
    195194 * @since 1.2.2
    196195 *
    197  * @param string $text The text which is to be encoded.
    198  * @param mixed $quotes Optional. Converts single quotes if set to 'single', double if set to 'double' or both if otherwise set. Default 0.
     196 * @param string $string The text which is to be encoded.
     197 * @param mixed $quote_style Optional. Converts double quotes if set to ENT_COMPAT, both single and double if set to ENT_QUOTES or none if set to ENT_NOQUOTES. Also compatible with old values; converting single quotes if set to 'single', double if set to 'double' or both if otherwise set. Default is ENT_NOQUOTES.
     198 * @param string $charset Optional. The character encoding of the string. Default is false.
     199 * @param boolean $double_encode Optional. Whether or not to encode existing html entities. Default is false.
    199200 * @return string The encoded text with HTML entities.
    200201 */
    201 function wp_specialchars( $text, $quotes = 0 ) {
    202     // Like htmlspecialchars except don't double-encode HTML entities
    203     $text = str_replace('&&', '&#038;&', $text);
    204     $text = str_replace('&&', '&#038;&', $text);
    205     $text = preg_replace('/&(?:$|([^#])(?![a-z1-4]{1,8};))/', '&#038;$1', $text);
    206     $text = str_replace('<', '&lt;', $text);
    207     $text = str_replace('>', '&gt;', $text);
    208     if ( 'double' === $quotes ) {
    209         $text = str_replace('"', '&quot;', $text);
    210     } elseif ( 'single' === $quotes ) {
    211         $text = str_replace("'", '&#039;', $text);
    212     } elseif ( $quotes ) {
    213         $text = str_replace('"', '&quot;', $text);
    214         $text = str_replace("'", '&#039;', $text);
    215     }
    216     return $text;
     202function wp_specialchars( $string, $quote_style = ENT_NOQUOTES, $charset = false, $double_encode = false )
     203{
     204    $string = (string) $string;
     205
     206    if ( 0 === strlen( $string ) ) {
     207        return '';
     208    }
     209
     210    // Don't bother if there are no specialchars - saves some processing
     211    if ( !preg_match( '/[&<>"\']/', $string ) ) {
     212        return $string;
     213    }
     214
     215    // Account for the previous behaviour of the function when the $quote_style is not an accepted value
     216    if ( empty( $quote_style ) ) {
     217        $quote_style = ENT_NOQUOTES;
     218    } elseif ( !in_array( $quote_style, array( 0, 2, 3, 'single', 'double' ), true ) ) {
     219        $quote_style = ENT_QUOTES;
     220    }
     221
     222    // Store the site charset as a static to avoid multiple calls to wp_load_alloptions()
     223    if ( !$charset ) {
     224        static $_charset;
     225        if ( !isset( $_charset ) ) {
     226            $alloptions = wp_load_alloptions();
     227            $_charset = isset( $alloptions['blog_charset'] ) ? $alloptions['blog_charset'] : '';
     228        }
     229        $charset = $_charset;
     230    }
     231    if ( in_array( $charset, array( 'utf8', 'utf-8', 'UTF8' ) ) ) {
     232        $charset = 'UTF-8';
     233    }
     234
     235    $_quote_style = $quote_style;
     236
     237    if ( $quote_style === 'double' ) {
     238        $quote_style = ENT_COMPAT;
     239        $_quote_style = ENT_COMPAT;
     240    } elseif ( $quote_style === 'single' ) {
     241        $quote_style = ENT_NOQUOTES;
     242    }
     243
     244    // Handle double encoding ourselves
     245    if ( !$double_encode ) {
     246        $string = wp_specialchars_decode( $string, $_quote_style );
     247        $string = preg_replace( '/&(#?x?[0-9]+|[a-z]+);/i', '|wp_entity|$1|/wp_entity|', $string );
     248    }
     249
     250    $string = htmlspecialchars( $string, $quote_style, $charset );
     251
     252    // Handle double encoding ourselves
     253    if ( !$double_encode ) {
     254        $string = str_replace( array( '|wp_entity|', '|/wp_entity|' ), array( '&', ';' ), $string );
     255    }
     256
     257    // Backwards compatibility
     258    if ( 'single' === $_quote_style ) {
     259        $string = str_replace( "'", '&#039;', $string );
     260    }
     261
     262    return $string;
     263}
     264
     265/**
     266 * Converts a number of HTML entities into their special characters.
     267 *
     268 * Specifically deals with: &, <, >, ", and '.
     269 *
     270 * $quote_style can be set to ENT_COMPAT to decode " entities,
     271 * or ENT_QUOTES to do both " and '. Default is ENT_NOQUOTES where no quotes are decoded.
     272 *
     273 * @since 2.8
     274 *
     275 * @param string $string The text which is to be decoded.
     276 * @param mixed $quote_style Optional. Converts double quotes if set to ENT_COMPAT, both single and double if set to ENT_QUOTES or none if set to ENT_NOQUOTES. Also compatible with old wp_specialchars() values; converting single quotes if set to 'single', double if set to 'double' or both if otherwise set. Default is ENT_NOQUOTES.
     277 * @return string The decoded text without HTML entities.
     278 */
     279function wp_specialchars_decode( $string, $quote_style = ENT_NOQUOTES )
     280{
     281    $string = (string) $string;
     282
     283    if ( 0 === strlen( $string ) ) {
     284        return '';
     285    }
     286
     287    // Don't bother if there are no entities - saves a lot of processing
     288    if ( strpos( $string, '&' ) === false ) {
     289        return $string;
     290    }
     291
     292    // Match the previous behaviour of wp_specialchars() when the $quote_style is not an accepted value
     293    if ( empty( $quote_style ) ) {
     294        $quote_style = ENT_NOQUOTES;
     295    } elseif ( !in_array( $quote_style, array( 0, 2, 3, 'single', 'double' ), true ) ) {
     296        $quote_style = ENT_QUOTES;
     297    }
     298
     299    // More complete than get_html_translation_table( HTML_SPECIALCHARS )
     300    $single = array( '&#039;'  => '\'', '&#x27;' => '\'' );
     301    $single_preg = array( '/&#0*39;/'  => '&#039;', '/&#x0*27;/i' => '&#x27;' );
     302    $double = array( '&quot;' => '"', '&#034;'  => '"', '&#x22;' => '"' );
     303    $double_preg = array( '/&#0*34;/'  => '&#034;', '/&#x0*22;/i' => '&#x22;' );
     304    $others = array( '&lt;'   => '<', '&#060;'  => '<', '&gt;'   => '>', '&#062;'  => '>', '&amp;'  => '&', '&#038;'  => '&', '&#x26;' => '&' );
     305    $others_preg = array( '/&#0*60;/'  => '&#060;', '/&#0*62;/'  => '&#062;', '/&#0*38;/'  => '&#038;', '/&#x0*26;/i' => '&#x26;' );
     306
     307    if ( $quote_style === ENT_QUOTES ) {
     308        $translation = array_merge( $single, $double, $others );
     309        $translation_preg = array_merge( $single_preg, $double_preg, $others_preg );
     310    } elseif ( $quote_style === ENT_COMPAT || $quote_style === 'double' ) {
     311        $translation = array_merge( $double, $others );
     312        $translation_preg = array_merge( $double_preg, $others_preg );
     313    } elseif ( $quote_style === 'single' ) {
     314        $translation = array_merge( $single, $others );
     315        $translation_preg = array_merge( $single_preg, $others_preg );
     316    } elseif ( $quote_style === ENT_NOQUOTES ) {
     317        $translation = $others;
     318        $translation_preg = $others_preg;
     319    }
     320
     321    // Remove zero padding on numeric entities
     322    $string = preg_replace( array_keys( $translation_preg ), array_values( $translation_preg ), $string );
     323
     324    // Replace characters according to translation table
     325    return strtr( $string, $translation );
     326}
     327
     328/**
     329 * Checks for invalid UTF8 in a string.
     330 *
     331 * @since 2.8
     332 *
     333 * @param string $string The text which is to be checked.
     334 * @param boolean $strip Optional. Whether to attempt to strip out invalid UTF8. Default is false.
     335 * @return string The checked text.
     336 */
     337function wp_check_invalid_utf8( $string, $strip = false )
     338{
     339    $string = (string) $string;
     340
     341    if ( 0 === strlen( $string ) ) {
     342        return '';
     343    }
     344
     345    // Store the site charset as a static to avoid multiple calls to get_option()
     346    static $is_utf8;
     347    if ( !isset( $is_utf8 ) ) {
     348        $is_utf8 = in_array( get_option( 'blog_charset' ), array( 'utf8', 'utf-8', 'UTF8', 'UTF-8' ) );
     349    }
     350    if ( !$is_utf8 ) {
     351        return $string;
     352    }
     353
     354    // Check for support for utf8 in the installed PCRE library once and store the result in a static
     355    static $utf8_pcre;
     356    if ( !isset( $utf8_pcre ) ) {
     357        $utf8_pcre = @preg_match( '/^./u', 'a' );
     358    }
     359    // We can't demand utf8 in the PCRE installation, so just return the string in those cases
     360    if ( !$utf8_pcre ) {
     361        return $string;
     362    }
     363
     364    // preg_match fails when it encounters invalid UTF8 in $string
     365    if ( 1 === @preg_match( '/^./us', $string ) ) {
     366        return $string;
     367    }
     368
     369    // Attempt to strip the bad chars if requested (not recommended)
     370    if ( $strip && function_exists( 'iconv' ) ) {
     371        return iconv( 'utf-8', 'utf-8', $string );
     372    }
     373
     374    return '';
    217375}
    218376
     
    17441902 */
    17451903function js_escape($text) {
    1746     $safe_text = wp_specialchars($text, 'double');
    1747     $safe_text = preg_replace('/&#(x)?0*(?(1)27|39);?/i', "'", stripslashes($safe_text));
    1748     $safe_text = preg_replace("/\r?\n/", "\\n", addslashes($safe_text));
    1749     return apply_filters('js_escape', $safe_text, $text);
     1904    $safe_text = wp_check_invalid_utf8( $text );
     1905    $safe_text = wp_specialchars( $safe_text, ENT_COMPAT );
     1906    $safe_text = preg_replace( '/&#(x)?0*(?(1)27|39);?/i', "'", stripslashes( $safe_text ) );
     1907    $safe_text = preg_replace( "/\r?\n/", "\\n", addslashes( $safe_text ) );
     1908    return apply_filters( 'js_escape', $safe_text, $text );
    17501909}
    17511910
     
    17581917 * @return string
    17591918 */
    1760 function attribute_escape($text) {
    1761     $safe_text = wp_specialchars($text, true);
    1762     return apply_filters('attribute_escape', $safe_text, $text);
     1919function attribute_escape( $text ) {
     1920    $safe_text = wp_check_invalid_utf8( $text );
     1921    $safe_text = wp_specialchars( $safe_text, ENT_QUOTES );
     1922    return apply_filters( 'attribute_escape', $safe_text, $text );
    17631923}
    17641924
Note: See TracChangeset for help on using the changeset viewer.