WordPress.org

Make WordPress Core

Changeset 10391


Ignore:
Timestamp:
01/21/09 18:50:51 (5 years ago)
Author:
ryan
Message:

Refactor filters to avoid potential XSS attacks, props sambauers and DD32. fixes #8767 for 2.7

Location:
branches/2.7/wp-includes
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • branches/2.7/wp-includes/compat.php

    r7140 r10391  
    9797} 
    9898 
     99if ( !function_exists( 'htmlspecialchars_decode' ) ) { 
     100    // Added in PHP 5.1.0 
     101    // Error checks from PEAR::PHP_Compat 
     102    function htmlspecialchars_decode( $str, $quote_style = ENT_COMPAT ) 
     103    { 
     104        if ( !is_scalar( $string ) ) { 
     105            trigger_error( 'htmlspecialchars_decode() expects parameter 1 to be string, ' . gettype( $string ) . ' given', E_USER_WARNING ); 
     106            return; 
     107        } 
     108 
     109        if ( !is_int( $quote_style ) && $quote_style !== null ) { 
     110            trigger_error( 'htmlspecialchars_decode() expects parameter 2 to be integer, ' . gettype( $quote_style ) . ' given', E_USER_WARNING ); 
     111            return; 
     112        } 
     113 
     114        return wp_specialchars_decode( $str, $quote_style ); 
     115    } 
     116} 
     117 
    99118?> 
  • branches/2.7/wp-includes/formatting.php

    r10371 r10391  
    187187 * Converts a number of special characters into their HTML entities. 
    188188 * 
    189  * Differs from htmlspecialchars as existing HTML entities will not be encoded. 
    190  * Specifically changes: & to &#038;, < to &lt; and > to &gt;. 
    191  * 
    192  * $quotes can be set to 'single' to encode ' to &#039;, 'double' to encode " to 
    193  * &quot;, or '1' to do both. Default is 0 where no quotes are encoded. 
     189 * Specifically deals with: &, <, >, ", and '. 
     190 * 
     191 * $quote_style can be set to ENT_COMPAT to encode " to 
     192 * &quot;, or ENT_QUOTES to encode both " and '. Default is ENT_NOQUOTES where no quotes are encoded. 
    194193 * 
    195194 * @since 1.2.2 
    196195 * 
    197  * @param string $text The text which is to be encoded. 
    198  * @param mixed $quotes Optional. Converts single quotes if set to 'single', double if set to 'double' or both if otherwise set. Default 0. 
     196 * @param string $string The text which is to be encoded. 
     197 * @param mixed $quote_style Optional. Converts double quotes if set to ENT_COMPAT, both single and double if set to ENT_QUOTES or none if set to ENT_NOQUOTES. Also compatible with old values; converting single quotes if set to 'single', double if set to 'double' or both if otherwise set. Default is ENT_NOQUOTES. 
     198 * @param string $charset Optional. The character encoding of the string. Default is false. 
     199 * @param boolean $double_encode Optional. Whether or not to encode existing html entities. Default is false. 
    199200 * @return string The encoded text with HTML entities. 
    200201 */ 
    201 function wp_specialchars( $text, $quotes = 0 ) { 
    202     // Like htmlspecialchars except don't double-encode HTML entities 
    203     $text = str_replace('&&', '&#038;&', $text); 
    204     $text = str_replace('&&', '&#038;&', $text); 
    205     $text = preg_replace('/&(?:$|([^#])(?![a-z1-4]{1,8};))/', '&#038;$1', $text); 
    206     $text = str_replace('<', '&lt;', $text); 
    207     $text = str_replace('>', '&gt;', $text); 
    208     if ( 'double' === $quotes ) { 
    209         $text = str_replace('"', '&quot;', $text); 
    210     } elseif ( 'single' === $quotes ) { 
    211         $text = str_replace("'", '&#039;', $text); 
    212     } elseif ( $quotes ) { 
    213         $text = str_replace('"', '&quot;', $text); 
    214         $text = str_replace("'", '&#039;', $text); 
    215     } 
    216     return $text; 
     202function wp_specialchars( $string, $quote_style = ENT_NOQUOTES, $charset = false, $double_encode = false ) 
     203{ 
     204    $string = (string) $string; 
     205 
     206    if ( 0 === strlen( $string ) ) { 
     207        return ''; 
     208    } 
     209 
     210    // Don't bother if there are no specialchars - saves some processing 
     211    if ( !preg_match( '/[&<>"\']/', $string ) ) { 
     212        return $string; 
     213    } 
     214 
     215    // Account for the previous behaviour of the function when the $quote_style is not an accepted value 
     216    if ( empty( $quote_style ) ) { 
     217        $quote_style = ENT_NOQUOTES; 
     218    } elseif ( !in_array( $quote_style, array( 0, 2, 3, 'single', 'double' ), true ) ) { 
     219        $quote_style = ENT_QUOTES; 
     220    } 
     221 
     222    // Store the site charset as a static to avoid multiple calls to wp_load_alloptions() 
     223    if ( !$charset ) { 
     224        static $_charset; 
     225        if ( !isset( $_charset ) ) { 
     226            $alloptions = wp_load_alloptions(); 
     227            $_charset = isset( $alloptions['blog_charset'] ) ? $alloptions['blog_charset'] : ''; 
     228        } 
     229        $charset = $_charset; 
     230    } 
     231    if ( in_array( $charset, array( 'utf8', 'utf-8', 'UTF8' ) ) ) { 
     232        $charset = 'UTF-8'; 
     233    } 
     234 
     235    $_quote_style = $quote_style; 
     236 
     237    if ( $quote_style === 'double' ) { 
     238        $quote_style = ENT_COMPAT; 
     239        $_quote_style = ENT_COMPAT; 
     240    } elseif ( $quote_style === 'single' ) { 
     241        $quote_style = ENT_NOQUOTES; 
     242    } 
     243 
     244    // Handle double encoding ourselves 
     245    if ( !$double_encode ) { 
     246        $string = wp_specialchars_decode( $string, $_quote_style ); 
     247        $string = preg_replace( '/&(#?x?[0-9]+|[a-z]+);/i', '|wp_entity|$1|/wp_entity|', $string ); 
     248    } 
     249 
     250    $string = htmlspecialchars( $string, $quote_style, $charset ); 
     251 
     252    // Handle double encoding ourselves 
     253    if ( !$double_encode ) { 
     254        $string = str_replace( array( '|wp_entity|', '|/wp_entity|' ), array( '&', ';' ), $string ); 
     255    } 
     256 
     257    // Backwards compatibility 
     258    if ( 'single' === $_quote_style ) { 
     259        $string = str_replace( "'", '&#039;', $string ); 
     260    } 
     261 
     262    return $string; 
     263} 
     264 
     265/** 
     266 * Converts a number of HTML entities into their special characters. 
     267 * 
     268 * Specifically deals with: &, <, >, ", and '. 
     269 * 
     270 * $quote_style can be set to ENT_COMPAT to decode " entities, 
     271 * or ENT_QUOTES to do both " and '. Default is ENT_NOQUOTES where no quotes are decoded. 
     272 * 
     273 * @since 2.8 
     274 * 
     275 * @param string $string The text which is to be decoded. 
     276 * @param mixed $quote_style Optional. Converts double quotes if set to ENT_COMPAT, both single and double if set to ENT_QUOTES or none if set to ENT_NOQUOTES. Also compatible with old wp_specialchars() values; converting single quotes if set to 'single', double if set to 'double' or both if otherwise set. Default is ENT_NOQUOTES. 
     277 * @return string The decoded text without HTML entities. 
     278 */ 
     279function wp_specialchars_decode( $string, $quote_style = ENT_NOQUOTES ) 
     280{ 
     281    $string = (string) $string; 
     282 
     283    if ( 0 === strlen( $string ) ) { 
     284        return ''; 
     285    } 
     286 
     287    // Don't bother if there are no entities - saves a lot of processing 
     288    if ( strpos( $string, '&' ) === false ) { 
     289        return $string; 
     290    } 
     291 
     292    // Match the previous behaviour of wp_specialchars() when the $quote_style is not an accepted value 
     293    if ( empty( $quote_style ) ) { 
     294        $quote_style = ENT_NOQUOTES; 
     295    } elseif ( !in_array( $quote_style, array( 0, 2, 3, 'single', 'double' ), true ) ) { 
     296        $quote_style = ENT_QUOTES; 
     297    } 
     298 
     299    // More complete than get_html_translation_table( HTML_SPECIALCHARS ) 
     300    $single = array( '&#039;'  => '\'', '&#x27;' => '\'' ); 
     301    $single_preg = array( '/&#0*39;/'  => '&#039;', '/&#x0*27;/i' => '&#x27;' ); 
     302    $double = array( '&quot;' => '"', '&#034;'  => '"', '&#x22;' => '"' ); 
     303    $double_preg = array( '/&#0*34;/'  => '&#034;', '/&#x0*22;/i' => '&#x22;' ); 
     304    $others = array( '&lt;'   => '<', '&#060;'  => '<', '&gt;'   => '>', '&#062;'  => '>', '&amp;'  => '&', '&#038;'  => '&', '&#x26;' => '&' ); 
     305    $others_preg = array( '/&#0*60;/'  => '&#060;', '/&#0*62;/'  => '&#062;', '/&#0*38;/'  => '&#038;', '/&#x0*26;/i' => '&#x26;' ); 
     306 
     307    if ( $quote_style === ENT_QUOTES ) { 
     308        $translation = array_merge( $single, $double, $others ); 
     309        $translation_preg = array_merge( $single_preg, $double_preg, $others_preg ); 
     310    } elseif ( $quote_style === ENT_COMPAT || $quote_style === 'double' ) { 
     311        $translation = array_merge( $double, $others ); 
     312        $translation_preg = array_merge( $double_preg, $others_preg ); 
     313    } elseif ( $quote_style === 'single' ) { 
     314        $translation = array_merge( $single, $others ); 
     315        $translation_preg = array_merge( $single_preg, $others_preg ); 
     316    } elseif ( $quote_style === ENT_NOQUOTES ) { 
     317        $translation = $others; 
     318        $translation_preg = $others_preg; 
     319    } 
     320 
     321    // Remove zero padding on numeric entities 
     322    $string = preg_replace( array_keys( $translation_preg ), array_values( $translation_preg ), $string ); 
     323 
     324    // Replace characters according to translation table 
     325    return strtr( $string, $translation ); 
     326} 
     327 
     328/** 
     329 * Checks for invalid UTF8 in a string. 
     330 * 
     331 * @since 2.8 
     332 * 
     333 * @param string $string The text which is to be checked. 
     334 * @param boolean $strip Optional. Whether to attempt to strip out invalid UTF8. Default is false. 
     335 * @return string The checked text. 
     336 */ 
     337function wp_check_invalid_utf8( $string, $strip = false ) 
     338{ 
     339    $string = (string) $string; 
     340 
     341    if ( 0 === strlen( $string ) ) { 
     342        return ''; 
     343    } 
     344 
     345    // Store the site charset as a static to avoid multiple calls to get_option() 
     346    static $is_utf8; 
     347    if ( !isset( $is_utf8 ) ) { 
     348        $is_utf8 = in_array( get_option( 'blog_charset' ), array( 'utf8', 'utf-8', 'UTF8', 'UTF-8' ) ); 
     349    } 
     350    if ( !$is_utf8 ) { 
     351        return $string; 
     352    } 
     353 
     354    // Check for support for utf8 in the installed PCRE library once and store the result in a static 
     355    static $utf8_pcre; 
     356    if ( !isset( $utf8_pcre ) ) { 
     357        $utf8_pcre = @preg_match( '/^./u', 'a' ); 
     358    } 
     359    // We can't demand utf8 in the PCRE installation, so just return the string in those cases 
     360    if ( !$utf8_pcre ) { 
     361        return $string; 
     362    } 
     363 
     364    // preg_match fails when it encounters invalid UTF8 in $string 
     365    if ( 1 === @preg_match( '/^./us', $string ) ) { 
     366        return $string; 
     367    } 
     368 
     369    // Attempt to strip the bad chars if requested (not recommended) 
     370    if ( $strip && function_exists( 'iconv' ) ) { 
     371        return iconv( 'utf-8', 'utf-8', $string ); 
     372    } 
     373 
     374    return ''; 
    217375} 
    218376 
     
    17441902 */ 
    17451903function js_escape($text) { 
         // Encodes HTML special characters in $text, turns single-quote entities (&#39; / &#x27;) back into ',
         // backslash-escapes the result with addslashes() and replaces line breaks with a literal \n,
         // then passes the escaped and the original text through the 'js_escape' filter.
    1746     $safe_text = wp_specialchars($text, 'double'); 
    1747     $safe_text = preg_replace('/&#(x)?0*(?(1)27|39);?/i', "'", stripslashes($safe_text)); 
    1748     $safe_text = preg_replace("/\r?\n/", "\\n", addslashes($safe_text)); 
    1749     return apply_filters('js_escape', $safe_text, $text); 
         // r10391 lines: the text first goes through wp_check_invalid_utf8(), and wp_specialchars()
         // is called with the ENT_COMPAT constant in place of the legacy 'double' flag.
     1904    $safe_text = wp_check_invalid_utf8( $text ); 
     1905    $safe_text = wp_specialchars( $safe_text, ENT_COMPAT ); 
     1906    $safe_text = preg_replace( '/&#(x)?0*(?(1)27|39);?/i', "'", stripslashes( $safe_text ) ); 
     1907    $safe_text = preg_replace( "/\r?\n/", "\\n", addslashes( $safe_text ) ); 
     1908    return apply_filters( 'js_escape', $safe_text, $text ); 
    17501909} 
    17511910 
     
    17581917 * @return string 
    17591918 */ 
    1760 function attribute_escape($text) { 
    1761     $safe_text = wp_specialchars($text, true); 
    1762     return apply_filters('attribute_escape', $safe_text, $text); 
         // r10391 lines: the text first goes through wp_check_invalid_utf8(), then wp_specialchars()
         // encodes it with ENT_QUOTES (the earlier call passed true, which wp_specialchars() maps to
         // ENT_QUOTES), and the 'attribute_escape' filter receives the escaped and the original text.
     1919function attribute_escape( $text ) { 
     1920    $safe_text = wp_check_invalid_utf8( $text ); 
     1921    $safe_text = wp_specialchars( $safe_text, ENT_QUOTES ); 
     1922    return apply_filters( 'attribute_escape', $safe_text, $text ); 
    17631923} 
    17641924 
Note: See TracChangeset for help on using the changeset viewer.