Ticket #16042: 16042-full.patch
| File 16042-full.patch, 17.4 KB (added by , 15 years ago) |
|---|
-
wp-includes/kses.php
1 1 <?php 2 2 /** 3 * HTML/XHTML filter that only allows some elements and attributes 3 * kses 0.2.2 - HTML/XHTML filter that only allows some elements and attributes 4 * Copyright (C) 2002, 2003, 2005 Ulf Harnhammar 4 5 * 6 * This program is free software and open source software; you can redistribute 7 * it and/or modify it under the terms of the GNU General Public License as 8 * published by the Free Software Foundation; either version 2 of the License, 9 * or (at your option) any later version. 10 * 11 * This program is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 14 * more details. 15 * 16 * You should have received a copy of the GNU General Public License along 17 * with this program; if not, write to the Free Software Foundation, Inc., 18 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA or visit 19 * http://www.gnu.org/licenses/gpl.html 20 * 21 * [kses strips evil scripts!] 22 * 5 23 * Added wp_ prefix to avoid conflicts with existing kses users 6 24 * 7 25 * @version 0.2.2 … … 10 28 * 11 29 * @package External 12 30 * @subpackage KSES 13 *14 * @internal15 * *** CONTACT INFORMATION ***16 * E-mail: metaur at users dot sourceforge dot net17 * Web page: http://sourceforge.net/projects/kses18 * Paper mail: Ulf Harnhammar19 * Ymergatan 17 C20 * 753 25 Uppsala21 * SWEDEN22 *23 * [kses strips evil scripts!]24 31 */ 25 32 26 33 /** … … 29 36 * 30 37 * @since 1.2.0 31 38 */ 32 if ( !defined('CUSTOM_TAGS'))33 define( 'CUSTOM_TAGS', false);39 if ( ! defined( 'CUSTOM_TAGS' ) ) 40 define( 'CUSTOM_TAGS', false ); 34 41 35 if ( !CUSTOM_TAGS) {42 if ( ! CUSTOM_TAGS ) { 36 43 /** 37 44 * Kses global for default allowable HTML tags. 
38 45 * … … 57 64 'title' => array ()), 58 65 'acronym' => array( 59 66 'title' => array ()), 67 'article' => array( 68 'align' => array (), 69 'class' => array (), 70 'dir' => array (), 71 'lang' => array(), 72 'style' => array (), 73 'xml:lang' => array(), 74 ), 75 'aside' => array( 76 'align' => array (), 77 'class' => array (), 78 'dir' => array (), 79 'lang' => array(), 80 'style' => array (), 81 'xml:lang' => array(), 82 ), 60 83 'b' => array(), 61 84 'big' => array(), 62 85 'blockquote' => array( … … 94 117 'del' => array( 95 118 'datetime' => array ()), 96 119 'dd' => array(), 120 'details' => array( 121 'align' => array (), 122 'class' => array (), 123 'dir' => array (), 124 'lang' => array(), 125 'open' => array (), 126 'style' => array (), 127 'xml:lang' => array(), 128 ), 97 129 'div' => array( 98 130 'align' => array (), 99 131 'class' => array (), … … 105 137 'dt' => array(), 106 138 'em' => array(), 107 139 'fieldset' => array(), 140 'figure' => array( 141 'align' => array (), 142 'class' => array (), 143 'dir' => array (), 144 'lang' => array(), 145 'style' => array (), 146 'xml:lang' => array(), 147 ), 148 'figcaption' => array( 149 'align' => array (), 150 'class' => array (), 151 'dir' => array (), 152 'lang' => array(), 153 'style' => array (), 154 'xml:lang' => array(), 155 ), 108 156 'font' => array( 109 157 'color' => array (), 110 158 'face' => array (), 111 159 'size' => array ()), 160 'footer' => array( 161 'align' => array (), 162 'class' => array (), 163 'dir' => array (), 164 'lang' => array(), 165 'style' => array (), 166 'xml:lang' => array(), 167 ), 112 168 'form' => array( 113 169 'action' => array (), 114 170 'accept' => array (), … … 147 203 'class' => array (), 148 204 'id' => array (), 149 205 'style' => array ()), 206 'header' => array( 207 'align' => array (), 208 'class' => array (), 209 'dir' => array (), 210 'lang' => array(), 211 'style' => array (), 212 'xml:lang' => array(), 213 ), 214 'hgroup' => array( 215 'align' => 
array (), 216 'class' => array (), 217 'dir' => array (), 218 'lang' => array(), 219 'style' => array (), 220 'xml:lang' => array(), 221 ), 150 222 'hr' => array ( 151 223 'align' => array (), 152 224 'class' => array (), … … 177 249 'li' => array ( 178 250 'align' => array (), 179 251 'class' => array ()), 252 'menu' => array ( 253 'class' => array (), 254 'style' => array (), 255 'type' => array ()), 256 'nav' => array( 257 'align' => array (), 258 'class' => array (), 259 'dir' => array (), 260 'lang' => array(), 261 'style' => array (), 262 'xml:lang' => array(), 263 ), 180 264 'p' => array( 181 265 'class' => array (), 182 266 'align' => array (), … … 198 282 'style' => array (), 199 283 'title' => array (), 200 284 'xml:lang' => array()), 285 'section' => array( 286 'align' => array (), 287 'class' => array (), 288 'dir' => array (), 289 'lang' => array(), 290 'style' => array (), 291 'xml:lang' => array(), 292 ), 201 293 'strike' => array(), 202 294 'strong' => array(), 203 295 'sub' => array(), 296 'summary' => array( 297 'align' => array (), 298 'class' => array (), 299 'dir' => array (), 300 'lang' => array(), 301 'style' => array (), 302 'xml:lang' => array(), 303 ), 204 304 'sup' => array(), 205 305 'table' => array( 206 306 'align' => array (), … … 333 433 // 'u' => array(), 334 434 // 'ul' => array(), 335 435 ); 436 437 $allowedentitynames = array( 438 'nbsp', 'iexcl', 'cent', 'pound', 'curren', 'yen', 439 'brvbar', 'sect', 'uml', 'copy', 'ordf', 'laquo', 440 'not', 'shy', 'reg', 'macr', 'deg', 'plusmn', 441 'acute', 'micro', 'para', 'middot', 'cedil', 'ordm', 442 'raquo', 'iquest', 'Agrave', 'Aacute', 'Acirc', 'Atilde', 443 'Auml', 'Aring', 'AElig', 'Ccedil', 'Egrave', 'Eacute', 444 'Ecirc', 'Euml', 'Igrave', 'Iacute', 'Icirc', 'Iuml', 445 'ETH', 'Ntilde', 'Ograve', 'Oacute', 'Ocirc', 'Otilde', 446 'Ouml', 'times', 'Oslash', 'Ugrave', 'Uacute', 'Ucirc', 447 'Uuml', 'Yacute', 'THORN', 'szlig', 'agrave', 'aacute', 448 'acirc', 'atilde', 'auml', 
'aring', 'aelig', 'ccedil', 449 'egrave', 'eacute', 'ecirc', 'euml', 'igrave', 'iacute', 450 'icirc', 'iuml', 'eth', 'ntilde', 'ograve', 'oacute', 451 'ocirc', 'otilde', 'ouml', 'divide', 'oslash', 'ugrave', 452 'uacute', 'ucirc', 'uuml', 'yacute', 'thorn', 'yuml', 453 'quot', 'amp', 'lt', 'gt', 'apos', 'OElig', 454 'oelig', 'Scaron', 'scaron', 'Yuml', 'circ', 'tilde', 455 'ensp', 'emsp', 'thinsp', 'zwnj', 'zwj', 'lrm', 456 'rlm', 'ndash', 'mdash', 'lsquo', 'rsquo', 'sbquo', 457 'ldquo', 'rdquo', 'bdquo', 'dagger', 'Dagger', 'permil', 458 'lsaquo', 'rsaquo', 'euro', 'fnof', 'Alpha', 'Beta', 459 'Gamma', 'Delta', 'Epsilon', 'Zeta', 'Eta', 'Theta', 460 'Iota', 'Kappa', 'Lambda', 'Mu', 'Nu', 'Xi', 461 'Omicron', 'Pi', 'Rho', 'Sigma', 'Tau', 'Upsilon', 462 'Phi', 'Chi', 'Psi', 'Omega', 'alpha', 'beta', 463 'gamma', 'delta', 'epsilon', 'zeta', 'eta', 'theta', 464 'iota', 'kappa', 'lambda', 'mu', 'nu', 'xi', 465 'omicron', 'pi', 'rho', 'sigmaf', 'sigma', 'tau', 466 'upsilon', 'phi', 'chi', 'psi', 'omega', 'thetasym', 467 'upsih', 'piv', 'bull', 'hellip', 'prime', 'Prime', 468 'oline', 'frasl', 'weierp', 'image', 'real', 'trade', 469 'alefsym', 'larr', 'uarr', 'rarr', 'darr', 'harr', 470 'crarr', 'lArr', 'uArr', 'rArr', 'dArr', 'hArr', 471 'forall', 'part', 'exist', 'empty', 'nabla', 'isin', 472 'notin', 'ni', 'prod', 'sum', 'minus', 'lowast', 473 'radic', 'prop', 'infin', 'ang', 'and', 'or', 474 'cap', 'cup', 'int', 'sim', 'cong', 'asymp', 475 'ne', 'equiv', 'le', 'ge', 'sub', 'sup', 476 'nsub', 'sube', 'supe', 'oplus', 'otimes', 'perp', 477 'sdot', 'lceil', 'rceil', 'lfloor', 'rfloor', 'lang', 478 'rang', 'loz', 'spades', 'clubs', 'hearts', 'diams', 479 ); 336 480 } 337 481 338 482 /** … … 344 488 * call this function. 345 489 * 346 490 * The default allowed protocols are 'http', 'https', 'ftp', 'mailto', 'news', 347 * 'irc', 'gopher', 'nntp', 'feed', and finally 'telnet. 
This covers all common348 * link protocols, except for 'javascript' which should not be allowed for349 * untrusted users.491 * 'irc', 'gopher', 'nntp', 'feed', 'telnet, 'mms', 'rtsp' and 'svn'. This 492 * covers all common link protocols, except for 'javascript' which should not 493 * be allowed for untrusted users. 350 494 * 351 495 * @since 1.0.0 352 496 * … … 355 499 * @param array $allowed_protocols Optional. Allowed protocol in links. 356 500 * @return string Filtered content with only allowed HTML elements 357 501 */ 358 function wp_kses($string, $allowed_html, $allowed_protocols = array ('http', 'https', 'ftp', 'ftps', 'mailto', 'news', 'irc', 'gopher', 'nntp', 'feed', 'telnet')) { 502 function wp_kses($string, $allowed_html, $allowed_protocols = array ()) { 503 $allowed_protocols = wp_parse_args( $allowed_protocols, apply_filters('kses_allowed_protocols', array ('http', 'https', 'ftp', 'ftps', 'mailto', 'news', 'irc', 'gopher', 'nntp', 'feed', 'telnet', 'mms', 'rtsp', 'svn') )); 359 504 $string = wp_kses_no_null($string); 360 505 $string = wp_kses_js_entities($string); 361 506 $string = wp_kses_normalize_entities($string); … … 409 554 global $pass_allowed_html, $pass_allowed_protocols; 410 555 $pass_allowed_html = $allowed_html; 411 556 $pass_allowed_protocols = $allowed_protocols; 412 return preg_replace_callback('%((<!--.*?(-->|$))|(<[^>]*(>|$)|>))%', 413 create_function('$match', 'global $pass_allowed_html, $pass_allowed_protocols; return wp_kses_split2($match[1], $pass_allowed_html, $pass_allowed_protocols);'), $string); 557 return preg_replace_callback( '%((<!--.*?(-->|$))|(<[^>]*(>|$)|>))%', '_wp_kses_split_callback', $string ); 414 558 } 415 559 416 560 /** 561 * Callback for wp_kses_split. 
562 * 563 * @since 3.1.0 564 * @access private 565 */ 566 function _wp_kses_split_callback( $match ) { 567 global $pass_allowed_html, $pass_allowed_protocols; 568 return wp_kses_split2( $match[1], $pass_allowed_html, $pass_allowed_protocols ); 569 } 570 571 /** 417 572 * Callback for wp_kses_split for fixing malformed HTML tags. 418 573 * 419 574 * This function does a lot of work. It rejects some very malformed things like … … 495 650 # Is there a closing XHTML slash at the end of the attributes? 496 651 497 652 $xhtml_slash = ''; 498 if (preg_match('%\s /\s*$%', $attr))653 if (preg_match('%\s*/\s*$%', $attr)) 499 654 $xhtml_slash = ' /'; 500 655 501 656 # Are any attributes allowed at all for this element? … … 533 688 break; 534 689 } 535 690 536 if ( $arreach['name']== 'style' ) {691 if ( strtolower($arreach['name']) == 'style' ) { 537 692 $orig_value = $arreach['value']; 538 693 539 694 $value = safecss_filter_attr($orig_value); … … 621 776 622 777 case 2 : # attribute value, a URL after href= for instance 623 778 624 if (preg_match(' /^"([^"]*)"(\s+|$)/', $attr, $match))779 if (preg_match('%^"([^"]*)"(\s+|/?$)%', $attr, $match)) 625 780 # "value" 626 781 { 627 782 $thisval = $match[1]; 628 if ( in_array( $attrname, $uris) )783 if ( in_array(strtolower($attrname), $uris) ) 629 784 $thisval = wp_kses_bad_protocol($thisval, $allowed_protocols); 630 785 631 786 if(FALSE === array_key_exists($attrname, $attrarr)) { … … 637 792 break; 638 793 } 639 794 640 if (preg_match(" /^'([^']*)'(\s+|$)/", $attr, $match))795 if (preg_match("%^'([^']*)'(\s+|/?$)%", $attr, $match)) 641 796 # 'value' 642 797 { 643 798 $thisval = $match[1]; 644 if ( in_array( $attrname, $uris) )799 if ( in_array(strtolower($attrname), $uris) ) 645 800 $thisval = wp_kses_bad_protocol($thisval, $allowed_protocols); 646 801 647 802 if(FALSE === array_key_exists($attrname, $attrarr)) { … … 653 808 break; 654 809 } 655 810 656 if (preg_match("%^([^\s\"']+)(\s+| $)%", $attr, $match))811 if 
(preg_match("%^([^\s\"']+)(\s+|/?$)%", $attr, $match)) 657 812 # value 658 813 { 659 814 $thisval = $match[1]; 660 if ( in_array( $attrname, $uris) )815 if ( in_array(strtolower($attrname), $uris) ) 661 816 $thisval = wp_kses_bad_protocol($thisval, $allowed_protocols); 662 817 663 818 if(FALSE === array_key_exists($attrname, $attrarr)) { … … 699 854 * @param string $vless Whether the value is valueless or not. Use 'y' or 'n' 700 855 * @param string $checkname What $checkvalue is checking for. 701 856 * @param mixed $checkvalue What constraint the value should pass 702 * @return bool Whether check passes (true) or not (false)857 * @return bool Whether check passes 703 858 */ 704 859 function wp_kses_check_attr_val($value, $vless, $checkname, $checkvalue) { 705 860 $ok = true; … … 880 1035 * @return string Sanitized content 881 1036 */ 882 1037 function wp_kses_bad_protocol_once($string, $allowed_protocols) { 883 global $_kses_allowed_protocols; 884 $_kses_allowed_protocols = $allowed_protocols; 1038 $string2 = preg_split( '/:|&#0*58;|&#x0*3a;/i', $string, 2 ); 1039 if ( isset($string2[1]) && ! preg_match('%/\?%', $string2[0]) ) 1040 $string = wp_kses_bad_protocol_once2( $string2[0], $allowed_protocols ) . trim( $string2[1] ); 885 1041 886 $string2 = preg_split('/:|&#58;|&#x3a;/i', $string, 2);887 if ( isset($string2[1]) && !preg_match('%/\?%', $string2[0]) )888 $string = wp_kses_bad_protocol_once2($string2[0]) . 
trim($string2[1]);889 else890 $string = preg_replace_callback('/^((&[^;]*;|[\sA-Za-z0-9])*)'.'(:|&#58;|&#[Xx]3[Aa];)\s*/', 'wp_kses_bad_protocol_once2', $string);891 892 1042 return $string; 893 1043 } 894 1044 … … 901 1051 * @access private 902 1052 * @since 1.0.0 903 1053 * 904 * @param mixed $matches string or preg_replace_callback() matches array to check for bad protocols 1054 * @param string $string URI scheme to check against the whitelist 1055 * @param string $allowed_protocols Allowed protocols 905 1056 * @return string Sanitized content 906 1057 */ 907 function wp_kses_bad_protocol_once2($matches) { 908 global $_kses_allowed_protocols; 909 910 if ( is_array($matches) ) { 911 if ( ! isset($matches[1]) || empty($matches[1]) ) 912 return ''; 913 914 $string = $matches[1]; 915 } else { 916 $string = $matches; 917 } 918 1058 function wp_kses_bad_protocol_once2( $string, $allowed_protocols ) { 919 1059 $string2 = wp_kses_decode_entities($string); 920 1060 $string2 = preg_replace('/\s/', '', $string2); 921 1061 $string2 = wp_kses_no_null($string2); 922 1062 $string2 = strtolower($string2); 923 1063 924 1064 $allowed = false; 925 foreach ( (array) $ _kses_allowed_protocols as $one_protocol)926 if ( strtolower($one_protocol) == $string2) {1065 foreach ( (array) $allowed_protocols as $one_protocol ) 1066 if ( strtolower($one_protocol) == $string2 ) { 927 1067 $allowed = true; 928 1068 break; 929 1069 } … … 952 1092 953 1093 # Change back the allowed entities in our entity whitelist 954 1094 955 $string = preg_replace ('/&amp;([A-Za-z][A-Za-z0-9]{0,19});/', '&\\1;', $string);956 $string = preg_replace_callback('/&amp;# 0*([0-9]{1,5});/', 'wp_kses_normalize_entities2', $string);957 $string = preg_replace_callback('/&amp;# ([Xx])0*(([0-9A-Fa-f]{2}){1,2});/', 'wp_kses_normalize_entities3', $string);1095 $string = preg_replace_callback('/&amp;([A-Za-z]{2,8});/', 'wp_kses_named_entities', $string); 1096 $string = preg_replace_callback('/&amp;#(0*[0-9]{1,7});/', 'wp_kses_normalize_entities2', 
$string); 1097 $string = preg_replace_callback('/&amp;#[Xx](0*[0-9A-Fa-f]{1,6});/', 'wp_kses_normalize_entities3', $string); 958 1098 959 1099 return $string; 960 1100 } … … 962 1102 /** 963 1103 * Callback for wp_kses_normalize_entities() regular expression. 964 1104 * 1105 * This function only accepts valid named entity references, which are finite, 1106 * case-sensitive, and highly scrutinized by HTML and XML validators. 1107 * 1108 * @since 3.0.0 1109 * 1110 * @param array $matches preg_replace_callback() matches array 1111 * @return string Correctly encoded entity 1112 */ 1113 function wp_kses_named_entities($matches) { 1114 global $allowedentitynames; 1115 1116 if ( empty($matches[1]) ) 1117 return ''; 1118 1119 $i = $matches[1]; 1120 return ( ( ! in_array($i, $allowedentitynames) ) ? "&amp;$i;" : "&$i;" ); 1121 } 1122 1123 /** 1124 * Callback for wp_kses_normalize_entities() regular expression. 1125 * 965 1126 * This function helps wp_kses_normalize_entities() to only accept 16 bit values 966 1127 * and nothing more for &#number; entities. 967 1128 * … … 972 1133 * @return string Correctly encoded entity 973 1134 */ 974 1135 function wp_kses_normalize_entities2($matches) { 975 if ( ! isset($matches[1]) ||empty($matches[1]) )1136 if ( empty($matches[1]) ) 976 1137 return ''; 977 1138 978 1139 $i = $matches[1]; 979 return ( ( ! valid_unicode($i) ) || ($i > 65535) ? "&amp;#$i;" : "&#$i;" ); 1140 if (valid_unicode($i)) { 1141 $i = str_pad(ltrim($i,'0'), 3, '0', STR_PAD_LEFT); 1142 $i = "&#$i;"; 1143 } else { 1144 $i = "&amp;#$i;"; 1145 } 1146 1147 return $i; 980 1148 } 981 1149 982 1150 /** … … 991 1159 * @return string Correctly encoded entity 992 1160 */ 993 1161 function wp_kses_normalize_entities3($matches) { 994 if ( ! isset($matches[2]) || empty($matches[2]) )1162 if ( empty($matches[1]) ) 995 1163 return ''; 996 1164 997 $hexchars = $matches[ 2];998 return ( ( ! valid_unicode(hexdec($hexchars)) ) ? 
"&amp;#x$hexchars;" : "&#x$hexchars;");1165 $hexchars = $matches[1]; 1166 return ( ( ! valid_unicode(hexdec($hexchars)) ) ? "&amp;#x$hexchars;" : '&#x'.ltrim($hexchars,'0').';' ); 999 1167 } 1000 1168 1001 1169 /** … … 1196 1364 add_action('init', 'kses_init'); 1197 1365 add_action('set_current_user', 'kses_init'); 1198 1366 1367 /** 1368 * Inline CSS filter 1369 * 1370 * @since 2.8.1 1371 */ 1199 1372 function safecss_filter_attr( $css, $deprecated = '' ) { 1200 1373 $css = wp_kses_no_null($css); 1201 1374 $css = str_replace(array("\n","\r","\t"), '', $css); 1202 1375 1203 if ( preg_match( '%[\\(& ]|/\*%', $css ) ) // remove any inline css containing \ ( &or comments1376 if ( preg_match( '%[\\(&=}]|/\*%', $css ) ) // remove any inline css containing \ ( & } = or comments 1204 1377 return ''; 1205 1378 1206 $css_array = split( ';', trim( $css ) );1379 $css_array = explode( ';', trim( $css ) ); 1207 1380 $allowed_attr = apply_filters( 'safe_style_css', array( 'text-align', 'margin', 'color', 'float', 1208 1381 'border', 'background', 'background-color', 'border-bottom', 'border-bottom-color', 1209 1382 'border-bottom-style', 'border-bottom-width', 'border-collapse', 'border-color', 'border-left',