Index: wp-includes/kses.php
===================================================================
--- wp-includes/kses.php	(revision 17179)
+++ wp-includes/kses.php	(working copy)
@@ -1,7 +1,25 @@
 <?php
 /**
- * HTML/XHTML filter that only allows some elements and attributes
+ * kses 0.2.2 - HTML/XHTML filter that only allows some elements and attributes
+ * Copyright (C) 2002, 2003, 2005  Ulf Harnhammar
  *
+ * This program is free software and open source software; you can redistribute
+ * it and/or modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the License,
+ * or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA  or visit
+ * http://www.gnu.org/licenses/gpl.html
+ * 
+ * [kses strips evil scripts!]
+ *
  * Added wp_ prefix to avoid conflicts with existing kses users
  *
  * @version 0.2.2
@@ -10,17 +28,6 @@
  *
  * @package External
  * @subpackage KSES
- *
- * @internal
- * *** CONTACT INFORMATION ***
- * E-mail:      metaur at users dot sourceforge dot net
- * Web page:    http://sourceforge.net/projects/kses
- * Paper mail:  Ulf Harnhammar
- *              Ymergatan 17 C
- *              753 25  Uppsala
- *              SWEDEN
- *
- * [kses strips evil scripts!]
  */
 
 /**
@@ -29,10 +36,10 @@
  *
  * @since 1.2.0
  */
-if (!defined('CUSTOM_TAGS'))
-	define('CUSTOM_TAGS', false);
+if ( ! defined( 'CUSTOM_TAGS' ) )
+	define( 'CUSTOM_TAGS', false );
 
-if (!CUSTOM_TAGS) {
+if ( ! CUSTOM_TAGS ) {
 	/**
 	 * Kses global for default allowable HTML tags.
 	 *
@@ -57,6 +64,22 @@
 			'title' => array ()),
 		'acronym' => array(
 			'title' => array ()),
+		'article' => array(
+			'align' => array (),
+			'class' => array (),
+			'dir' => array (),
+			'lang' => array(),
+			'style' => array (),
+			'xml:lang' => array(),
+		),
+		'aside' => array(
+			'align' => array (),
+			'class' => array (),
+			'dir' => array (),
+			'lang' => array(),
+			'style' => array (),
+			'xml:lang' => array(),
+		),
 		'b' => array(),
 		'big' => array(),
 		'blockquote' => array(
@@ -94,6 +117,15 @@
 		'del' => array(
 			'datetime' => array ()),
 		'dd' => array(),
+		'details' => array(
+			'align' => array (),
+			'class' => array (),
+			'dir' => array (),
+			'lang' => array(),
+			'open' => array (),
+			'style' => array (),
+			'xml:lang' => array(),
+		),
 		'div' => array(
 			'align' => array (),
 			'class' => array (),
@@ -105,10 +137,34 @@
 		'dt' => array(),
 		'em' => array(),
 		'fieldset' => array(),
+		'figure' => array(
+			'align' => array (),
+			'class' => array (),
+			'dir' => array (),
+			'lang' => array(),
+			'style' => array (),
+			'xml:lang' => array(),
+		),
+		'figcaption' => array(
+			'align' => array (),
+			'class' => array (),
+			'dir' => array (),
+			'lang' => array(),
+			'style' => array (),
+			'xml:lang' => array(),
+		),
 		'font' => array(
 			'color' => array (),
 			'face' => array (),
 			'size' => array ()),
+		'footer' => array(
+			'align' => array (),
+			'class' => array (),
+			'dir' => array (),
+			'lang' => array(),
+			'style' => array (),
+			'xml:lang' => array(),
+		),
 		'form' => array(
 			'action' => array (),
 			'accept' => array (),
@@ -147,6 +203,22 @@
 			'class' => array (),
 			'id'    => array (),
 			'style' => array ()),
+		'header' => array(
+			'align' => array (),
+			'class' => array (),
+			'dir' => array (),
+			'lang' => array(),
+			'style' => array (),
+			'xml:lang' => array(),
+		),
+		'hgroup' => array(
+			'align' => array (),
+			'class' => array (),
+			'dir' => array (),
+			'lang' => array(),
+			'style' => array (),
+			'xml:lang' => array(),
+		),
 		'hr' => array (
 			'align' => array (),
 			'class' => array (),
@@ -177,6 +249,18 @@
 		'li' => array (
 			'align' => array (),
 			'class' => array ()),
+		'menu' => array (
+			'class' => array (),
+			'style' => array (),
+			'type' => array ()),
+		'nav' => array(
+			'align' => array (),
+			'class' => array (),
+			'dir' => array (),
+			'lang' => array(),
+			'style' => array (),
+			'xml:lang' => array(),
+		),
 		'p' => array(
 			'class' => array (),
 			'align' => array (),
@@ -198,9 +282,25 @@
 			'style' => array (),
 			'title' => array (),
 			'xml:lang' => array()),
+		'section' => array(
+			'align' => array (),
+			'class' => array (),
+			'dir' => array (),
+			'lang' => array(),
+			'style' => array (),
+			'xml:lang' => array(),
+		),
 		'strike' => array(),
 		'strong' => array(),
 		'sub' => array(),
+		'summary' => array(
+			'align' => array (),
+			'class' => array (),
+			'dir' => array (),
+			'lang' => array(),
+			'style' => array (),
+			'xml:lang' => array(),
+		),
 		'sup' => array(),
 		'table' => array(
 			'align' => array (),
@@ -333,6 +433,50 @@
 		//	'u' => array(),
 		//	'ul' => array(),
 	);
+
+	$allowedentitynames = array(
+		'nbsp',    'iexcl',  'cent',    'pound',  'curren', 'yen',
+		'brvbar',  'sect',   'uml',     'copy',   'ordf',   'laquo',
+		'not',     'shy',    'reg',     'macr',   'deg',    'plusmn',
+		'acute',   'micro',  'para',    'middot', 'cedil',  'ordm',
+		'raquo',   'iquest', 'Agrave',  'Aacute', 'Acirc',  'Atilde',
+		'Auml',    'Aring',  'AElig',   'Ccedil', 'Egrave', 'Eacute',
+		'Ecirc',   'Euml',   'Igrave',  'Iacute', 'Icirc',  'Iuml',
+		'ETH',     'Ntilde', 'Ograve',  'Oacute', 'Ocirc',  'Otilde',
+		'Ouml',    'times',  'Oslash',  'Ugrave', 'Uacute', 'Ucirc',
+		'Uuml',    'Yacute', 'THORN',   'szlig',  'agrave', 'aacute',
+		'acirc',   'atilde', 'auml',    'aring',  'aelig',  'ccedil',
+		'egrave',  'eacute', 'ecirc',   'euml',   'igrave', 'iacute',
+		'icirc',   'iuml',   'eth',     'ntilde', 'ograve', 'oacute',
+		'ocirc',   'otilde', 'ouml',    'divide', 'oslash', 'ugrave',
+		'uacute',  'ucirc',  'uuml',    'yacute', 'thorn',  'yuml',
+		'quot',    'amp',    'lt',      'gt',     'apos',   'OElig',
+		'oelig',   'Scaron', 'scaron',  'Yuml',   'circ',   'tilde',
+		'ensp',    'emsp',   'thinsp',  'zwnj',   'zwj',    'lrm',
+		'rlm',     'ndash',  'mdash',   'lsquo',  'rsquo',  'sbquo',
+		'ldquo',   'rdquo',  'bdquo',   'dagger', 'Dagger', 'permil',
+		'lsaquo',  'rsaquo', 'euro',    'fnof',   'Alpha',  'Beta',
+		'Gamma',   'Delta',  'Epsilon', 'Zeta',   'Eta',    'Theta',
+		'Iota',    'Kappa',  'Lambda',  'Mu',     'Nu',     'Xi',
+		'Omicron', 'Pi',     'Rho',     'Sigma',  'Tau',    'Upsilon',
+		'Phi',     'Chi',    'Psi',     'Omega',  'alpha',  'beta',
+		'gamma',   'delta',  'epsilon', 'zeta',   'eta',    'theta',
+		'iota',    'kappa',  'lambda',  'mu',     'nu',     'xi',
+		'omicron', 'pi',     'rho',     'sigmaf', 'sigma',  'tau',
+		'upsilon', 'phi',    'chi',     'psi',    'omega',  'thetasym',
+		'upsih',   'piv',    'bull',    'hellip', 'prime',  'Prime',
+		'oline',   'frasl',  'weierp',  'image',  'real',   'trade',
+		'alefsym', 'larr',   'uarr',    'rarr',   'darr',   'harr',
+		'crarr',   'lArr',   'uArr',    'rArr',   'dArr',   'hArr',
+		'forall',  'part',   'exist',   'empty',  'nabla',  'isin',
+		'notin',   'ni',     'prod',    'sum',    'minus',  'lowast',
+		'radic',   'prop',   'infin',   'ang',    'and',    'or',
+		'cap',     'cup',    'int',     'sim',    'cong',   'asymp',
+		'ne',      'equiv',  'le',      'ge',     'sub',    'sup',
+		'nsub',    'sube',   'supe',    'oplus',  'otimes', 'perp',
+		'sdot',    'lceil',  'rceil',   'lfloor', 'rfloor', 'lang',
+		'rang',    'loz',    'spades',  'clubs',  'hearts', 'diams',
+	);
 }
 
 /**
@@ -344,9 +488,9 @@
  * call this function.
  *
  * The default allowed protocols are 'http', 'https', 'ftp', 'mailto', 'news',
- * 'irc', 'gopher', 'nntp', 'feed', and finally 'telnet. This covers all common
- * link protocols, except for 'javascript' which should not be allowed for
- * untrusted users.
+ * 'irc', 'gopher', 'nntp', 'feed', 'telnet', 'mms', 'rtsp' and 'svn'. This
+ * covers all common link protocols, except for 'javascript' which should not
+ * be allowed for untrusted users.
  *
  * @since 1.0.0
  *
@@ -355,7 +499,8 @@
  * @param array $allowed_protocols Optional. Allowed protocol in links.
  * @return string Filtered content with only allowed HTML elements
  */
-function wp_kses($string, $allowed_html, $allowed_protocols = array ('http', 'https', 'ftp', 'ftps', 'mailto', 'news', 'irc', 'gopher', 'nntp', 'feed', 'telnet')) {
+function wp_kses($string, $allowed_html, $allowed_protocols = array ()) {
+	$allowed_protocols = wp_parse_args( $allowed_protocols, apply_filters('kses_allowed_protocols', array ('http', 'https', 'ftp', 'ftps', 'mailto', 'news', 'irc', 'gopher', 'nntp', 'feed', 'telnet', 'mms', 'rtsp', 'svn') ));
 	$string = wp_kses_no_null($string);
 	$string = wp_kses_js_entities($string);
 	$string = wp_kses_normalize_entities($string);
@@ -409,11 +554,21 @@
 	global $pass_allowed_html, $pass_allowed_protocols;
 	$pass_allowed_html = $allowed_html;
 	$pass_allowed_protocols = $allowed_protocols;
-	return preg_replace_callback('%((<!--.*?(-->|$))|(<[^>]*(>|$)|>))%',
-		create_function('$match', 'global $pass_allowed_html, $pass_allowed_protocols; return wp_kses_split2($match[1], $pass_allowed_html, $pass_allowed_protocols);'), $string);
+	return preg_replace_callback( '%((<!--.*?(-->|$))|(<[^>]*(>|$)|>))%', '_wp_kses_split_callback', $string );
 }
 
 /**
+ * Callback for wp_kses_split.
+ *
+ * @since 3.1.0
+ * @access private
+ */
+function _wp_kses_split_callback( $match ) {
+	global $pass_allowed_html, $pass_allowed_protocols;
+	return wp_kses_split2( $match[1], $pass_allowed_html, $pass_allowed_protocols );
+}
+
+/**
  * Callback for wp_kses_split for fixing malformed HTML tags.
  *
  * This function does a lot of work. It rejects some very malformed things like
@@ -495,7 +650,7 @@
 	# Is there a closing XHTML slash at the end of the attributes?
 
 	$xhtml_slash = '';
-	if (preg_match('%\s/\s*$%', $attr))
+	if (preg_match('%\s*/\s*$%', $attr))
 		$xhtml_slash = ' /';
 
 	# Are any attributes allowed at all for this element?
@@ -533,7 +688,7 @@
 					break;
 				}
 
-			if ( $arreach['name'] == 'style' ) {
+			if ( strtolower($arreach['name']) == 'style' ) {
 				$orig_value = $arreach['value'];
 
 				$value = safecss_filter_attr($orig_value);
@@ -621,11 +776,11 @@
 
 			case 2 : # attribute value, a URL after href= for instance
 
-				if (preg_match('/^"([^"]*)"(\s+|$)/', $attr, $match))
+				if (preg_match('%^"([^"]*)"(\s+|/?$)%', $attr, $match))
 					# "value"
 					{
 					$thisval = $match[1];
-					if ( in_array($attrname, $uris) )
+					if ( in_array(strtolower($attrname), $uris) )
 						$thisval = wp_kses_bad_protocol($thisval, $allowed_protocols);
 
 					if(FALSE === array_key_exists($attrname, $attrarr)) {
@@ -637,11 +792,11 @@
 					break;
 				}
 
-				if (preg_match("/^'([^']*)'(\s+|$)/", $attr, $match))
+				if (preg_match("%^'([^']*)'(\s+|/?$)%", $attr, $match))
 					# 'value'
 					{
 					$thisval = $match[1];
-					if ( in_array($attrname, $uris) )
+					if ( in_array(strtolower($attrname), $uris) )
 						$thisval = wp_kses_bad_protocol($thisval, $allowed_protocols);
 
 					if(FALSE === array_key_exists($attrname, $attrarr)) {
@@ -653,11 +808,11 @@
 					break;
 				}
 
-				if (preg_match("%^([^\s\"']+)(\s+|$)%", $attr, $match))
+				if (preg_match("%^([^\s\"']+)(\s+|/?$)%", $attr, $match))
 					# value
 					{
 					$thisval = $match[1];
-					if ( in_array($attrname, $uris) )
+					if ( in_array(strtolower($attrname), $uris) )
 						$thisval = wp_kses_bad_protocol($thisval, $allowed_protocols);
 
 					if(FALSE === array_key_exists($attrname, $attrarr)) {
@@ -699,7 +854,7 @@
  * @param string $vless Whether the value is valueless or not. Use 'y' or 'n'
  * @param string $checkname What $checkvalue is checking for.
  * @param mixed $checkvalue What constraint the value should pass
- * @return bool Whether check passes (true) or not (false)
+ * @return bool Whether check passes
  */
 function wp_kses_check_attr_val($value, $vless, $checkname, $checkvalue) {
 	$ok = true;
@@ -880,15 +1035,10 @@
  * @return string Sanitized content
  */
 function wp_kses_bad_protocol_once($string, $allowed_protocols) {
-	global $_kses_allowed_protocols;
-	$_kses_allowed_protocols = $allowed_protocols;
+	$string2 = preg_split( '/:|&#0*58;|&#x0*3a;/i', $string, 2 );
+	if ( isset($string2[1]) && ! preg_match('%/\?%', $string2[0]) )
+		$string = wp_kses_bad_protocol_once2( $string2[0], $allowed_protocols ) . trim( $string2[1] );
 
-	$string2 = preg_split('/:|&#58;|&#x3a;/i', $string, 2);
-	if ( isset($string2[1]) && !preg_match('%/\?%', $string2[0]) )
-		$string = wp_kses_bad_protocol_once2($string2[0]) . trim($string2[1]);
-	else
-		$string = preg_replace_callback('/^((&[^;]*;|[\sA-Za-z0-9])*)'.'(:|&#58;|&#[Xx]3[Aa];)\s*/', 'wp_kses_bad_protocol_once2', $string);
-
 	return $string;
 }
 
@@ -901,29 +1051,19 @@
  * @access private
  * @since 1.0.0
  *
- * @param mixed $matches string or preg_replace_callback() matches array to check for bad protocols
+ * @param string $string URI scheme to check against the whitelist
+ * @param array $allowed_protocols Allowed protocols
  * @return string Sanitized content
  */
-function wp_kses_bad_protocol_once2($matches) {
-	global $_kses_allowed_protocols;
-
-	if ( is_array($matches) ) {
-		if ( ! isset($matches[1]) || empty($matches[1]) )
-			return '';
-
-		$string = $matches[1];
-	} else {
-		$string = $matches;
-	}
-
+function wp_kses_bad_protocol_once2( $string, $allowed_protocols ) {
 	$string2 = wp_kses_decode_entities($string);
 	$string2 = preg_replace('/\s/', '', $string2);
 	$string2 = wp_kses_no_null($string2);
 	$string2 = strtolower($string2);
 
 	$allowed = false;
-	foreach ( (array) $_kses_allowed_protocols as $one_protocol)
-		if (strtolower($one_protocol) == $string2) {
+	foreach ( (array) $allowed_protocols as $one_protocol )
+		if ( strtolower($one_protocol) == $string2 ) {
 			$allowed = true;
 			break;
 		}
@@ -952,9 +1092,9 @@
 
 	# Change back the allowed entities in our entity whitelist
 
-	$string = preg_replace('/&amp;([A-Za-z][A-Za-z0-9]{0,19});/', '&\\1;', $string);
-	$string = preg_replace_callback('/&amp;#0*([0-9]{1,5});/', 'wp_kses_normalize_entities2', $string);
-	$string = preg_replace_callback('/&amp;#([Xx])0*(([0-9A-Fa-f]{2}){1,2});/', 'wp_kses_normalize_entities3', $string);
+	$string = preg_replace_callback('/&amp;([A-Za-z]{2,8});/', 'wp_kses_named_entities', $string);
+	$string = preg_replace_callback('/&amp;#(0*[0-9]{1,7});/', 'wp_kses_normalize_entities2', $string);
+	$string = preg_replace_callback('/&amp;#[Xx](0*[0-9A-Fa-f]{1,6});/', 'wp_kses_normalize_entities3', $string);
 
 	return $string;
 }
@@ -962,6 +1102,27 @@
 /**
  * Callback for wp_kses_normalize_entities() regular expression.
  *
+ * This function only accepts valid named entity references, which are finite,
+ * case-sensitive, and highly scrutinized by HTML and XML validators.
+ *
+ * @since 3.0.0
+ *
+ * @param array $matches preg_replace_callback() matches array
+ * @return string Correctly encoded entity
+ */
+function wp_kses_named_entities($matches) {
+	global $allowedentitynames;
+
+	if ( empty($matches[1]) )
+		return '';
+
+	$i = $matches[1];
+	return ( ( ! in_array($i, $allowedentitynames) ) ? "&amp;$i;" : "&$i;" );
+}
+
+/**
+ * Callback for wp_kses_normalize_entities() regular expression.
+ *
  * This function helps wp_kses_normalize_entities() to only accept 16 bit values
  * and nothing more for &#number; entities.
  *
@@ -972,11 +1133,18 @@
  * @return string Correctly encoded entity
  */
 function wp_kses_normalize_entities2($matches) {
-	if ( ! isset($matches[1]) || empty($matches[1]) )
+	if ( empty($matches[1]) )
 		return '';
 
 	$i = $matches[1];
-	return ( ( ! valid_unicode($i) ) || ($i > 65535) ? "&amp;#$i;" : "&#$i;" );
+	if (valid_unicode($i)) {
+		$i = str_pad(ltrim($i,'0'), 3, '0', STR_PAD_LEFT);
+		$i = "&#$i;";
+	} else {
+		$i = "&amp;#$i;";
+	}
+
+	return $i;
 }
 
 /**
@@ -991,11 +1159,11 @@
  * @return string Correctly encoded entity
  */
 function wp_kses_normalize_entities3($matches) {
-	if ( ! isset($matches[2]) || empty($matches[2]) )
+	if ( empty($matches[1]) )
 		return '';
 
-	$hexchars = $matches[2];
-	return ( ( ! valid_unicode(hexdec($hexchars)) ) ? "&amp;#x$hexchars;" : "&#x$hexchars;" );
+	$hexchars = $matches[1];
+	return ( ( ! valid_unicode(hexdec($hexchars)) ) ? "&amp;#x$hexchars;" : '&#x'.ltrim($hexchars,'0').';' );
 }
 
 /**
@@ -1196,14 +1364,19 @@
 add_action('init', 'kses_init');
 add_action('set_current_user', 'kses_init');
 
+/**
+ * Inline CSS filter
+ *
+ * @since 2.8.1
+ */
 function safecss_filter_attr( $css, $deprecated = '' ) {
 	$css = wp_kses_no_null($css);
 	$css = str_replace(array("\n","\r","\t"), '', $css);
 
-	if ( preg_match( '%[\\(&]|/\*%', $css ) ) // remove any inline css containing \ ( & or comments
+	if ( preg_match( '%[\\(&=}]|/\*%', $css ) ) // remove any inline css containing \ ( & } = or comments
 		return '';
 
-	$css_array = split( ';', trim( $css ) );
+	$css_array = explode( ';', trim( $css ) );
 	$allowed_attr = apply_filters( 'safe_style_css', array( 'text-align', 'margin', 'color', 'float',
 	'border', 'background', 'background-color', 'border-bottom', 'border-bottom-color',
 	'border-bottom-style', 'border-bottom-width', 'border-collapse', 'border-color', 'border-left',
