| 2 | /** |
| 3 | * HTML/XHTML filter that only allows some elements and attributes |
| 4 | * |
| 5 | * Added wp_ prefix to avoid conflicts with existing kses users |
| 6 | * |
| 7 | * @version 0.2.2 |
| 8 | * @copyright (C) 2002, 2003, 2005 |
| 9 | * @author Ulf Harnhammar <metaur@users.sourceforge.net> |
| 10 | * |
| 11 | * @package External |
| 12 | * @subpackage KSES |
| 13 | * |
| 14 | * @internal |
| 15 | * *** CONTACT INFORMATION *** |
| 16 | * E-mail: metaur at users dot sourceforge dot net |
| 17 | * Web page: http://sourceforge.net/projects/kses |
| 18 | * Paper mail: Ulf Harnhammar |
| 19 | * Ymergatan 17 C |
| 20 | * 753 25 Uppsala |
| 21 | * SWEDEN |
| 22 | * |
| 23 | * [kses strips evil scripts!] |
| 24 | */ |
3 | | // Added wp_ prefix to avoid conflicts with existing kses users |
4 | | # kses 0.2.2 - HTML/XHTML filter that only allows some elements and attributes |
5 | | # Copyright (C) 2002, 2003, 2005 Ulf Harnhammar |
6 | | # *** CONTACT INFORMATION *** |
7 | | # |
8 | | # E-mail: metaur at users dot sourceforge dot net |
9 | | # Web page: http://sourceforge.net/projects/kses |
10 | | # Paper mail: Ulf Harnhammar |
11 | | # Ymergatan 17 C |
12 | | # 753 25 Uppsala |
13 | | # SWEDEN |
14 | | # |
15 | | # [kses strips evil scripts!] |
| 26 | /** |
| 27 | * You can override this in your my-hacks.php file |
| 28 | * You can also override this in a plugin file. The |
| 29 | * my-hacks.php is deprecated in its usage. |
| 30 | * |
| 31 | * @since 1.2.0 |
| 32 | */ |
222 | | function wp_kses($string, $allowed_html, $allowed_protocols = array ('http', 'https', 'ftp', 'ftps', 'mailto', 'news', 'irc', 'gopher', 'nntp', 'feed', 'telnet')) |
223 | | ############################################################################### |
224 | | # This function makes sure that only the allowed HTML element names, attribute |
225 | | # names and attribute values plus only sane HTML entities will occur in |
226 | | # $string. You have to remove any slashes from PHP's magic quotes before you |
227 | | # call this function. |
228 | | ############################################################################### |
229 | | { |
| 251 | /** |
| 252 | * wp_kses() - Filters content and keeps only allowable HTML elements. |
| 253 | * |
| 254 | * This function makes sure that only the allowed HTML element names, |
| 255 | * attribute names and attribute values plus only sane HTML entities |
| 256 | * will occur in $string. You have to remove any slashes from PHP's |
| 257 | * magic quotes before you call this function. |
| 258 | * |
| 259 | * The default allowed protocols are 'http', 'https', 'ftp', 'ftps', 'mailto', |
| 260 | * 'news', 'irc', 'gopher', 'nntp', 'feed', and finally 'telnet'. This |
| 261 | * covers all common link protocols, except for 'javascript' which |
| 262 | * should not be allowed for untrusted users. |
| 263 | * |
| 264 | * @since 1.0.0 |
| 265 | * |
| 266 | * @param string $string Content to filter through kses |
| 267 | * @param array $allowed_html List of allowed HTML elements |
| 268 | * @param array $allowed_protocols Optional. Allowed protocol in links. |
| 269 | * @return string Filtered content with only allowed HTML elements |
| 270 | */ |
| 271 | function wp_kses($string, $allowed_html, $allowed_protocols = array ('http', 'https', 'ftp', 'ftps', 'mailto', 'news', 'irc', 'gopher', 'nntp', 'feed', 'telnet')) { |
238 | | function wp_kses_hook($string, $allowed_html, $allowed_protocols) |
239 | | ############################################################################### |
240 | | # You add any kses hooks here. |
241 | | ############################################################################### |
242 | | { |
| 280 | /** |
| 281 | * wp_kses_hook() - You add any kses hooks here. |
| 282 | * |
| 283 | * There is currently only one kses WordPress hook and it is |
| 284 | * called here. All parameters are passed to the hooks and |
| 285 | * expected to receive a string. |
| 286 | * |
| 287 | * @since 1.0.0 |
| 288 | * |
| 289 | * @param string $string Content to filter through kses |
| 290 | * @param array $allowed_html List of allowed HTML elements |
| 291 | * @param array $allowed_protocols Allowed protocol in links |
| 292 | * @return string Filtered content through 'pre_kses' hook |
| 293 | */ |
| 294 | function wp_kses_hook($string, $allowed_html, $allowed_protocols) { |
255 | | function wp_kses_split($string, $allowed_html, $allowed_protocols) |
256 | | ############################################################################### |
257 | | # This function searches for HTML tags, no matter how malformed. It also |
258 | | # matches stray ">" characters. |
259 | | ############################################################################### |
260 | | { |
| 310 | /** |
| 311 | * wp_kses_split() - Searches for HTML tags, no matter how malformed |
| 312 | * |
| 313 | * It also matches stray ">" characters. |
| 314 | * |
| 315 | * @since 1.0.0 |
| 316 | * |
| 317 | * @param string $string Content to filter |
| 318 | * @param array $allowed_html Allowed HTML elements |
| 319 | * @param array $allowed_protocols Allowed protocols to keep |
| 320 | * @return string Content with fixed HTML tags |
| 321 | */ |
| 322 | function wp_kses_split($string, $allowed_html, $allowed_protocols) { |
265 | | function wp_kses_split2($string, $allowed_html, $allowed_protocols) |
266 | | ############################################################################### |
267 | | # This function does a lot of work. It rejects some very malformed things |
268 | | # like <:::>. It returns an empty string, if the element isn't allowed (look |
269 | | # ma, no strip_tags()!). Otherwise it splits the tag into an element and an |
270 | | # attribute list. |
271 | | ############################################################################### |
272 | | { |
| 327 | /** |
| 328 | * wp_kses_split2() - Callback for wp_kses_split for fixing malformed HTML tags |
| 329 | * |
| 330 | * This function does a lot of work. It rejects some very malformed things |
| 331 | * like <:::>. It returns an empty string, if the element isn't allowed (look |
| 332 | * ma, no strip_tags()!). Otherwise it splits the tag into an element and an |
| 333 | * attribute list. |
| 334 | * |
| 335 | * After the tag is split into an element and an attribute list, it is run |
| 336 | * through another filter which will remove illegal attributes and once |
| 337 | * that is completed, will be returned. |
| 338 | * |
| 339 | * @since 1.0.0 |
| 340 | * @uses wp_kses_attr() |
| 341 | * |
| 342 | * @param string $string Content to filter |
| 343 | * @param array $allowed_html Allowed HTML elements |
| 344 | * @param array $allowed_protocols Allowed protocols to keep |
| 345 | * @return string Fixed HTML element |
| 346 | */ |
| 347 | function wp_kses_split2($string, $allowed_html, $allowed_protocols) { |
| 383 | /** |
| 384 | * wp_kses_attr() - Removes all attributes, if none are allowed for this element |
| 385 | * |
| 386 | * If some are allowed it calls wp_kses_hair() to split them further, and then |
| 387 | * it builds up new HTML code from the data that wp_kses_hair() returns. It also |
| 388 | * removes "<" and ">" characters, if there are any left. One more thing it |
| 389 | * does is to check if the tag has a closing XHTML slash, and if it does, it |
| 390 | * puts one in the returned code as well. |
| 391 | * |
| 392 | * @since 1.0.0 |
| 393 | * |
| 394 | * @param string $element HTML element/tag |
| 395 | * @param string $attr HTML attributes from HTML element to closing HTML element tag |
| 396 | * @param array $allowed_html Allowed HTML elements |
| 397 | * @param array $allowed_protocols Allowed protocols to keep |
| 398 | * @return string Sanitized HTML element |
| 399 | */ |
309 | | ############################################################################### |
310 | | # This function removes all attributes, if none are allowed for this element. |
311 | | # If some are allowed it calls wp_kses_hair() to split them further, and then it |
312 | | # builds up new HTML code from the data that wp_kses_hair() returns. It also |
313 | | # removes "<" and ">" characters, if there are any left. One more thing it |
314 | | # does is to check if the tag has a closing XHTML slash, and if it does, |
315 | | # it puts one in the returned code as well. |
316 | | ############################################################################### |
| 455 | /** |
| 456 | * wp_kses_hair() - Builds an attribute list from string containing attributes. |
| 457 | * |
| 458 | * This function does a lot of work. It parses an attribute list into an array |
| 459 | * with attribute data, and tries to do the right thing even if it gets weird |
| 460 | * input. It will add quotes around attribute values that don't have any quotes |
| 461 | * or apostrophes around them, to make it easier to produce HTML code that will |
| 462 | * conform to W3C's HTML specification. It will also remove bad URL protocols |
| 463 | * from attribute values. |
| 464 | * |
| 465 | * @since 1.0.0 |
| 466 | * |
| 467 | * @param string $attr Attribute list from HTML element to closing HTML element tag |
| 468 | * @param array $allowed_protocols Allowed protocols to keep |
| 469 | * @return array List of attributes after parsing |
| 470 | */ |
372 | | ############################################################################### |
373 | | # This function does a lot of work. It parses an attribute list into an array |
374 | | # with attribute data, and tries to do the right thing even if it gets weird |
375 | | # input. It will add quotes around attribute values that don't have any quotes |
376 | | # or apostrophes around them, to make it easier to produce HTML code that will |
377 | | # conform to W3C's HTML specification. It will also remove bad URL protocols |
378 | | # from attribute values. |
379 | | ############################################################################### |
| 569 | /** |
| 570 | * wp_kses_check_attr_val() - Performs different checks for attribute values. |
| 571 | * |
| 572 | * The currently implemented checks are "maxlen", "minlen", "maxval", "minval" |
| 573 | * and "valueless" with even more checks to come soon. |
| 574 | * |
| 575 | * @since 1.0.0 |
| 576 | * |
| 577 | * @param string $value Attribute value |
| 578 | * @param string $vless Whether the value is valueless or not. Use 'y' or 'n' |
| 579 | * @param string $checkname What $checkvalue is checking for. |
| 580 | * @param mixed $checkvalue What constraint the value should pass |
| 581 | * @return bool Whether check passes (true) or not (false) |
| 582 | */ |
| 642 | /** |
| 643 | * wp_kses_bad_protocol() - Sanitize string from bad protocols |
| 644 | * |
| 645 | * This function removes all non-allowed protocols from the beginning |
| 646 | * of $string. It ignores whitespace and the case of the letters, and |
| 647 | * it does understand HTML entities. It does its work in a while loop, |
| 648 | * so it won't be fooled by a string like "javascript:javascript:alert(57)". |
| 649 | * |
| 650 | * @since 1.0.0 |
| 651 | * |
| 652 | * @param string $string Content to filter bad protocols from |
| 653 | * @param array $allowed_protocols Allowed protocols to keep |
| 654 | * @return string Filtered content |
| 655 | */ |
611 | | function wp_kses_html_error($string) |
612 | | ############################################################################### |
613 | | # This function deals with parsing errors in wp_kses_hair(). The general plan is |
614 | | # to remove everything to and including some whitespace, but it deals with |
615 | | # quotes and apostrophes as well. |
616 | | ############################################################################### |
617 | | { |
| 740 | /** |
| 741 | * wp_kses_html_error() - Handles parsing errors in wp_kses_hair() |
| 742 | * |
| 743 | * The general plan is to remove everything to and including some |
| 744 | * whitespace, but it deals with quotes and apostrophes as well. |
| 745 | * |
| 746 | * @since 1.0.0 |
| 747 | * |
| 748 | * @param string $string |
| 749 | * @return string |
| 750 | */ |
| 751 | function wp_kses_html_error($string) { |
621 | | function wp_kses_bad_protocol_once($string, $allowed_protocols) |
622 | | ############################################################################### |
623 | | # This function searches for URL protocols at the beginning of $string, while |
624 | | # handling whitespace and HTML entities. |
625 | | ############################################################################### |
626 | | { |
| 755 | /** |
| 756 | * wp_kses_bad_protocol_once() - Sanitizes content from bad protocols and other characters |
| 757 | * |
| 758 | * This function searches for URL protocols at the beginning of $string, |
| 759 | * while handling whitespace and HTML entities. |
| 760 | * |
| 761 | * @since 1.0.0 |
| 762 | * |
| 763 | * @param string $string Content to check for bad protocols |
| 764 | * @param array $allowed_protocols Allowed protocols |
| 765 | * @return string Sanitized content |
| 766 | */ |
| 767 | function wp_kses_bad_protocol_once($string, $allowed_protocols) { |
630 | | function wp_kses_bad_protocol_once2($string, $allowed_protocols) |
631 | | ############################################################################### |
632 | | # This function processes URL protocols, checks to see if they're in the white- |
633 | | # list or not, and returns different data depending on the answer. |
634 | | ############################################################################### |
635 | | { |
| 771 | /** |
| 772 | * wp_kses_bad_protocol_once2() - Callback for wp_kses_bad_protocol_once() regular expression. |
| 773 | * |
| 774 | * This function processes URL protocols, checks to see if they're in the |
| 775 | * white-list or not, and returns different data depending on the answer. |
| 776 | * |
| 777 | * @since 1.0.0 |
| 778 | * |
| 779 | * @param string $string Content to check for bad protocols |
| 780 | * @param array $allowed_protocols Allowed protocols |
| 781 | * @return string Sanitized content |
| 782 | */ |
| 783 | function wp_kses_bad_protocol_once2($string, $allowed_protocols) { |
656 | | function wp_kses_normalize_entities($string) |
657 | | ############################################################################### |
658 | | # This function normalizes HTML entities. It will convert "AT&T" to the correct |
659 | | # "AT&amp;T", "&#00058;" to "&#58;", "&#XYZZY;" to "&amp;#XYZZY;" and so on. |
660 | | ############################################################################### |
661 | | { |
| 804 | /** |
| 805 | * wp_kses_normalize_entities() - Converts and fixes HTML entities |
| 806 | * |
| 807 | * This function normalizes HTML entities. It will convert "AT&T" to the |
| 808 | * correct "AT&amp;T", "&#00058;" to "&#58;", "&#XYZZY;" to "&amp;#XYZZY;" |
| 809 | * and so on. |
| 810 | * |
| 811 | * @since 1.0.0 |
| 812 | * |
| 813 | * @param string $string Content to normalize entities |
| 814 | * @return string Content with normalized entities |
| 815 | */ |
| 816 | function wp_kses_normalize_entities($string) { |
675 | | function wp_kses_normalize_entities2($i) |
676 | | ############################################################################### |
677 | | # This function helps wp_kses_normalize_entities() to only accept 16 bit values |
678 | | # and nothing more for &#number; entities. |
679 | | ############################################################################### |
680 | | { |
| 830 | /** |
| 831 | * wp_kses_normalize_entities2() - Callback for wp_kses_normalize_entities() regular expression |
| 832 | * |
| 833 | * This function helps wp_kses_normalize_entities() to only accept 16 bit |
| 834 | * values and nothing more for &#number; entities. |
| 835 | * |
| 836 | * @since 1.0.0 |
| 837 | * |
| 838 | * @param int $i Number encoded entity |
| 839 | * @return string Correctly encoded entity |
| 840 | */ |
| 841 | function wp_kses_normalize_entities2($i) { |
684 | | function wp_kses_decode_entities($string) |
685 | | ############################################################################### |
686 | | # This function decodes numeric HTML entities (&#65; and &#x41;). It doesn't |
687 | | # do anything with other entities like &auml;, but we don't need them in the |
688 | | # URL protocol whitelisting system anyway. |
689 | | ############################################################################### |
690 | | { |
| 845 | /** |
| 846 | * wp_kses_decode_entities() - Convert all numeric entities to their character counterparts. |
| 847 | * |
| 848 | * This function decodes numeric HTML entities (&#65; and &#x41;). It |
| 849 | * doesn't do anything with other entities like &auml;, but we don't need |
| 850 | * them in the URL protocol whitelisting system anyway. |
| 851 | * |
| 852 | * @since 1.0.0 |
| 853 | * |
| 854 | * @param string $string Content to change entities |
| 855 | * @return string Content after decoded entities |
| 856 | */ |
| 857 | function wp_kses_decode_entities($string) { |
| 954 | /** |
| 955 | * kses_init() - Sets up most of the Kses filters for input form content |
| 956 | * |
| 957 | * If you remove the kses_init() function from 'init' hook and |
| 958 | * 'set_current_user' (priority is default), then none of the |
| 959 | * Kses filter hooks will be added. |
| 960 | * |
| 961 | * First removes all of the Kses filters in case the current user |
| 962 | * does not need to have Kses filter the content. If the user does |
| 963 | * not have unfiltered html capability, then Kses filters are added. |
| 964 | * |
| 965 | * @uses kses_remove_filters() Removes the Kses filters |
| 966 | * @uses kses_init_filters() Adds the Kses filters back if the user |
| 967 | * does not have unfiltered HTML capability. |
| 968 | * @since 2.0.0 |
| 969 | */ |