| 2 | /** |
| 3 | * HTML/XHTML filter that only allows some elements and attributes |
| 4 | * |
| 5 | * Added wp_ prefix to avoid conflicts with existing kses users |
| 6 | * |
| 7 | * @version 0.2.2 |
| 8 | * @copyright (C) 2002, 2003, 2005 |
| 9 | * @author Ulf Harnhammar <metaur@users.sourceforge.net> |
| 10 | * |
| 11 | * @package External |
| 12 | * @subpackage KSES |
| 13 | * |
| 14 | * @internal |
| 15 | * *** CONTACT INFORMATION *** |
| 16 | * E-mail: metaur at users dot sourceforge dot net |
| 17 | * Web page: http://sourceforge.net/projects/kses |
| 18 | * Paper mail: Ulf Harnhammar |
| 19 | * Ymergatan 17 C |
| 20 | * 753 25 Uppsala |
| 21 | * SWEDEN |
| 22 | * |
| 23 | * [kses strips evil scripts!] |
| 24 | */ |
3 | | // Added wp_ prefix to avoid conflicts with existing kses users |
4 | | # kses 0.2.2 - HTML/XHTML filter that only allows some elements and attributes |
5 | | # Copyright (C) 2002, 2003, 2005 Ulf Harnhammar |
6 | | # *** CONTACT INFORMATION *** |
7 | | # |
8 | | # E-mail: metaur at users dot sourceforge dot net |
9 | | # Web page: http://sourceforge.net/projects/kses |
10 | | # Paper mail: Ulf Harnhammar |
11 | | # Ymergatan 17 C |
12 | | # 753 25 Uppsala |
13 | | # SWEDEN |
14 | | # |
15 | | # [kses strips evil scripts!] |
| 26 | /** |
| 27 | * You can override this in your my-hacks.php file. |
| 28 | * You can also override this in a plugin file; use of |
| 29 | * my-hacks.php is deprecated. |
| 30 | * |
| 31 | * @since 1.2.0 |
| 32 | */ |
289 | | function wp_kses($string, $allowed_html, $allowed_protocols = array ('http', 'https', 'ftp', 'ftps', 'mailto', 'news', 'irc', 'gopher', 'nntp', 'feed', 'telnet')) |
290 | | ############################################################################### |
291 | | # This function makes sure that only the allowed HTML element names, attribute |
292 | | # names and attribute values plus only sane HTML entities will occur in |
293 | | # $string. You have to remove any slashes from PHP's magic quotes before you |
294 | | # call this function. |
295 | | ############################################################################### |
296 | | { |
| 318 | /** |
| 319 | * wp_kses() - Filters content and keeps only allowable HTML elements. |
| 320 | * |
| 321 | * This function makes sure that only the allowed HTML element names, |
| 322 | * attribute names and attribute values plus only sane HTML entities |
| 323 | * will occur in $string. You have to remove any slashes from PHP's |
| 324 | * magic quotes before you call this function. |
| 325 | * |
| 326 | * The default allowed protocols are 'http', 'https', 'ftp', 'ftps', 'mailto', |
| 327 | * 'news', 'irc', 'gopher', 'nntp', 'feed', and finally 'telnet'. This |
| 328 | * covers all common link protocols, except for 'javascript' which |
| 329 | * should not be allowed for untrusted users. |
| 330 | * |
| 331 | * @since 1.0.0 |
| 332 | * |
| 333 | * @param string $string Content to filter through kses |
| 334 | * @param array $allowed_html List of allowed HTML elements |
| 335 | * @param array $allowed_protocols Optional. Allowed protocol in links. |
| 336 | * @return string Filtered content with only allowed HTML elements |
| 337 | */ |
| 338 | function wp_kses($string, $allowed_html, $allowed_protocols = array ('http', 'https', 'ftp', 'ftps', 'mailto', 'news', 'irc', 'gopher', 'nntp', 'feed', 'telnet')) { |
305 | | function wp_kses_hook($string, $allowed_html, $allowed_protocols) |
306 | | ############################################################################### |
307 | | # You add any kses hooks here. |
308 | | ############################################################################### |
309 | | { |
| 347 | /** |
| 348 | * wp_kses_hook() - You add any kses hooks here. |
| 349 | * |
| 350 | * There is currently only one kses WordPress hook and it is |
| 351 | * called here. All parameters are passed to the hooks and |
| 352 | * expected to receive a string. |
| 353 | * |
| 354 | * @since 1.0.0 |
| 355 | * |
| 356 | * @param string $string Content to filter through kses |
| 357 | * @param array $allowed_html List of allowed HTML elements |
| 358 | * @param array $allowed_protocols Allowed protocol in links |
| 359 | * @return string Filtered content through 'pre_kses' hook |
| 360 | */ |
| 361 | function wp_kses_hook($string, $allowed_html, $allowed_protocols) { |
322 | | function wp_kses_split($string, $allowed_html, $allowed_protocols) |
323 | | ############################################################################### |
324 | | # This function searches for HTML tags, no matter how malformed. It also |
325 | | # matches stray ">" characters. |
326 | | ############################################################################### |
327 | | { |
| 377 | /** |
| 378 | * wp_kses_split() - Searches for HTML tags, no matter how malformed |
| 379 | * |
| 380 | * It also matches stray ">" characters. |
| 381 | * |
| 382 | * @since 1.0.0 |
| 383 | * |
| 384 | * @param string $string Content to filter |
| 385 | * @param array $allowed_html Allowed HTML elements |
| 386 | * @param array $allowed_protocols Allowed protocols to keep |
| 387 | * @return string Content with fixed HTML tags |
| 388 | */ |
| 389 | function wp_kses_split($string, $allowed_html, $allowed_protocols) { |
332 | | function wp_kses_split2($string, $allowed_html, $allowed_protocols) |
333 | | ############################################################################### |
334 | | # This function does a lot of work. It rejects some very malformed things |
335 | | # like <:::>. It returns an empty string, if the element isn't allowed (look |
336 | | # ma, no strip_tags()!). Otherwise it splits the tag into an element and an |
337 | | # attribute list. |
338 | | ############################################################################### |
339 | | { |
| 394 | /** |
| 395 | * wp_kses_split2() - Callback for wp_kses_split for fixing malformed HTML tags |
| 396 | * |
| 397 | * This function does a lot of work. It rejects some very malformed things |
| 398 | * like <:::>. It returns an empty string, if the element isn't allowed (look |
| 399 | * ma, no strip_tags()!). Otherwise it splits the tag into an element and an |
| 400 | * attribute list. |
| 401 | * |
| 402 | * After the tag is split into an element and an attribute list, it is run |
| 403 | * through another filter which will remove illegal attributes and once |
| 404 | * that is completed, will be returned. |
| 405 | * |
| 406 | * @since 1.0.0 |
| 407 | * @uses wp_kses_attr() |
| 408 | * |
| 409 | * @param string $string Content to filter |
| 410 | * @param array $allowed_html Allowed HTML elements |
| 411 | * @param array $allowed_protocols Allowed protocols to keep |
| 412 | * @return string Fixed HTML element |
| 413 | */ |
| 414 | function wp_kses_split2($string, $allowed_html, $allowed_protocols) { |
375 | | function wp_kses_attr($element, $attr, $allowed_html, $allowed_protocols) |
376 | | ############################################################################### |
377 | | # This function removes all attributes, if none are allowed for this element. |
378 | | # If some are allowed it calls wp_kses_hair() to split them further, and then it |
379 | | # builds up new HTML code from the data that kses_hair() returns. It also |
380 | | # removes "<" and ">" characters, if there are any left. One more thing it |
381 | | # does is to check if the tag has a closing XHTML slash, and if it does, |
382 | | # it puts one in the returned code as well. |
383 | | ############################################################################### |
384 | | { |
| 450 | /** |
| 451 | * wp_kses_attr() - Removes all attributes, if none are allowed for this element |
| 452 | * |
| 453 | * If some are allowed it calls wp_kses_hair() to split them further, and then |
| 454 | * it builds up new HTML code from the data that wp_kses_hair() returns. It also |
| 455 | * removes "<" and ">" characters, if there are any left. One more thing it |
| 456 | * does is to check if the tag has a closing XHTML slash, and if it does, it |
| 457 | * puts one in the returned code as well. |
| 458 | * |
| 459 | * @since 1.0.0 |
| 460 | * |
| 461 | * @param string $element HTML element/tag |
| 462 | * @param string $attr HTML attributes from HTML element to closing HTML element tag |
| 463 | * @param array $allowed_html Allowed HTML elements |
| 464 | * @param array $allowed_protocols Allowed protocols to keep |
| 465 | * @return string Sanitized HTML element |
| 466 | */ |
| 467 | function wp_kses_attr($element, $attr, $allowed_html, $allowed_protocols) { |
438 | | function wp_kses_hair($attr, $allowed_protocols) |
439 | | ############################################################################### |
440 | | # This function does a lot of work. It parses an attribute list into an array |
441 | | # with attribute data, and tries to do the right thing even if it gets weird |
442 | | # input. It will add quotes around attribute values that don't have any quotes |
443 | | # or apostrophes around them, to make it easier to produce HTML code that will |
444 | | # conform to W3C's HTML specification. It will also remove bad URL protocols |
445 | | # from attribute values. |
446 | | ############################################################################### |
447 | | { |
| 521 | /** |
| 522 | * wp_kses_hair() - Builds an attribute list from string containing attributes. |
| 523 | * |
| 524 | * This function does a lot of work. It parses an attribute list into an array |
| 525 | * with attribute data, and tries to do the right thing even if it gets weird |
| 526 | * input. It will add quotes around attribute values that don't have any quotes |
| 527 | * or apostrophes around them, to make it easier to produce HTML code that will |
| 528 | * conform to W3C's HTML specification. It will also remove bad URL protocols |
| 529 | * from attribute values. |
| 530 | * |
| 531 | * @since 1.0.0 |
| 532 | * |
| 533 | * @param string $attr Attribute list from HTML element to closing HTML element tag |
| 534 | * @param array $allowed_protocols Allowed protocols to keep |
| 535 | * @return array List of attributes after parsing |
| 536 | */ |
| 537 | function wp_kses_hair($attr, $allowed_protocols) { |
544 | | function wp_kses_check_attr_val($value, $vless, $checkname, $checkvalue) |
545 | | ############################################################################### |
546 | | # This function performs different checks for attribute values. The currently |
547 | | # implemented checks are "maxlen", "minlen", "maxval", "minval" and "valueless" |
548 | | # with even more checks to come soon. |
549 | | ############################################################################### |
550 | | { |
| 634 | /** |
| 635 | * wp_kses_check_attr_val() - Performs different checks for attribute values. |
| 636 | * |
| 637 | * The currently implemented checks are "maxlen", "minlen", "maxval", "minval" |
| 638 | * and "valueless" with even more checks to come soon. |
| 639 | * |
| 640 | * @since 1.0.0 |
| 641 | * |
| 642 | * @param string $value Attribute value |
| 643 | * @param string $vless Whether the value is valueless or not. Use 'y' or 'n' |
| 644 | * @param string $checkname What $checkvalue is checking for. |
| 645 | * @param mixed $checkvalue What constraint the value should pass |
| 646 | * @return bool Whether check passes (true) or not (false) |
| 647 | */ |
| 648 | function wp_kses_check_attr_val($value, $vless, $checkname, $checkvalue) { |
608 | | function wp_kses_bad_protocol($string, $allowed_protocols) |
609 | | ############################################################################### |
610 | | # This function removes all non-allowed protocols from the beginning of |
611 | | # $string. It ignores whitespace and the case of the letters, and it does |
612 | | # understand HTML entities. It does its work in a while loop, so it won't be |
613 | | # fooled by a string like "javascript:javascript:alert(57)". |
614 | | ############################################################################### |
615 | | { |
| 706 | /** |
| 707 | * wp_kses_bad_protocol() - Sanitize string from bad protocols |
| 708 | * |
| 709 | * This function removes all non-allowed protocols from the beginning |
| 710 | * of $string. It ignores whitespace and the case of the letters, and |
| 711 | * it does understand HTML entities. It does its work in a while loop, |
| 712 | * so it won't be fooled by a string like "javascript:javascript:alert(57)". |
| 713 | * |
| 714 | * @since 1.0.0 |
| 715 | * |
| 716 | * @param string $string Content to filter bad protocols from |
| 717 | * @param array $allowed_protocols Allowed protocols to keep |
| 718 | * @return string Filtered content |
| 719 | */ |
| 720 | function wp_kses_bad_protocol($string, $allowed_protocols) { |
639 | | function wp_kses_stripslashes($string) |
640 | | ############################################################################### |
641 | | # This function changes the character sequence \" to just " |
642 | | # It leaves all other slashes alone. It's really weird, but the quoting from |
643 | | # preg_replace(//e) seems to require this. |
644 | | ############################################################################### |
645 | | { |
| 748 | /** |
| 749 | * wp_kses_stripslashes() - Strips slashes from in front of quotes |
| 750 | * |
| 751 | * This function changes the character sequence \" to just " |
| 752 | * It leaves all other slashes alone. It's really weird, but the |
| 753 | * quoting from preg_replace(//e) seems to require this. |
| 754 | * |
| 755 | * @since 1.0.0 |
| 756 | * |
| 757 | * @param string $string String to strip slashes |
| 758 | * @return string String with the backslash removed from in front of each double quote |
| 759 | */ |
| 760 | function wp_kses_stripslashes($string) { |
649 | | function wp_kses_array_lc($inarray) |
650 | | ############################################################################### |
651 | | # This function goes through an array, and changes the keys to all lower case. |
652 | | ############################################################################### |
653 | | { |
| 764 | /** |
| 765 | * wp_kses_array_lc() - Goes through an array and changes the keys to all lower case. |
| 766 | * |
| 767 | * @since 1.0.0 |
| 768 | * |
| 769 | * @param array $inarray Unfiltered array |
| 770 | * @return array Fixed array with all lowercase keys |
| 771 | */ |
| 772 | function wp_kses_array_lc($inarray) { |
678 | | function wp_kses_html_error($string) |
679 | | ############################################################################### |
680 | | # This function deals with parsing errors in wp_kses_hair(). The general plan is |
681 | | # to remove everything to and including some whitespace, but it deals with |
682 | | # quotes and apostrophes as well. |
683 | | ############################################################################### |
684 | | { |
| 800 | /** |
| 801 | * wp_kses_html_error() - Handles parsing errors in wp_kses_hair() |
| 802 | * |
| 803 | * The general plan is to remove everything to and including some |
| 804 | * whitespace, but it deals with quotes and apostrophes as well. |
| 805 | * |
| 806 | * @since 1.0.0 |
| 807 | * |
| 808 | * @param string $string |
| 809 | * @return string |
| 810 | */ |
| 811 | function wp_kses_html_error($string) { |
688 | | function wp_kses_bad_protocol_once($string, $allowed_protocols) |
689 | | ############################################################################### |
690 | | # This function searches for URL protocols at the beginning of $string, while |
691 | | # handling whitespace and HTML entities. |
692 | | ############################################################################### |
693 | | { |
| 815 | /** |
| 816 | * wp_kses_bad_protocol_once() - Sanitizes content from bad protocols and other characters |
| 817 | * |
| 818 | * This function searches for URL protocols at the beginning of $string, |
| 819 | * while handling whitespace and HTML entities. |
| 820 | * |
| 821 | * @since 1.0.0 |
| 822 | * |
| 823 | * @param string $string Content to check for bad protocols |
| 824 | * @param array $allowed_protocols Allowed protocols |
| 825 | * @return string Sanitized content |
| 826 | */ |
| 827 | function wp_kses_bad_protocol_once($string, $allowed_protocols) { |
697 | | function wp_kses_bad_protocol_once2($string, $allowed_protocols) |
698 | | ############################################################################### |
699 | | # This function processes URL protocols, checks to see if they're in the white- |
700 | | # list or not, and returns different data depending on the answer. |
701 | | ############################################################################### |
702 | | { |
| 831 | /** |
| 832 | * wp_kses_bad_protocol_once2() - Callback for wp_kses_bad_protocol_once() regular expression. |
| 833 | * |
| 834 | * This function processes URL protocols, checks to see if they're in the |
| 835 | * white-list or not, and returns different data depending on the answer. |
| 836 | * |
| 837 | * @since 1.0.0 |
| 838 | * |
| 839 | * @param string $string Content to check for bad protocols |
| 840 | * @param array $allowed_protocols Allowed protocols |
| 841 | * @return string Sanitized content |
| 842 | */ |
| 843 | function wp_kses_bad_protocol_once2($string, $allowed_protocols) { |
723 | | function wp_kses_normalize_entities($string) |
724 | | ############################################################################### |
725 | | # This function normalizes HTML entities. It will convert "AT&T" to the correct |
726 | | # "AT&amp;T", "&#00058;" to "&#58;", "&#XYZZY;" to "&amp;#XYZZY;" and so on. |
727 | | ############################################################################### |
728 | | { |
| 864 | /** |
| 865 | * wp_kses_normalize_entities() - Converts and fixes HTML entities |
| 866 | * |
| 867 | * This function normalizes HTML entities. It will convert "AT&T" to the |
| 868 | * correct "AT&amp;T", "&#00058;" to "&#58;", "&#XYZZY;" to "&amp;#XYZZY;" |
| 869 | * and so on. |
| 870 | * |
| 871 | * @since 1.0.0 |
| 872 | * |
| 873 | * @param string $string Content to normalize entities |
| 874 | * @return string Content with normalized entities |
| 875 | */ |
| 876 | function wp_kses_normalize_entities($string) { |
742 | | function wp_kses_normalize_entities2($i) |
743 | | ############################################################################### |
744 | | # This function helps wp_kses_normalize_entities() to only accept 16 bit values |
745 | | # and nothing more for &#number; entities. |
746 | | ############################################################################### |
747 | | { |
| 890 | /** |
| 891 | * wp_kses_normalize_entities2() - Callback for wp_kses_normalize_entities() regular expression |
| 892 | * |
| 893 | * This function helps wp_kses_normalize_entities() to only accept 16 bit |
| 894 | * values and nothing more for &#number; entities. |
| 895 | * |
| 896 | * @since 1.0.0 |
| 897 | * |
| 898 | * @param int $i Number encoded entity |
| 899 | * @return string Correctly encoded entity |
| 900 | */ |
| 901 | function wp_kses_normalize_entities2($i) { |
751 | | function wp_kses_decode_entities($string) |
752 | | ############################################################################### |
753 | | # This function decodes numeric HTML entities (&#65; and &#x41;). It doesn't |
754 | | # do anything with other entities like &auml;, but we don't need them in the |
755 | | # URL protocol whitelisting system anyway. |
756 | | ############################################################################### |
757 | | { |
| 905 | /** |
| 906 | * wp_kses_decode_entities() - Convert numeric entities to their character counterparts. |
| 907 | * |
| 908 | * This function decodes numeric HTML entities (&#65; and &#x41;). It |
| 909 | * doesn't do anything with other entities like &auml;, but we don't need |
| 910 | * them in the URL protocol whitelisting system anyway. |
| 911 | * |
| 912 | * @since 1.0.0 |
| 913 | * |
| 914 | * @param string $string Content to change entities |
| 915 | * @return string Content after decoded entities |
| 916 | */ |
| 917 | function wp_kses_decode_entities($string) { |
| 1014 | /** |
| 1015 | * kses_init() - Sets up most of the Kses filters for input form content |
| 1016 | * |
| 1017 | * If you remove the kses_init() function from 'init' hook and |
| 1018 | * 'set_current_user' (priority is default), then none of the |
| 1019 | * Kses filter hooks will be added. |
| 1020 | * |
| 1021 | * First removes all of the Kses filters in case the current user |
| 1022 | * does not need to have Kses filter the content. If the user does |
| 1023 | * not have unfiltered html capability, then Kses filters are added. |
| 1024 | * |
| 1025 | * @uses kses_remove_filters() Removes the Kses filters |
| 1026 | * @uses kses_init_filters() Adds the Kses filters back if the user |
| 1027 | * does not have unfiltered HTML capability. |
| 1028 | * @since 2.0.0 |
| 1029 | */ |