| | 2 | /** |
| | 3 | * HTML/XHTML filter that only allows some elements and attributes |
| | 4 | * |
| | 5 | * Added wp_ prefix to avoid conflicts with existing kses users |
| | 6 | * |
| | 7 | * @version 0.2.2 |
| | 8 | * @copyright (C) 2002, 2003, 2005 |
| | 9 | * @author Ulf Harnhammar <metaur@users.sourceforge.net> |
| | 10 | * |
| | 11 | * @package External |
| | 12 | * @subpackage KSES |
| | 13 | * |
| | 14 | * @internal |
| | 15 | * *** CONTACT INFORMATION *** |
| | 16 | * E-mail: metaur at users dot sourceforge dot net |
| | 17 | * Web page: http://sourceforge.net/projects/kses |
| | 18 | * Paper mail: Ulf Harnhammar |
| | 19 | * Ymergatan 17 C |
| | 20 | * 753 25 Uppsala |
| | 21 | * SWEDEN |
| | 22 | * |
| | 23 | * [kses strips evil scripts!] |
| | 24 | */ |
| 3 | | // Added wp_ prefix to avoid conflicts with existing kses users |
| 4 | | # kses 0.2.2 - HTML/XHTML filter that only allows some elements and attributes |
| 5 | | # Copyright (C) 2002, 2003, 2005 Ulf Harnhammar |
| 6 | | # *** CONTACT INFORMATION *** |
| 7 | | # |
| 8 | | # E-mail: metaur at users dot sourceforge dot net |
| 9 | | # Web page: http://sourceforge.net/projects/kses |
| 10 | | # Paper mail: Ulf Harnhammar |
| 11 | | # Ymergatan 17 C |
| 12 | | # 753 25 Uppsala |
| 13 | | # SWEDEN |
| 14 | | # |
| 15 | | # [kses strips evil scripts!] |
| | 26 | /** |
| | 27 | * You can override this in your my-hacks.php file. |
| | 28 | * You can also override this in a plugin file. The use of |
| | 29 | * my-hacks.php is deprecated. |
| | 30 | * |
| | 31 | * @since 1.2.0 |
| | 32 | */ |
| 289 | | function wp_kses($string, $allowed_html, $allowed_protocols = array ('http', 'https', 'ftp', 'ftps', 'mailto', 'news', 'irc', 'gopher', 'nntp', 'feed', 'telnet')) |
| 290 | | ############################################################################### |
| 291 | | # This function makes sure that only the allowed HTML element names, attribute |
| 292 | | # names and attribute values plus only sane HTML entities will occur in |
| 293 | | # $string. You have to remove any slashes from PHP's magic quotes before you |
| 294 | | # call this function. |
| 295 | | ############################################################################### |
| 296 | | { |
| | 318 | /** |
| | 319 | * wp_kses() - Filters content and keeps only allowable HTML elements. |
| | 320 | * |
| | 321 | * This function makes sure that only the allowed HTML element names, |
| | 322 | * attribute names and attribute values plus only sane HTML entities |
| | 323 | * will occur in $string. You have to remove any slashes from PHP's |
| | 324 | * magic quotes before you call this function. |
| | 325 | * |
| | 326 | * The default allowed protocols are 'http', 'https', 'ftp', 'ftps', 'mailto', |
| | 327 | * 'news', 'irc', 'gopher', 'nntp', 'feed', and finally 'telnet'. This |
| | 328 | * covers all common link protocols, except for 'javascript' which |
| | 329 | * should not be allowed for untrusted users. |
| | 330 | * |
| | 331 | * @since 1.0.0 |
| | 332 | * |
| | 333 | * @param string $string Content to filter through kses |
| | 334 | * @param array $allowed_html List of allowed HTML elements |
| | 335 | * @param array $allowed_protocols Optional. Allowed protocol in links. |
| | 336 | * @return string Filtered content with only allowed HTML elements |
| | 337 | */ |
| | 338 | function wp_kses($string, $allowed_html, $allowed_protocols = array ('http', 'https', 'ftp', 'ftps', 'mailto', 'news', 'irc', 'gopher', 'nntp', 'feed', 'telnet')) { |
| 305 | | function wp_kses_hook($string, $allowed_html, $allowed_protocols) |
| 306 | | ############################################################################### |
| 307 | | # You add any kses hooks here. |
| 308 | | ############################################################################### |
| 309 | | { |
| | 347 | /** |
| | 348 | * wp_kses_hook() - You add any kses hooks here. |
| | 349 | * |
| | 350 | * There is currently only one kses WordPress hook and it is |
| | 351 | * called here. All parameters are passed to the hooks, which are |
| | 352 | * expected to return a string. |
| | 353 | * |
| | 354 | * @since 1.0.0 |
| | 355 | * |
| | 356 | * @param string $string Content to filter through kses |
| | 357 | * @param array $allowed_html List of allowed HTML elements |
| | 358 | * @param array $allowed_protocols Allowed protocol in links |
| | 359 | * @return string Filtered content through 'pre_kses' hook |
| | 360 | */ |
| | 361 | function wp_kses_hook($string, $allowed_html, $allowed_protocols) { |
| 322 | | function wp_kses_split($string, $allowed_html, $allowed_protocols) |
| 323 | | ############################################################################### |
| 324 | | # This function searches for HTML tags, no matter how malformed. It also |
| 325 | | # matches stray ">" characters. |
| 326 | | ############################################################################### |
| 327 | | { |
| | 377 | /** |
| | 378 | * wp_kses_split() - Searches for HTML tags, no matter how malformed |
| | 379 | * |
| | 380 | * It also matches stray ">" characters. |
| | 381 | * |
| | 382 | * @since 1.0.0 |
| | 383 | * |
| | 384 | * @param string $string Content to filter |
| | 385 | * @param array $allowed_html Allowed HTML elements |
| | 386 | * @param array $allowed_protocols Allowed protocols to keep |
| | 387 | * @return string Content with fixed HTML tags |
| | 388 | */ |
| | 389 | function wp_kses_split($string, $allowed_html, $allowed_protocols) { |
| 332 | | function wp_kses_split2($string, $allowed_html, $allowed_protocols) |
| 333 | | ############################################################################### |
| 334 | | # This function does a lot of work. It rejects some very malformed things |
| 335 | | # like <:::>. It returns an empty string, if the element isn't allowed (look |
| 336 | | # ma, no strip_tags()!). Otherwise it splits the tag into an element and an |
| 337 | | # attribute list. |
| 338 | | ############################################################################### |
| 339 | | { |
| | 394 | /** |
| | 395 | * wp_kses_split2() - Callback for wp_kses_split for fixing malformed HTML tags |
| | 396 | * |
| | 397 | * This function does a lot of work. It rejects some very malformed things |
| | 398 | * like <:::>. It returns an empty string, if the element isn't allowed (look |
| | 399 | * ma, no strip_tags()!). Otherwise it splits the tag into an element and an |
| | 400 | * attribute list. |
| | 401 | * |
| | 402 | * After the tag is split into an element and an attribute list, it is run |
| | 403 | * through another filter which will remove illegal attributes and once |
| | 404 | * that is completed, will be returned. |
| | 405 | * |
| | 406 | * @since 1.0.0 |
| | 407 | * @uses wp_kses_attr() |
| | 408 | * |
| | 409 | * @param string $string Content to filter |
| | 410 | * @param array $allowed_html Allowed HTML elements |
| | 411 | * @param array $allowed_protocols Allowed protocols to keep |
| | 412 | * @return string Fixed HTML element |
| | 413 | */ |
| | 414 | function wp_kses_split2($string, $allowed_html, $allowed_protocols) { |
| 375 | | function wp_kses_attr($element, $attr, $allowed_html, $allowed_protocols) |
| 376 | | ############################################################################### |
| 377 | | # This function removes all attributes, if none are allowed for this element. |
| 378 | | # If some are allowed it calls wp_kses_hair() to split them further, and then it |
| 379 | | # builds up new HTML code from the data that wp_kses_hair() returns. It also |
| 380 | | # removes "<" and ">" characters, if there are any left. One more thing it |
| 381 | | # does is to check if the tag has a closing XHTML slash, and if it does, |
| 382 | | # it puts one in the returned code as well. |
| 383 | | ############################################################################### |
| 384 | | { |
| | 450 | /** |
| | 451 | * wp_kses_attr() - Removes all attributes, if none are allowed for this element |
| | 452 | * |
| | 453 | * If some are allowed it calls wp_kses_hair() to split them further, and then |
| | 454 | * it builds up new HTML code from the data that wp_kses_hair() returns. It also |
| | 455 | * removes "<" and ">" characters, if there are any left. One more thing it |
| | 456 | * does is to check if the tag has a closing XHTML slash, and if it does, it |
| | 457 | * puts one in the returned code as well. |
| | 458 | * |
| | 459 | * @since 1.0.0 |
| | 460 | * |
| | 461 | * @param string $element HTML element/tag |
| | 462 | * @param string $attr HTML attributes from HTML element to closing HTML element tag |
| | 463 | * @param array $allowed_html Allowed HTML elements |
| | 464 | * @param array $allowed_protocols Allowed protocols to keep |
| | 465 | * @return string Sanitized HTML element |
| | 466 | */ |
| | 467 | function wp_kses_attr($element, $attr, $allowed_html, $allowed_protocols) { |
| 438 | | function wp_kses_hair($attr, $allowed_protocols) |
| 439 | | ############################################################################### |
| 440 | | # This function does a lot of work. It parses an attribute list into an array |
| 441 | | # with attribute data, and tries to do the right thing even if it gets weird |
| 442 | | # input. It will add quotes around attribute values that don't have any quotes |
| 443 | | # or apostrophes around them, to make it easier to produce HTML code that will |
| 444 | | # conform to W3C's HTML specification. It will also remove bad URL protocols |
| 445 | | # from attribute values. |
| 446 | | ############################################################################### |
| 447 | | { |
| | 521 | /** |
| | 522 | * wp_kses_hair() - Builds an attribute list from string containing attributes. |
| | 523 | * |
| | 524 | * This function does a lot of work. It parses an attribute list into an array |
| | 525 | * with attribute data, and tries to do the right thing even if it gets weird |
| | 526 | * input. It will add quotes around attribute values that don't have any quotes |
| | 527 | * or apostrophes around them, to make it easier to produce HTML code that will |
| | 528 | * conform to W3C's HTML specification. It will also remove bad URL protocols |
| | 529 | * from attribute values. |
| | 530 | * |
| | 531 | * @since 1.0.0 |
| | 532 | * |
| | 533 | * @param string $attr Attribute list from HTML element to closing HTML element tag |
| | 534 | * @param array $allowed_protocols Allowed protocols to keep |
| | 535 | * @return array List of attributes after parsing |
| | 536 | */ |
| | 537 | function wp_kses_hair($attr, $allowed_protocols) { |
| 544 | | function wp_kses_check_attr_val($value, $vless, $checkname, $checkvalue) |
| 545 | | ############################################################################### |
| 546 | | # This function performs different checks for attribute values. The currently |
| 547 | | # implemented checks are "maxlen", "minlen", "maxval", "minval" and "valueless" |
| 548 | | # with even more checks to come soon. |
| 549 | | ############################################################################### |
| 550 | | { |
| | 634 | /** |
| | 635 | * wp_kses_check_attr_val() - Performs different checks for attribute values. |
| | 636 | * |
| | 637 | * The currently implemented checks are "maxlen", "minlen", "maxval", "minval" |
| | 638 | * and "valueless" with even more checks to come soon. |
| | 639 | * |
| | 640 | * @since 1.0.0 |
| | 641 | * |
| | 642 | * @param string $value Attribute value |
| | 643 | * @param string $vless Whether the value is valueless or not. Use 'y' or 'n' |
| | 644 | * @param string $checkname What $checkvalue is checking for. |
| | 645 | * @param mixed $checkvalue What constraint the value should pass |
| | 646 | * @return bool Whether check passes (true) or not (false) |
| | 647 | */ |
| | 648 | function wp_kses_check_attr_val($value, $vless, $checkname, $checkvalue) { |
| 608 | | function wp_kses_bad_protocol($string, $allowed_protocols) |
| 609 | | ############################################################################### |
| 610 | | # This function removes all non-allowed protocols from the beginning of |
| 611 | | # $string. It ignores whitespace and the case of the letters, and it does |
| 612 | | # understand HTML entities. It does its work in a while loop, so it won't be |
| 613 | | # fooled by a string like "javascript:javascript:alert(57)". |
| 614 | | ############################################################################### |
| 615 | | { |
| | 706 | /** |
| | 707 | * wp_kses_bad_protocol() - Sanitize string from bad protocols |
| | 708 | * |
| | 709 | * This function removes all non-allowed protocols from the beginning |
| | 710 | * of $string. It ignores whitespace and the case of the letters, and |
| | 711 | * it does understand HTML entities. It does its work in a while loop, |
| | 712 | * so it won't be fooled by a string like "javascript:javascript:alert(57)". |
| | 713 | * |
| | 714 | * @since 1.0.0 |
| | 715 | * |
| | 716 | * @param string $string Content to filter bad protocols from |
| | 717 | * @param array $allowed_protocols Allowed protocols to keep |
| | 718 | * @return string Filtered content |
| | 719 | */ |
| | 720 | function wp_kses_bad_protocol($string, $allowed_protocols) { |
| 639 | | function wp_kses_stripslashes($string) |
| 640 | | ############################################################################### |
| 641 | | # This function changes the character sequence \" to just " |
| 642 | | # It leaves all other slashes alone. It's really weird, but the quoting from |
| 643 | | # preg_replace(//e) seems to require this. |
| 644 | | ############################################################################### |
| 645 | | { |
| | 748 | /** |
| | 749 | * wp_kses_stripslashes() - Strips slashes from in front of quotes |
| | 750 | * |
| | 751 | * This function changes the character sequence \" to just " |
| | 752 | * It leaves all other slashes alone. It's really weird, but the |
| | 753 | * quoting from preg_replace(//e) seems to require this. |
| | 754 | * |
| | 755 | * @since 1.0.0 |
| | 756 | * |
| | 757 | * @param string $string String to strip slashes |
| | 758 | * @return string Fixed strings with quoted slashes |
| | 759 | */ |
| | 760 | function wp_kses_stripslashes($string) { |
| 649 | | function wp_kses_array_lc($inarray) |
| 650 | | ############################################################################### |
| 651 | | # This function goes through an array, and changes the keys to all lower case. |
| 652 | | ############################################################################### |
| 653 | | { |
| | 764 | /** |
| | 765 | * wp_kses_array_lc() - Goes through an array and changes the keys to all lower case. |
| | 766 | * |
| | 767 | * @since 1.0.0 |
| | 768 | * |
| | 769 | * @param array $inarray Unfiltered array |
| | 770 | * @return array Fixed array with all lowercase keys |
| | 771 | */ |
| | 772 | function wp_kses_array_lc($inarray) { |
| 678 | | function wp_kses_html_error($string) |
| 679 | | ############################################################################### |
| 680 | | # This function deals with parsing errors in wp_kses_hair(). The general plan is |
| 681 | | # to remove everything to and including some whitespace, but it deals with |
| 682 | | # quotes and apostrophes as well. |
| 683 | | ############################################################################### |
| 684 | | { |
| | 800 | /** |
| | 801 | * wp_kses_html_error() - Handles parsing errors in wp_kses_hair() |
| | 802 | * |
| | 803 | * The general plan is to remove everything to and including some |
| | 804 | * whitespace, but it deals with quotes and apostrophes as well. |
| | 805 | * |
| | 806 | * @since 1.0.0 |
| | 807 | * |
| | 808 | * @param string $string |
| | 809 | * @return string |
| | 810 | */ |
| | 811 | function wp_kses_html_error($string) { |
| 688 | | function wp_kses_bad_protocol_once($string, $allowed_protocols) |
| 689 | | ############################################################################### |
| 690 | | # This function searches for URL protocols at the beginning of $string, while |
| 691 | | # handling whitespace and HTML entities. |
| 692 | | ############################################################################### |
| 693 | | { |
| | 815 | /** |
| | 816 | * wp_kses_bad_protocol_once() - Sanitizes content from bad protocols and other characters |
| | 817 | * |
| | 818 | * This function searches for URL protocols at the beginning of $string, |
| | 819 | * while handling whitespace and HTML entities. |
| | 820 | * |
| | 821 | * @since 1.0.0 |
| | 822 | * |
| | 823 | * @param string $string Content to check for bad protocols |
| | 824 | * @param array $allowed_protocols Allowed protocols |
| | 825 | * @return string Sanitized content |
| | 826 | */ |
| | 827 | function wp_kses_bad_protocol_once($string, $allowed_protocols) { |
| 697 | | function wp_kses_bad_protocol_once2($string, $allowed_protocols) |
| 698 | | ############################################################################### |
| 699 | | # This function processes URL protocols, checks to see if they're in the white- |
| 700 | | # list or not, and returns different data depending on the answer. |
| 701 | | ############################################################################### |
| 702 | | { |
| | 831 | /** |
| | 832 | * wp_kses_bad_protocol_once2() - Callback for wp_kses_bad_protocol_once() regular expression. |
| | 833 | * |
| | 834 | * This function processes URL protocols, checks to see if they're in the |
| | 835 | * white-list or not, and returns different data depending on the answer. |
| | 836 | * |
| | 837 | * @since 1.0.0 |
| | 838 | * |
| | 839 | * @param string $string Content to check for bad protocols |
| | 840 | * @param array $allowed_protocols Allowed protocols |
| | 841 | * @return string Sanitized content |
| | 842 | */ |
| | 843 | function wp_kses_bad_protocol_once2($string, $allowed_protocols) { |
| 723 | | function wp_kses_normalize_entities($string) |
| 724 | | ############################################################################### |
| 725 | | # This function normalizes HTML entities. It will convert "AT&T" to the correct |
| 726 | | # "AT&amp;T", "&#00058;" to "&#58;", "&#XYZZY;" to "&amp;#XYZZY;" and so on. |
| 727 | | ############################################################################### |
| 728 | | { |
| | 864 | /** |
| | 865 | * wp_kses_normalize_entities() - Converts and fixes HTML entities |
| | 866 | * |
| | 867 | * This function normalizes HTML entities. It will convert "AT&T" to the |
| | 868 | * correct "AT&amp;T", "&#00058;" to "&#58;", "&#XYZZY;" to "&amp;#XYZZY;" |
| | 869 | * and so on. |
| | 870 | * |
| | 871 | * @since 1.0.0 |
| | 872 | * |
| | 873 | * @param string $string Content to normalize entities |
| | 874 | * @return string Content with normalized entities |
| | 875 | */ |
| | 876 | function wp_kses_normalize_entities($string) { |
| 742 | | function wp_kses_normalize_entities2($i) |
| 743 | | ############################################################################### |
| 744 | | # This function helps wp_kses_normalize_entities() to only accept 16 bit values |
| 745 | | # and nothing more for &#number; entities. |
| 746 | | ############################################################################### |
| 747 | | { |
| | 890 | /** |
| | 891 | * wp_kses_normalize_entities2() - Callback for wp_kses_normalize_entities() regular expression |
| | 892 | * |
| | 893 | * This function helps wp_kses_normalize_entities() to only accept 16 bit |
| | 894 | * values and nothing more for &#number; entities. |
| | 895 | * |
| | 896 | * @since 1.0.0 |
| | 897 | * |
| | 898 | * @param int $i Number encoded entity |
| | 899 | * @return string Correctly encoded entity |
| | 900 | */ |
| | 901 | function wp_kses_normalize_entities2($i) { |
| 751 | | function wp_kses_decode_entities($string) |
| 752 | | ############################################################################### |
| 753 | | # This function decodes numeric HTML entities (&#65; and &#x41;). It doesn't |
| 754 | | # do anything with other entities like &auml;, but we don't need them in the |
| 755 | | # URL protocol whitelisting system anyway. |
| 756 | | ############################################################################### |
| 757 | | { |
| | 905 | /** |
| | 906 | * wp_kses_decode_entities() - Convert all entities to their character counterparts. |
| | 907 | * |
| | 908 | * This function decodes numeric HTML entities (&#65; and &#x41;). It |
| | 909 | * doesn't do anything with other entities like &auml;, but we don't need |
| | 910 | * them in the URL protocol whitelisting system anyway. |
| | 911 | * |
| | 912 | * @since 1.0.0 |
| | 913 | * |
| | 914 | * @param string $string Content to change entities |
| | 915 | * @return string Content after decoded entities |
| | 916 | */ |
| | 917 | function wp_kses_decode_entities($string) { |
| | 1014 | /** |
| | 1015 | * kses_init() - Sets up most of the Kses filters for input form content |
| | 1016 | * |
| | 1017 | * If you remove the kses_init() function from 'init' hook and |
| | 1018 | * 'set_current_user' (priority is default), then none of the |
| | 1019 | * Kses filter hooks will be added. |
| | 1020 | * |
| | 1021 | * First removes all of the Kses filters in case the current user |
| | 1022 | * does not need to have Kses filter the content. If the user does |
| | 1023 | * not have unfiltered html capability, then Kses filters are added. |
| | 1024 | * |
| | 1025 | * @uses kses_remove_filters() Removes the Kses filters |
| | 1026 | * @uses kses_init_filters() Adds the Kses filters back if the user |
| | 1027 | * does not have unfiltered HTML capability. |
| | 1028 | * @since 2.0.0 |
| | 1029 | */ |