| | 2 | /** |
| | 3 | * HTML/XHTML filter that only allows some elements and attributes |
| | 4 | * |
| | 5 | * Added wp_ prefix to avoid conflicts with existing kses users |
| | 6 | * |
| | 7 | * @version 0.2.2 |
| | 8 | * @copyright (C) 2002, 2003, 2005 |
| | 9 | * @author Ulf Harnhammar <metaur@users.sourceforge.net> |
| | 10 | * |
| | 11 | * @package External |
| | 12 | * @subpackage KSES |
| | 13 | * |
| | 14 | * @internal |
| | 15 | * *** CONTACT INFORMATION *** |
| | 16 | * E-mail: metaur at users dot sourceforge dot net |
| | 17 | * Web page: http://sourceforge.net/projects/kses |
| | 18 | * Paper mail: Ulf Harnhammar |
| | 19 | * Ymergatan 17 C |
| | 20 | * 753 25 Uppsala |
| | 21 | * SWEDEN |
| | 22 | * |
| | 23 | * [kses strips evil scripts!] |
| | 24 | */ |
| 3 | | // Added wp_ prefix to avoid conflicts with existing kses users |
| 4 | | # kses 0.2.2 - HTML/XHTML filter that only allows some elements and attributes |
| 5 | | # Copyright (C) 2002, 2003, 2005 Ulf Harnhammar |
| 6 | | # *** CONTACT INFORMATION *** |
| 7 | | # |
| 8 | | # E-mail: metaur at users dot sourceforge dot net |
| 9 | | # Web page: http://sourceforge.net/projects/kses |
| 10 | | # Paper mail: Ulf Harnhammar |
| 11 | | # Ymergatan 17 C |
| 12 | | # 753 25 Uppsala |
| 13 | | # SWEDEN |
| 14 | | # |
| 15 | | # [kses strips evil scripts!] |
| | 26 | /** |
| | 27 | * You can override this in your my-hacks.php file |
| | 28 | * You can also override this in a plugin file, which is |
| | 29 | * preferred: use of my-hacks.php is deprecated. |
| | 30 | * |
| | 31 | * @since 1.2.0 |
| | 32 | */ |
| 222 | | function wp_kses($string, $allowed_html, $allowed_protocols = array ('http', 'https', 'ftp', 'ftps', 'mailto', 'news', 'irc', 'gopher', 'nntp', 'feed', 'telnet')) |
| 223 | | ############################################################################### |
| 224 | | # This function makes sure that only the allowed HTML element names, attribute |
| 225 | | # names and attribute values plus only sane HTML entities will occur in |
| 226 | | # $string. You have to remove any slashes from PHP's magic quotes before you |
| 227 | | # call this function. |
| 228 | | ############################################################################### |
| 229 | | { |
| | 251 | /** |
| | 252 | * wp_kses() - Filters content and keeps only allowable HTML elements. |
| | 253 | * |
| | 254 | * This function makes sure that only the allowed HTML element names, |
| | 255 | * attribute names and attribute values plus only sane HTML entities |
| | 256 | * will occur in $string. You have to remove any slashes from PHP's |
| | 257 | * magic quotes before you call this function. |
| | 258 | * |
| | 259 | * The default allowed protocols are 'http', 'https', 'ftp', 'ftps', 'mailto', |
| | 260 | * 'news', 'irc', 'gopher', 'nntp', 'feed', and finally 'telnet'. This |
| | 261 | * covers all common link protocols, except for 'javascript' which |
| | 262 | * should not be allowed for untrusted users. |
| | 263 | * |
| | 264 | * @since 1.0.0 |
| | 265 | * |
| | 266 | * @param string $string Content to filter through kses |
| | 267 | * @param array $allowed_html List of allowed HTML elements |
| | 268 | * @param array $allowed_protocols Optional. Allowed protocol in links. |
| | 269 | * @return string Filtered content with only allowed HTML elements |
| | 270 | */ |
| | 271 | function wp_kses($string, $allowed_html, $allowed_protocols = array ('http', 'https', 'ftp', 'ftps', 'mailto', 'news', 'irc', 'gopher', 'nntp', 'feed', 'telnet')) { |
| 238 | | function wp_kses_hook($string, $allowed_html, $allowed_protocols) |
| 239 | | ############################################################################### |
| 240 | | # You add any kses hooks here. |
| 241 | | ############################################################################### |
| 242 | | { |
| | 280 | /** |
| | 281 | * wp_kses_hook() - You add any kses hooks here. |
| | 282 | * |
| | 283 | * There is currently only one kses WordPress hook and it is |
| | 284 | * called here. All parameters are passed to the hooks and |
| | 285 | * expected to receive a string. |
| | 286 | * |
| | 287 | * @since 1.0.0 |
| | 288 | * |
| | 289 | * @param string $string Content to filter through kses |
| | 290 | * @param array $allowed_html List of allowed HTML elements |
| | 291 | * @param array $allowed_protocols Allowed protocol in links |
| | 292 | * @return string Filtered content through 'pre_kses' hook |
| | 293 | */ |
| | 294 | function wp_kses_hook($string, $allowed_html, $allowed_protocols) { |
| 255 | | function wp_kses_split($string, $allowed_html, $allowed_protocols) |
| 256 | | ############################################################################### |
| 257 | | # This function searches for HTML tags, no matter how malformed. It also |
| 258 | | # matches stray ">" characters. |
| 259 | | ############################################################################### |
| 260 | | { |
| | 310 | /** |
| | 311 | * wp_kses_split() - Searches for HTML tags, no matter how malformed |
| | 312 | * |
| | 313 | * It also matches stray ">" characters. |
| | 314 | * |
| | 315 | * @since 1.0.0 |
| | 316 | * |
| | 317 | * @param string $string Content to filter |
| | 318 | * @param array $allowed_html Allowed HTML elements |
| | 319 | * @param array $allowed_protocols Allowed protocols to keep |
| | 320 | * @return string Content with fixed HTML tags |
| | 321 | */ |
| | 322 | function wp_kses_split($string, $allowed_html, $allowed_protocols) { |
| 265 | | function wp_kses_split2($string, $allowed_html, $allowed_protocols) |
| 266 | | ############################################################################### |
| 267 | | # This function does a lot of work. It rejects some very malformed things |
| 268 | | # like <:::>. It returns an empty string, if the element isn't allowed (look |
| 269 | | # ma, no strip_tags()!). Otherwise it splits the tag into an element and an |
| 270 | | # attribute list. |
| 271 | | ############################################################################### |
| 272 | | { |
| | 327 | /** |
| | 328 | * wp_kses_split2() - Callback for wp_kses_split for fixing malformed HTML tags |
| | 329 | * |
| | 330 | * This function does a lot of work. It rejects some very malformed things |
| | 331 | * like <:::>. It returns an empty string, if the element isn't allowed (look |
| | 332 | * ma, no strip_tags()!). Otherwise it splits the tag into an element and an |
| | 333 | * attribute list. |
| | 334 | * |
| | 335 | * After the tag is split into an element and an attribute list, it is run |
| | 336 | * through another filter which will remove illegal attributes and once |
| | 337 | * that is completed, will be returned. |
| | 338 | * |
| | 339 | * @since 1.0.0 |
| | 340 | * @uses wp_kses_attr() |
| | 341 | * |
| | 342 | * @param string $string Content to filter |
| | 343 | * @param array $allowed_html Allowed HTML elements |
| | 344 | * @param array $allowed_protocols Allowed protocols to keep |
| | 345 | * @return string Fixed HTML element |
| | 346 | */ |
| | 347 | function wp_kses_split2($string, $allowed_html, $allowed_protocols) { |
| | 383 | /** |
| | 384 | * wp_kses_attr() - Removes all attributes, if none are allowed for this element |
| | 385 | * |
| | 386 | * If some are allowed it calls wp_kses_hair() to split them further, and then |
| | 387 | * it builds up new HTML code from the data that wp_kses_hair() returns. It also |
| | 388 | * removes "<" and ">" characters, if there are any left. One more thing it |
| | 389 | * does is to check if the tag has a closing XHTML slash, and if it does, it |
| | 390 | * puts one in the returned code as well. |
| | 391 | * |
| | 392 | * @since 1.0.0 |
| | 393 | * |
| | 394 | * @param string $element HTML element/tag |
| | 395 | * @param string $attr HTML attributes from HTML element to closing HTML element tag |
| | 396 | * @param array $allowed_html Allowed HTML elements |
| | 397 | * @param array $allowed_protocols Allowed protocols to keep |
| | 398 | * @return string Sanitized HTML element |
| | 399 | */ |
| 309 | | ############################################################################### |
| 310 | | # This function removes all attributes, if none are allowed for this element. |
| 311 | | # If some are allowed it calls wp_kses_hair() to split them further, and then it |
| 312 | | # builds up new HTML code from the data that wp_kses_hair() returns. It also |
| 313 | | # removes "<" and ">" characters, if there are any left. One more thing it |
| 314 | | # does is to check if the tag has a closing XHTML slash, and if it does, |
| 315 | | # it puts one in the returned code as well. |
| 316 | | ############################################################################### |
| | 455 | /** |
| | 456 | * wp_kses_hair() - Builds an attribute list from string containing attributes. |
| | 457 | * |
| | 458 | * This function does a lot of work. It parses an attribute list into an array |
| | 459 | * with attribute data, and tries to do the right thing even if it gets weird |
| | 460 | * input. It will add quotes around attribute values that don't have any quotes |
| | 461 | * or apostrophes around them, to make it easier to produce HTML code that will |
| | 462 | * conform to W3C's HTML specification. It will also remove bad URL protocols |
| | 463 | * from attribute values. |
| | 464 | * |
| | 465 | * @since 1.0.0 |
| | 466 | * |
| | 467 | * @param string $attr Attribute list from HTML element to closing HTML element tag |
| | 468 | * @param array $allowed_protocols Allowed protocols to keep |
| | 469 | * @return array List of attributes after parsing |
| | 470 | */ |
| 372 | | ############################################################################### |
| 373 | | # This function does a lot of work. It parses an attribute list into an array |
| 374 | | # with attribute data, and tries to do the right thing even if it gets weird |
| 375 | | # input. It will add quotes around attribute values that don't have any quotes |
| 376 | | # or apostrophes around them, to make it easier to produce HTML code that will |
| 377 | | # conform to W3C's HTML specification. It will also remove bad URL protocols |
| 378 | | # from attribute values. |
| 379 | | ############################################################################### |
| | 569 | /** |
| | 570 | * wp_kses_check_attr_val() - Performs different checks for attribute values. |
| | 571 | * |
| | 572 | * The currently implemented checks are "maxlen", "minlen", "maxval", "minval" |
| | 573 | * and "valueless" with even more checks to come soon. |
| | 574 | * |
| | 575 | * @since 1.0.0 |
| | 576 | * |
| | 577 | * @param string $value Attribute value |
| | 578 | * @param string $vless Whether the value is valueless or not. Use 'y' or 'n' |
| | 579 | * @param string $checkname What $checkvalue is checking for. |
| | 580 | * @param mixed $checkvalue What constraint the value should pass |
| | 581 | * @return bool Whether check passes (true) or not (false) |
| | 582 | */ |
| | 642 | /** |
| | 643 | * wp_kses_bad_protocol() - Sanitize string from bad protocols |
| | 644 | * |
| | 645 | * This function removes all non-allowed protocols from the beginning |
| | 646 | * of $string. It ignores whitespace and the case of the letters, and |
| | 647 | * it does understand HTML entities. It does its work in a while loop, |
| | 648 | * so it won't be fooled by a string like "javascript:javascript:alert(57)". |
| | 649 | * |
| | 650 | * @since 1.0.0 |
| | 651 | * |
| | 652 | * @param string $string Content to filter bad protocols from |
| | 653 | * @param array $allowed_protocols Allowed protocols to keep |
| | 654 | * @return string Filtered content |
| | 655 | */ |
| 611 | | function wp_kses_html_error($string) |
| 612 | | ############################################################################### |
| 613 | | # This function deals with parsing errors in wp_kses_hair(). The general plan is |
| 614 | | # to remove everything to and including some whitespace, but it deals with |
| 615 | | # quotes and apostrophes as well. |
| 616 | | ############################################################################### |
| 617 | | { |
| | 740 | /** |
| | 741 | * wp_kses_html_error() - Handles parsing errors in wp_kses_hair() |
| | 742 | * |
| | 743 | * The general plan is to remove everything to and including some |
| | 744 | * whitespace, but it deals with quotes and apostrophes as well. |
| | 745 | * |
| | 746 | * @since 1.0.0 |
| | 747 | * |
| | 748 | * @param string $string |
| | 749 | * @return string |
| | 750 | */ |
| | 751 | function wp_kses_html_error($string) { |
| 621 | | function wp_kses_bad_protocol_once($string, $allowed_protocols) |
| 622 | | ############################################################################### |
| 623 | | # This function searches for URL protocols at the beginning of $string, while |
| 624 | | # handling whitespace and HTML entities. |
| 625 | | ############################################################################### |
| 626 | | { |
| | 755 | /** |
| | 756 | * wp_kses_bad_protocol_once() - Sanitizes content from bad protocols and other characters |
| | 757 | * |
| | 758 | * This function searches for URL protocols at the beginning of $string, |
| | 759 | * while handling whitespace and HTML entities. |
| | 760 | * |
| | 761 | * @since 1.0.0 |
| | 762 | * |
| | 763 | * @param string $string Content to check for bad protocols |
| | 764 | * @param array $allowed_protocols Allowed protocols |
| | 765 | * @return string Sanitized content |
| | 766 | */ |
| | 767 | function wp_kses_bad_protocol_once($string, $allowed_protocols) { |
| 630 | | function wp_kses_bad_protocol_once2($string, $allowed_protocols) |
| 631 | | ############################################################################### |
| 632 | | # This function processes URL protocols, checks to see if they're in the white- |
| 633 | | # list or not, and returns different data depending on the answer. |
| 634 | | ############################################################################### |
| 635 | | { |
| | 771 | /** |
| | 772 | * wp_kses_bad_protocol_once2() - Callback for wp_kses_bad_protocol_once() regular expression. |
| | 773 | * |
| | 774 | * This function processes URL protocols, checks to see if they're in the |
| | 775 | * white-list or not, and returns different data depending on the answer. |
| | 776 | * |
| | 777 | * @since 1.0.0 |
| | 778 | * |
| | 779 | * @param string $string Content to check for bad protocols |
| | 780 | * @param array $allowed_protocols Allowed protocols |
| | 781 | * @return string Sanitized content |
| | 782 | */ |
| | 783 | function wp_kses_bad_protocol_once2($string, $allowed_protocols) { |
| 656 | | function wp_kses_normalize_entities($string) |
| 657 | | ############################################################################### |
| 658 | | # This function normalizes HTML entities. It will convert "AT&T" to the correct |
| 659 | | # "AT&amp;T", "&#00058;" to "&#58;", "&#XYZZY;" to "&amp;#XYZZY;" and so on. |
| 660 | | ############################################################################### |
| 661 | | { |
| | 804 | /** |
| | 805 | * wp_kses_normalize_entities() - Converts and fixes HTML entities |
| | 806 | * |
| | 807 | * This function normalizes HTML entities. It will convert "AT&T" to the |
| | 808 | * correct "AT&amp;T", "&#00058;" to "&#58;", "&#XYZZY;" to "&amp;#XYZZY;" |
| | 809 | * and so on. |
| | 810 | * |
| | 811 | * @since 1.0.0 |
| | 812 | * |
| | 813 | * @param string $string Content to normalize entities |
| | 814 | * @return string Content with normalized entities |
| | 815 | */ |
| | 816 | function wp_kses_normalize_entities($string) { |
| 675 | | function wp_kses_normalize_entities2($i) |
| 676 | | ############################################################################### |
| 677 | | # This function helps wp_kses_normalize_entities() to only accept 16 bit values |
| 678 | | # and nothing more for &#number; entities. |
| 679 | | ############################################################################### |
| 680 | | { |
| | 830 | /** |
| | 831 | * wp_kses_normalize_entities2() - Callback for wp_kses_normalize_entities() regular expression |
| | 832 | * |
| | 833 | * This function helps wp_kses_normalize_entities() to only accept 16 bit |
| | 834 | * values and nothing more for &#number; entities. |
| | 835 | * |
| | 836 | * @since 1.0.0 |
| | 837 | * |
| | 838 | * @param int $i Number encoded entity |
| | 839 | * @return string Correctly encoded entity |
| | 840 | */ |
| | 841 | function wp_kses_normalize_entities2($i) { |
| 684 | | function wp_kses_decode_entities($string) |
| 685 | | ############################################################################### |
| 686 | | # This function decodes numeric HTML entities (&#65; and &#x41;). It doesn't |
| 687 | | # do anything with other entities like &auml;, but we don't need them in the |
| 688 | | # URL protocol whitelisting system anyway. |
| 689 | | ############################################################################### |
| 690 | | { |
| | 845 | /** |
| | 846 | * wp_kses_decode_entities() - Convert numeric entities to their character counterparts. |
| | 847 | * |
| | 848 | * This function decodes numeric HTML entities (&#65; and &#x41;). It |
| | 849 | * doesn't do anything with other entities like &auml;, but we don't need |
| | 850 | * them in the URL protocol whitelisting system anyway. |
| | 851 | * |
| | 852 | * @since 1.0.0 |
| | 853 | * |
| | 854 | * @param string $string Content to change entities |
| | 855 | * @return string Content after decoded entities |
| | 856 | */ |
| | 857 | function wp_kses_decode_entities($string) { |
| | 954 | /** |
| | 955 | * kses_init() - Sets up most of the Kses filters for input form content |
| | 956 | * |
| | 957 | * If you remove the kses_init() function from 'init' hook and |
| | 958 | * 'set_current_user' (priority is default), then none of the |
| | 959 | * Kses filter hooks will be added. |
| | 960 | * |
| | 961 | * First removes all of the Kses filters in case the current user |
| | 962 | * does not need to have Kses filter the content. If the user does |
| | 963 | * not have unfiltered html capability, then Kses filters are added. |
| | 964 | * |
| | 965 | * @uses kses_remove_filters() Removes the Kses filters |
| | 966 | * @uses kses_init_filters() Adds the Kses filters back if the user |
| | 967 | * does not have unfiltered HTML capability. |
| | 968 | * @since 2.0.0 |
| | 969 | */ |