Changeset 47122 for trunk/src/wp-includes/formatting.php
- Timestamp:
- 01/29/2020 12:43:23 AM (6 years ago)
- File:
-
- 1 edited
-
trunk/src/wp-includes/formatting.php (modified) (105 diffs)
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/wp-includes/formatting.php
r47088 r47122 124 124 $default_no_texturize_shortcodes = array( 'code' ); 125 125 126 // if a plugin has provided an autocorrect array, use it126 // If a plugin has provided an autocorrect array, use it. 127 127 if ( isset( $wp_cockneyreplace ) ) { 128 128 $cockney = array_keys( $wp_cockneyreplace ); … … 131 131 /* 132 132 * translators: This is a comma-separated list of words that defy the syntax of quotations in normal use, 133 * for example... 'We do not have enough words yet' ... is a typical quoted phrase.But when we write133 * for example... 'We do not have enough words yet'... is a typical quoted phrase. But when we write 134 134 * lines of code 'til we have enough of 'em, then we need to insert apostrophes instead of quotes. 135 135 */ … … 182 182 } 183 183 184 // Quoted Numbers like '0.42'184 // Quoted numbers like '0.42'. 185 185 if ( "'" !== $opening_single_quote && "'" !== $closing_single_quote ) { 186 186 $dynamic[ '/(?<=\A|' . $spaces . ')\'(\d[.,\d]*)\'/' ] = $open_sq_flag . '$1' . $closing_single_quote; … … 192 192 } 193 193 194 // Apostrophe in a word. No spaces, double apostrophes, or other punctuation.194 // Apostrophe in a word. No spaces, double apostrophes, or other punctuation. 195 195 if ( "'" !== $apos ) { 196 196 $dynamic[ '/(?<!' . $spaces . ')\'(?!\Z|[.,:;!?"\'(){}[\]\-]|&[lg]t;|' . $spaces . ')/' ] = $apos_flag; … … 201 201 $dynamic = array(); 202 202 203 // Quoted Numbers like "42"203 // Quoted numbers like "42". 204 204 if ( '"' !== $opening_quote && '"' !== $closing_quote ) { 205 205 $dynamic[ '/(?<=\A|' . $spaces . ')"(\d[.,\d]*)"/' ] = $open_q_flag . '$1' . $closing_quote; … … 215 215 $dynamic = array(); 216 216 217 // Dashes and spaces 217 // Dashes and spaces. 218 218 $dynamic['/---/'] = $em_dash; 219 219 $dynamic[ '/(?<=^|' . $spaces . ')--(?=$|' . $spaces . 
')/' ] = $em_dash; … … 225 225 } 226 226 227 // Must do this every time in case plugins use these filters in a context sensitive manner 227 // Must do this every time in case plugins use these filters in a context sensitive manner. 228 228 /** 229 229 * Filters the list of HTML elements not to texturize. … … 272 272 } 273 273 } elseif ( '' === trim( $curl ) ) { 274 // This is a newline between delimiters. Performance improves when we check this.274 // This is a newline between delimiters. Performance improves when we check this. 275 275 continue; 276 276 … … 286 286 } 287 287 } elseif ( empty( $no_texturize_shortcodes_stack ) && empty( $no_texturize_tags_stack ) ) { 288 // This is neither a delimiter, nor is this content inside of no_texturize pairs. Do texturize.288 // This is neither a delimiter, nor is this content inside of no_texturize pairs. Do texturize. 289 289 290 290 $curl = str_replace( $static_characters, $static_replacements, $curl ); … … 305 305 } 306 306 307 // 9x9 (times), but never 0x9999 307 // 9x9 (times), but never 0x9999. 308 308 if ( 1 === preg_match( '/(?<=\d)x\d/', $curl ) ) { 309 309 // Searching for a digit is 10 times more expensive than for the x, so we avoid doing this one! … … 349 349 $sentence = preg_replace( $quote_pattern, $flag, $sentence, -1, $count ); 350 350 if ( $count > 1 ) { 351 // This sentence appears to have multiple closing quotes. Attempt Vulcan logic.351 // This sentence appears to have multiple closing quotes. Attempt Vulcan logic. 352 352 $sentence = preg_replace( $flag_no_digit, $close_quote, $sentence, -1, $count2 ); 353 353 if ( 0 === $count2 ) { … … 373 373 $sentence = preg_replace( $prime_pattern, $prime, $sentence ); 374 374 } else { 375 // No closing quotes found. Just run primes pattern.375 // No closing quotes found. Just run primes pattern. 
376 376 $sentence = preg_replace( $prime_pattern, $prime, $sentence ); 377 377 } … … 429 429 /* 430 430 * This disables texturize until we find a closing tag of our type 431 * (e.g. <pre>) even if there was invalid nesting before that 431 * (e.g. <pre>) even if there was invalid nesting before that. 432 432 * 433 433 * Example: in the case <pre>sadsadasd</code>"baba"</pre> 434 * "baba" won't be texturize 434 * "baba" won't be texturized. 435 435 */ 436 436 … … 479 479 $start = strpos( $pee_part, '<pre' ); 480 480 481 // Malformed html?481 // Malformed HTML? 482 482 if ( $start === false ) { 483 483 $pee .= $pee_part; … … 494 494 $pee .= $last_pee; 495 495 } 496 // Change multiple <br> s into two line breaks, which will turn into paragraphs.496 // Change multiple <br>'s into two line breaks, which will turn into paragraphs. 497 497 $pee = preg_replace( '|<br\s*/?>\s*<br\s*/?>|', "\n\n", $pee ); 498 498 … … 677 677 . '(?' // Conditional expression follows. 678 678 . $escaped // Find end of escaped element. 679 . '|' // ... else...679 . '|' // ...else... 680 680 . '[^>]*>?' // Find end of normal element. 681 681 . ')' … … 697 697 * @staticvar string $html_regex 698 698 * 699 * @param string $shortcode_regex The result from _get_wptexturize_shortcode_regex(). Optional.699 * @param string $shortcode_regex The result from _get_wptexturize_shortcode_regex(). Optional. 700 700 * @return string The regular expression 701 701 */ … … 747 747 // phpcs:disable Squiz.Strings.ConcatenationSpacing.PaddingFound -- don't remove regex indentation 748 748 $regex = 749 '\[' // Find start of shortcode.750 . '[\/\[]?' // Shortcodes may begin with [/ or [[ 749 '\[' // Find start of shortcode. 750 . '[\/\[]?' // Shortcodes may begin with [/ or [[. 751 751 . $tagregexp // Only match registered shortcodes, because performance. 752 752 . '(?:' … … 756 756 . ')*+' // Possessive critical. 757 757 . '\]' // Find end of shortcode. 758 . '\]?'; // Shortcodes may end with ]] 758 . 
'\]?'; // Shortcodes may end with ]]. 759 759 // phpcs:enable 760 760 … … 851 851 $pattern = 852 852 '/' 853 . '<p>' // Opening paragraph 854 . '(?:' . $spaces . ')*+' // Optional leading whitespace 855 . '(' // 1: The shortcode 856 . '\\[' // Opening bracket 857 . "($tagregexp)" // 2: Shortcode name 858 . '(?![\\w-])' // Not followed by word character or hyphen 859 // Unroll the loop: Inside the opening shortcode tag 860 . '[^\\]\\/]*' // Not a closing bracket or forward slash 853 . '<p>' // Opening paragraph. 854 . '(?:' . $spaces . ')*+' // Optional leading whitespace. 855 . '(' // 1: The shortcode. 856 . '\\[' // Opening bracket. 857 . "($tagregexp)" // 2: Shortcode name. 858 . '(?![\\w-])' // Not followed by word character or hyphen. 859 // Unroll the loop: Inside the opening shortcode tag. 860 . '[^\\]\\/]*' // Not a closing bracket or forward slash. 861 861 . '(?:' 862 . '\\/(?!\\])' // A forward slash not followed by a closing bracket 863 . '[^\\]\\/]*' // Not a closing bracket or forward slash 862 . '\\/(?!\\])' // A forward slash not followed by a closing bracket. 863 . '[^\\]\\/]*' // Not a closing bracket or forward slash. 864 864 . ')*?' 865 865 . '(?:' 866 . '\\/\\]' // Self closing tag and closing bracket 866 . '\\/\\]' // Self closing tag and closing bracket. 867 867 . '|' 868 . '\\]' // Closing bracket 869 . '(?:' // Unroll the loop: Optionally, anything between the opening and closing shortcode tags 870 . '[^\\[]*+' // Not an opening bracket 868 . '\\]' // Closing bracket. 869 . '(?:' // Unroll the loop: Optionally, anything between the opening and closing shortcode tags. 870 . '[^\\[]*+' // Not an opening bracket. 871 871 . '(?:' 872 . '\\[(?!\\/\\2\\])' // An opening bracket not followed by the closing shortcode tag 873 . '[^\\[]*+' // Not an opening bracket 872 . '\\[(?!\\/\\2\\])' // An opening bracket not followed by the closing shortcode tag. 873 . '[^\\[]*+' // Not an opening bracket. 874 874 . ')*+' 875 . 
'\\[\\/\\2\\]' // Closing shortcode tag 875 . '\\[\\/\\2\\]' // Closing shortcode tag. 876 876 . ')?' 877 877 . ')' 878 878 . ')' 879 . '(?:' . $spaces . ')*+' // optional trailing whitespace880 . '<\\/p>' // closing paragraph879 . '(?:' . $spaces . ')*+' // Optional trailing whitespace. 880 . '<\\/p>' // Closing paragraph. 881 881 . '/'; 882 882 // phpcs:enable … … 916 916 $n = 5; // 1111110b 917 917 } else { 918 return false; // Does not match any model 918 return false; // Does not match any model. 919 919 } 920 920 for ( $j = 0; $j < $n; $j++ ) { // n bytes matching 10bbbbbb follow ? … … 957 957 } 958 958 959 // Don't bother if there are no specialchars - saves some processing 959 // Don't bother if there are no specialchars - saves some processing. 960 960 if ( ! preg_match( '/[&<>"\']/', $string ) ) { 961 961 return $string; 962 962 } 963 963 964 // Account for the previous behaviour of the function when the $quote_style is not an accepted value 964 // Account for the previous behaviour of the function when the $quote_style is not an accepted value. 965 965 if ( empty( $quote_style ) ) { 966 966 $quote_style = ENT_NOQUOTES; … … 969 969 } 970 970 971 // Store the site charset as a static to avoid multiple calls to wp_load_alloptions() 971 // Store the site charset as a static to avoid multiple calls to wp_load_alloptions(). 972 972 if ( ! $charset ) { 973 973 static $_charset = null; … … 1035 1035 } 1036 1036 1037 // Don't bother if there are no entities - saves a lot of processing 1037 // Don't bother if there are no entities - saves a lot of processing. 1038 1038 if ( strpos( $string, '&' ) === false ) { 1039 1039 return $string; 1040 1040 } 1041 1041 1042 // Match the previous behaviour of _wp_specialchars() when the $quote_style is not an accepted value 1042 // Match the previous behaviour of _wp_specialchars() when the $quote_style is not an accepted value. 
1043 1043 if ( empty( $quote_style ) ) { 1044 1044 $quote_style = ENT_NOQUOTES; … … 1047 1047 } 1048 1048 1049 // More complete than get_html_translation_table( HTML_SPECIALCHARS ) 1049 // More complete than get_html_translation_table( HTML_SPECIALCHARS ). 1050 1050 $single = array( 1051 1051 ''' => '\'', … … 1095 1095 } 1096 1096 1097 // Remove zero padding on numeric entities 1097 // Remove zero padding on numeric entities. 1098 1098 $string = preg_replace( array_keys( $translation_preg ), array_values( $translation_preg ), $string ); 1099 1099 1100 // Replace characters according to translation table 1100 // Replace characters according to translation table. 1101 1101 return strtr( $string, $translation ); 1102 1102 } … … 1121 1121 } 1122 1122 1123 // Store the site charset as a static to avoid multiple calls to get_option() 1123 // Store the site charset as a static to avoid multiple calls to get_option(). 1124 1124 static $is_utf8 = null; 1125 1125 if ( ! isset( $is_utf8 ) ) { … … 1130 1130 } 1131 1131 1132 // Check for support for utf8 in the installed PCRE library once and store the result in a static 1132 // Check for support for utf8 in the installed PCRE library once and store the result in a static. 1133 1133 static $utf8_pcre = null; 1134 1134 if ( ! isset( $utf8_pcre ) ) { … … 1136 1136 $utf8_pcre = @preg_match( '/^./u', 'a' ); 1137 1137 } 1138 // We can't demand utf8 in the PCRE installation, so just return the string in those cases 1138 // We can't demand utf8 in the PCRE installation, so just return the string in those cases. 1139 1139 if ( ! $utf8_pcre ) { 1140 1140 return $string; 1141 1141 } 1142 1142 1143 // phpcs:ignore WordPress.PHP.NoSilencedErrors.Discouraged -- preg_match fails when it encounters invalid UTF8 in $string 1143 // phpcs:ignore WordPress.PHP.NoSilencedErrors.Discouraged -- preg_match fails when it encounters invalid UTF8 in $string. 
1144 1144 if ( 1 === @preg_match( '/^./us', $string ) ) { 1145 1145 return $string; 1146 1146 } 1147 1147 1148 // Attempt to strip the bad chars if requested (not recommended) 1148 // Attempt to strip the bad chars if requested (not recommended). 1149 1149 if ( $strip && function_exists( 'iconv' ) ) { 1150 1150 return iconv( 'utf-8', 'utf-8', $string ); … … 1608 1608 if ( seems_utf8( $string ) ) { 1609 1609 $chars = array( 1610 // Decompositions for Latin-1 Supplement 1610 // Decompositions for Latin-1 Supplement. 1611 1611 'ª' => 'a', 1612 1612 'º' => 'o', … … 1673 1673 'ÿ' => 'y', 1674 1674 'Ø' => 'O', 1675 // Decompositions for Latin Extended-A 1675 // Decompositions for Latin Extended-A. 1676 1676 'Ā' => 'A', 1677 1677 'ā' => 'a', … … 1802 1802 'ž' => 'z', 1803 1803 'ſ' => 's', 1804 // Decompositions for Latin Extended-B 1804 // Decompositions for Latin Extended-B. 1805 1805 'Ș' => 'S', 1806 1806 'ș' => 's', 1807 1807 'Ț' => 'T', 1808 1808 'ț' => 't', 1809 // Euro Sign1809 // Euro sign. 1810 1810 '€' => 'E', 1811 // GBP (Pound) Sign1811 // GBP (Pound) sign. 1812 1812 '£' => '', 1813 // Vowels with diacritic (Vietnamese) 1814 // unmarked1813 // Vowels with diacritic (Vietnamese). 1814 // Unmarked. 1815 1815 'Ơ' => 'O', 1816 1816 'ơ' => 'o', 1817 1817 'Ư' => 'U', 1818 1818 'ư' => 'u', 1819 // grave accent1819 // Grave accent. 1820 1820 'Ầ' => 'A', 1821 1821 'ầ' => 'a', … … 1832 1832 'Ỳ' => 'Y', 1833 1833 'ỳ' => 'y', 1834 // hook1834 // Hook. 1835 1835 'Ả' => 'A', 1836 1836 'ả' => 'a', … … 1857 1857 'Ỷ' => 'Y', 1858 1858 'ỷ' => 'y', 1859 // tilde1859 // Tilde. 1860 1860 'Ẫ' => 'A', 1861 1861 'ẫ' => 'a', … … 1874 1874 'Ỹ' => 'Y', 1875 1875 'ỹ' => 'y', 1876 // acute accent1876 // Acute accent. 1877 1877 'Ấ' => 'A', 1878 1878 'ấ' => 'a', … … 1887 1887 'Ứ' => 'U', 1888 1888 'ứ' => 'u', 1889 // dot below1889 // Dot below. 
1890 1890 'Ạ' => 'A', 1891 1891 'ạ' => 'a', … … 1912 1912 'Ỵ' => 'Y', 1913 1913 'ỵ' => 'y', 1914 // Vowels with diacritic (Chinese, Hanyu Pinyin) 1914 // Vowels with diacritic (Chinese, Hanyu Pinyin). 1915 1915 'ɑ' => 'a', 1916 // macron1916 // Macron. 1917 1917 'Ǖ' => 'U', 1918 1918 'ǖ' => 'u', 1919 // acute accent1919 // Acute accent. 1920 1920 'Ǘ' => 'U', 1921 1921 'ǘ' => 'u', 1922 // caron1922 // Caron. 1923 1923 'Ǎ' => 'A', 1924 1924 'ǎ' => 'a', … … 1931 1931 'Ǚ' => 'U', 1932 1932 'ǚ' => 'u', 1933 // grave accent1933 // Grave accent. 1934 1934 'Ǜ' => 'U', 1935 1935 'ǜ' => 'u', 1936 1936 ); 1937 1937 1938 // Used for locale-specific rules 1938 // Used for locale-specific rules. 1939 1939 $locale = get_locale(); 1940 1940 … … 1964 1964 } else { 1965 1965 $chars = array(); 1966 // Assume ISO-8859-1 if not UTF-8 1966 // Assume ISO-8859-1 if not UTF-8. 1967 1967 $chars['in'] = "\x80\x83\x8a\x8e\x9a\x9e" 1968 1968 . "\x9f\xa2\xa5\xb5\xc0\xc1\xc2" … … 2029 2029 } 2030 2030 2031 // Split the filename into a base and extension[s] 2031 // Split the filename into a base and extension[s]. 2032 2032 $parts = explode( '.', $filename ); 2033 2033 2034 // Return if only one extension 2034 // Return if only one extension. 2035 2035 if ( count( $parts ) <= 2 ) { 2036 2036 /** … … 2045 2045 } 2046 2046 2047 // Process multiple extensions 2047 // Process multiple extensions. 2048 2048 $filename = array_shift( $parts ); 2049 2049 $extension = array_pop( $parts ); … … 2094 2094 $username = wp_strip_all_tags( $username ); 2095 2095 $username = remove_accents( $username ); 2096 // Kill octets 2096 // Kill octets. 2097 2097 $username = preg_replace( '|%([a-fA-F0-9][a-fA-F0-9])|', '', $username ); 2098 $username = preg_replace( '/&.+?;/', '', $username ); // Kill entities 2098 // Kill entities. 2099 $username = preg_replace( '/&.+?;/', '', $username ); 2099 2100 2100 2101 // If strict, reduce to ASCII for max portability. 
… … 2104 2105 2105 2106 $username = trim( $username ); 2106 // Consolidate contiguous whitespace 2107 // Consolidate contiguous whitespace. 2107 2108 $username = preg_replace( '|\s+|', ' ', $username ); 2108 2109 … … 2230 2231 2231 2232 if ( 'save' == $context ) { 2232 // Convert nbsp, ndash and mdash to hyphens2233 // Convert  , &ndash, and &mdash to hyphens. 2233 2234 $title = str_replace( array( '%c2%a0', '%e2%80%93', '%e2%80%94' ), '-', $title ); 2234 // Convert nbsp, ndash and mdash HTML entities to hyphens2235 // Convert  , &ndash, and &mdash HTML entities to hyphens. 2235 2236 $title = str_replace( array( ' ', ' ', '–', '–', '—', '—' ), '-', $title ); 2236 // Convert forward slash to hyphen 2237 // Convert forward slash to hyphen. 2237 2238 $title = str_replace( '/', '-', $title ); 2238 2239 2239 // Strip these characters entirely 2240 // Strip these characters entirely. 2240 2241 $title = str_replace( 2241 2242 array( 2242 // soft hyphens2243 // Soft hyphens. 2243 2244 '%c2%ad', 2244 // iexcl and iquest2245 // ¡ and ¿. 2245 2246 '%c2%a1', 2246 2247 '%c2%bf', 2247 // angle quotes2248 // Angle quotes. 2248 2249 '%c2%ab', 2249 2250 '%c2%bb', 2250 2251 '%e2%80%b9', 2251 2252 '%e2%80%ba', 2252 // curly quotes2253 // Curly quotes. 2253 2254 '%e2%80%98', 2254 2255 '%e2%80%99', … … 2259 2260 '%e2%80%9e', 2260 2261 '%e2%80%9f', 2261 // copy, reg, deg, hellip and trade2262 // ©, ®, °, &hellip, and &trade. 2262 2263 '%c2%a9', 2263 2264 '%c2%ae', … … 2265 2266 '%e2%80%a6', 2266 2267 '%e2%84%a2', 2267 // acute accents2268 // Acute accents. 2268 2269 '%c2%b4', 2269 2270 '%cb%8a', 2270 2271 '%cc%81', 2271 2272 '%cd%81', 2272 // grave accent, macron, caron2273 // Grave accent, macron, caron. 2273 2274 '%cc%80', 2274 2275 '%cc%84', … … 2279 2280 ); 2280 2281 2281 // Convert times to x2282 // Convert × to 'x'. 
2282 2283 $title = str_replace( '%c3%97', 'x', $title ); 2283 2284 } 2284 2285 2285 $title = preg_replace( '/&.+?;/', '', $title ); // kill entities 2286 // Kill entities. 2287 $title = preg_replace( '/&.+?;/', '', $title ); 2286 2288 $title = str_replace( '.', '-', $title ); 2287 2289 … … 2330 2332 */ 2331 2333 function sanitize_html_class( $class, $fallback = '' ) { 2332 // Strip out any % encoded octets2334 // Strip out any %-encoded octets. 2333 2335 $sanitized = preg_replace( '|%[a-fA-F0-9][a-fA-F0-9]|', '', $class ); 2334 2336 2335 // Limit to A-Z,a-z,0-9,_,-2337 // Limit to A-Z, a-z, 0-9, '_', '-'. 2336 2338 $sanitized = preg_replace( '/[^A-Za-z0-9_-]/', '', $sanitized ); 2337 2339 … … 2382 2384 function convert_invalid_entities( $content ) { 2383 2385 $wp_htmltranswinuni = array( 2384 '€' => '€', // the Euro sign2386 '€' => '€', // The Euro sign. 2385 2387 '' => '', 2386 '‚' => '‚', // these are Windows CP1252 specific characters2387 'ƒ' => 'ƒ', // they would look weird on non-Windows browsers2388 '‚' => '‚', // These are Windows CP1252 specific characters. 2389 'ƒ' => 'ƒ', // They would look weird on non-Windows browsers. 2388 2390 '„' => '„', 2389 2391 '…' => '…', … … 2464 2466 $tagqueue = ''; 2465 2467 $newtext = ''; 2466 // Known single-entity/self-closing tags 2468 // Known single-entity/self-closing tags. 2467 2469 $single_tags = array( 'area', 'base', 'basefont', 'br', 'col', 'command', 'embed', 'frame', 'hr', 'img', 'input', 'isindex', 'link', 'meta', 'param', 'source' ); 2468 // Tags that can be immediately nested within themselves 2470 // Tags that can be immediately nested within themselves. 2469 2471 $nestable_tags = array( 'blockquote', 'div', 'object', 'q', 'span' ); 2470 2472 2471 // WP bug fix for comments - in case you REALLY meant to type '< !--' 2473 // WP bug fix for comments - in case you REALLY meant to type '< !--'. 
2472 2474 $text = str_replace( '< !--', '< !--', $text ); 2473 // WP bug fix for LOVE <3 (and other situations with '<' before a number) 2475 // WP bug fix for LOVE <3 (and other situations with '<' before a number). 2474 2476 $text = preg_replace( '#<([0-9]{1})#', '<$1', $text ); 2475 2477 … … 2527 2529 // Clear the shifter. 2528 2530 $tagqueue = ''; 2529 if ( $has_leading_slash ) { // End Tag.2531 if ( $has_leading_slash ) { // End tag. 2530 2532 // If too many closing tags. 2531 2533 if ( $stacksize <= 0 ) { … … 2535 2537 // If stacktop value = tag close value, then pop. 2536 2538 } elseif ( $tagstack[ $stacksize - 1 ] === $tag ) { // Found closing tag. 2537 $tag = '</' . $tag . '>'; // Close Tag.2539 $tag = '</' . $tag . '>'; // Close tag. 2538 2540 array_pop( $tagstack ); 2539 2541 $stacksize--; … … 2551 2553 $tag = ''; 2552 2554 } 2553 } else { // Begin Tag.2555 } else { // Begin tag. 2554 2556 if ( $has_self_closer ) { // If it presents itself as a self-closing tag... 2555 // ...but it isn't a known single-entity self-closing tag, then don't let it be treated as such and2556 // immediately close it with a closing tag (the tag will encapsulate no text as a result)2557 // ...but it isn't a known single-entity self-closing tag, then don't let it be treated as such 2558 // and immediately close it with a closing tag (the tag will encapsulate no text as a result). 2557 2559 if ( ! $is_single_tag ) { 2558 2560 $attributes = trim( substr( $attributes, 0, -1 ) ) . "></$tag"; 2559 2561 } 2560 } elseif ( $is_single_tag ) { // Else If it's a known single-entity tag but it doesn't close itself, do so2562 } elseif ( $is_single_tag ) { // Else if it's a known single-entity tag but it doesn't close itself, do so. 2561 2563 $pre_attribute_ws = ' '; 2562 2564 $attributes .= '/'; … … 2587 2589 } 2588 2590 2589 // Clear Tag Queue.2591 // Clear tag queue. 
2590 2592 $newtext .= $tagqueue; 2591 2593 … … 2819 2821 2820 2822 if ( ')' == $matches[3] && strpos( $url, '(' ) ) { 2821 // If the trailing character is a closing parethesis, and the URL has an opening parenthesis in it, add the closing parenthesis to the URL.2822 // Then we can let the parenthesis balancer do its thing below.2823 // If the trailing character is a closing parethesis, and the URL has an opening parenthesis in it, 2824 // add the closing parenthesis to the URL. Then we can let the parenthesis balancer do its thing below. 2823 2825 $url .= $matches[3]; 2824 2826 $suffix = ''; … … 2827 2829 } 2828 2830 2829 // Include parentheses in the URL only if paired 2831 // Include parentheses in the URL only if paired. 2830 2832 while ( substr_count( $url, '(' ) < substr_count( $url, ')' ) ) { 2831 2833 $suffix = strrchr( $url, ')' ) . $suffix; … … 2874 2876 $dest = 'http://' . $dest; 2875 2877 2876 // removed trailing [.,;:)] from URL2878 // Removed trailing [.,;:)] from URL. 2877 2879 if ( in_array( substr( $dest, -1 ), array( '.', ',', ';', ':', ')' ) ) === true ) { 2878 2880 $ret = substr( $dest, -1 ); … … 2927 2929 function make_clickable( $text ) { 2928 2930 $r = ''; 2929 $textarr = preg_split( '/(<[^<>]+>)/', $text, -1, PREG_SPLIT_DELIM_CAPTURE ); // split out HTML tags2930 $nested_code_pre = 0; // Keep track of how many levels link is nested inside <pre> or <code> 2931 $textarr = preg_split( '/(<[^<>]+>)/', $text, -1, PREG_SPLIT_DELIM_CAPTURE ); // Split out HTML tags. 2932 $nested_code_pre = 0; // Keep track of how many levels link is nested inside <pre> or <code>. 2931 2933 foreach ( $textarr as $piece ) { 2932 2934 … … 2942 2944 } 2943 2945 2944 // Long strings might contain expensive edge cases ...2946 // Long strings might contain expensive edge cases... 2945 2947 if ( 10000 < strlen( $piece ) ) { 2946 // ... 
break it up2947 foreach ( _split_str_by_whitespace( $piece, 2100 ) as $chunk ) { // 2100: Extra room for scheme and leading and trailing paretheses 2948 // ...break it up. 2949 foreach ( _split_str_by_whitespace( $piece, 2100 ) as $chunk ) { // 2100: Extra room for scheme and leading and trailing paretheses. 2948 2950 if ( 2101 < strlen( $chunk ) ) { 2949 2951 $r .= $chunk; // Too big, no whitespace: bail. … … 2953 2955 } 2954 2956 } else { 2955 $ret = " $piece "; // Pad with whitespace to simplify the regexes 2957 $ret = " $piece "; // Pad with whitespace to simplify the regexes. 2956 2958 2957 2959 $url_clickable = '~ 2958 ([\\s(<.,;:!?]) # 1: Leading whitespace, or punctuation2959 ( # 2: URL2960 [\\w]{1,20}+:// # Scheme and hier-part prefix 2961 (?=\S{1,2000}\s) # Limit to URLs less than about 2000 characters long 2962 [\\w\\x80-\\xff#%\\~/@\\[\\]*(+=&$-]*+ # Non-punctuation URL character 2963 (?: # Unroll the Loop: Only allow puctuation URL character if followed by a non-punctuation URL character 2964 [\'.,;:!?)] # Punctuation URL character2965 [\\w\\x80-\\xff#%\\~/@\\[\\]*(+=&$-]++ # Non-punctuation URL character2960 ([\\s(<.,;:!?]) # 1: Leading whitespace, or punctuation. 2961 ( # 2: URL. 2962 [\\w]{1,20}+:// # Scheme and hier-part prefix. 2963 (?=\S{1,2000}\s) # Limit to URLs less than about 2000 characters long. 2964 [\\w\\x80-\\xff#%\\~/@\\[\\]*(+=&$-]*+ # Non-punctuation URL character. 2965 (?: # Unroll the Loop: Only allow puctuation URL character if followed by a non-punctuation URL character. 2966 [\'.,;:!?)] # Punctuation URL character. 2967 [\\w\\x80-\\xff#%\\~/@\\[\\]*(+=&$-]++ # Non-punctuation URL character. 2966 2968 )* 2967 2969 ) 2968 (\)?) # 3: Trailing closing parenthesis (for parethesis balancing post processing)2970 (\)?) # 3: Trailing closing parenthesis (for parethesis balancing post processing). 2969 2971 ~xS'; 2970 2972 // The regex is a non-anchored pattern and does not have a single fixed starting character. 
… … 2981 2983 } 2982 2984 2983 // Cleanup of accidental links within links 2985 // Cleanup of accidental links within links. 2984 2986 return preg_replace( '#(<a([ \r\n\t]+[^>]+?>|>))<a [^>]+?>([^>]+?)</a></a>#i', '$1$3</a>', $r ); 2985 2987 } … … 2997 2999 * _split_str_by_whitespace( "1234 67890 1234 67890a cd 1234 890 123456789 1234567890a 45678 1 3 5 7 90 ", 10 ) == 2998 3000 * array ( 2999 * 0 => '1234 67890 ', // 11 characters: Perfect split 3000 * 1 => '1234 ', // 5 characters: '1234 67890a' was too long 3001 * 2 => '67890a cd ', // 10 characters: '67890a cd 1234' was too long 3002 * 3 => '1234 890 ', // 11 characters: Perfect split 3003 * 4 => '123456789 ', // 10 characters: '123456789 1234567890a' was too long 3004 * 5 => '1234567890a ', // 12 characters: Too long, but no inner whitespace on which to split 3005 * 6 => ' 45678 ', // 11 characters: Perfect split 3006 * 7 => '1 3 5 7 90 ', // 11 characters: End of $string 3001 * 0 => '1234 67890 ', // 11 characters: Perfect split. 3002 * 1 => '1234 ', // 5 characters: '1234 67890a' was too long. 3003 * 2 => '67890a cd ', // 10 characters: '67890a cd 1234' was too long. 3004 * 3 => '1234 890 ', // 11 characters: Perfect split. 3005 * 4 => '123456789 ', // 10 characters: '123456789 1234567890a' was too long. 3006 * 5 => '1234567890a ', // 12 characters: Too long, but no inner whitespace on which to split. 3007 * 6 => ' 45678 ', // 11 characters: Perfect split. 3008 * 7 => '1 3 5 7 90 ', // 11 characters: End of $string. 3007 3009 * ); 3008 3010 * … … 3189 3191 $original_link_html = $link_html; 3190 3192 3191 // Consider the html escaped if there are no unescaped quotes3193 // Consider the HTML escaped if there are no unescaped quotes. 3192 3194 $is_escaped = ! 
preg_match( '/(^|[^\\\\])[\'"]/', $link_html ); 3193 3195 if ( $is_escaped ) { 3194 // Replace only the quotes so that they are parsable by wp_kses_hair , leave the rest as is3196 // Replace only the quotes so that they are parsable by wp_kses_hair(), leave the rest as is. 3195 3197 $link_html = preg_replace( '/\\\\([\'"])/', '$1', $link_html ); 3196 3198 } … … 3208 3210 $rel = apply_filters( 'wp_targeted_link_rel', 'noopener noreferrer', $link_html ); 3209 3211 3210 // Return early if no rel values to be added or if no actual target attribute 3212 // Return early if no rel values to be added or if no actual target attribute. 3211 3213 if ( ! $rel || ! isset( $atts['target'] ) ) { 3212 3214 return "<a $original_link_html>"; … … 3339 3341 $output = ''; 3340 3342 if ( get_option( 'use_smilies' ) && ! empty( $wp_smiliessearch ) ) { 3341 // HTML loop taken from texturize function, could possible be consolidated 3342 $textarr = preg_split( '/(<.*>)/U', $text, -1, PREG_SPLIT_DELIM_CAPTURE ); // capture the tags as well as in between3343 $stop = count( $textarr ); // loop stuff3344 3345 // Ignore proessing of specific tags 3343 // HTML loop taken from texturize function, could possible be consolidated. 3344 $textarr = preg_split( '/(<.*>)/U', $text, -1, PREG_SPLIT_DELIM_CAPTURE ); // Capture the tags as well as in between. 3345 $stop = count( $textarr ); // Loop stuff. 3346 3347 // Ignore proessing of specific tags. 3346 3348 $tags_to_ignore = 'code|pre|style|script|textarea'; 3347 3349 $ignore_block_element = ''; … … 3350 3352 $content = $textarr[ $i ]; 3351 3353 3352 // If we're in an ignore block, wait until we find its closing tag 3354 // If we're in an ignore block, wait until we find its closing tag. 3353 3355 if ( '' == $ignore_block_element && preg_match( '/^<(' . $tags_to_ignore . 
')[^>]*>/', $content, $matches ) ) { 3354 3356 $ignore_block_element = $matches[1]; 3355 3357 } 3356 3358 3357 // If it's not a tag and not in ignore block 3359 // If it's not a tag and not in ignore block. 3358 3360 if ( '' == $ignore_block_element && strlen( $content ) > 0 && '<' != $content[0] ) { 3359 3361 $content = preg_replace_callback( $wp_smiliessearch, 'translate_smiley', $content ); 3360 3362 } 3361 3363 3362 // did we exit ignore block3364 // Did we exit ignore block? 3363 3365 if ( '' != $ignore_block_element && '</' . $ignore_block_element . '>' == $content ) { 3364 3366 $ignore_block_element = ''; … … 3368 3370 } 3369 3371 } else { 3370 // return default text.3372 // Return default text. 3371 3373 $output = $text; 3372 3374 } … … 3390 3392 } 3391 3393 3392 // Test for the minimum length the email can be 3394 // Test for the minimum length the email can be. 3393 3395 if ( strlen( $email ) < 6 ) { 3394 3396 /** … … 3408 3410 } 3409 3411 3410 // Test for an @ character after the first position 3412 // Test for an @ character after the first position. 3411 3413 if ( strpos( $email, '@', 1 ) === false ) { 3412 3414 /** This filter is documented in wp-includes/formatting.php */ … … 3414 3416 } 3415 3417 3416 // Split out the local and domain parts 3418 // Split out the local and domain parts. 3417 3419 list( $local, $domain ) = explode( '@', $email, 2 ); 3418 3420 3419 3421 // LOCAL PART 3420 // Test for invalid characters 3422 // Test for invalid characters. 3421 3423 if ( ! preg_match( '/^[a-zA-Z0-9!#$%&\'*+\/=?^_`{|}~\.-]+$/', $local ) ) { 3422 3424 /** This filter is documented in wp-includes/formatting.php */ … … 3425 3427 3426 3428 // DOMAIN PART 3427 // Test for sequences of periods 3429 // Test for sequences of periods. 
3428 3430 if ( preg_match( '/\.{2,}/', $domain ) ) { 3429 3431 /** This filter is documented in wp-includes/formatting.php */ … … 3431 3433 } 3432 3434 3433 // Test for leading and trailing periods and whitespace 3435 // Test for leading and trailing periods and whitespace. 3434 3436 if ( trim( $domain, " \t\n\r\0\x0B." ) !== $domain ) { 3435 3437 /** This filter is documented in wp-includes/formatting.php */ … … 3437 3439 } 3438 3440 3439 // Split the domain into subs 3441 // Split the domain into subs. 3440 3442 $subs = explode( '.', $domain ); 3441 3443 3442 // Assume the domain will have at least two subs 3444 // Assume the domain will have at least two subs. 3443 3445 if ( 2 > count( $subs ) ) { 3444 3446 /** This filter is documented in wp-includes/formatting.php */ … … 3446 3448 } 3447 3449 3448 // Loop through each sub 3450 // Loop through each sub. 3449 3451 foreach ( $subs as $sub ) { 3450 // Test for leading and trailing hyphens and whitespace 3452 // Test for leading and trailing hyphens and whitespace. 3451 3453 if ( trim( $sub, " \t\n\r\0\x0B-" ) !== $sub ) { 3452 3454 /** This filter is documented in wp-includes/formatting.php */ … … 3454 3456 } 3455 3457 3456 // Test for invalid characters 3458 // Test for invalid characters. 3457 3459 if ( ! preg_match( '/^[a-z0-9-]+$/i', $sub ) ) { 3458 3460 /** This filter is documented in wp-includes/formatting.php */ … … 3461 3463 } 3462 3464 3463 // Congratulations your email made it!3465 // Congratulations, your email made it! 3464 3466 /** This filter is documented in wp-includes/formatting.php */ 3465 3467 return apply_filters( 'is_email', $email, $email, null ); … … 3550 3552 */ 3551 3553 function iso8601_timezone_to_offset( $timezone ) { 3552 // $timezone is either 'Z' or '[+|-]hhmm' 3554 // $timezone is either 'Z' or '[+|-]hhmm'. 
3553 3555 if ( $timezone == 'Z' ) { 3554 3556 $offset = 0; … … 3600 3602 */ 3601 3603 function sanitize_email( $email ) { 3602 // Test for the minimum length the email can be 3604 // Test for the minimum length the email can be. 3603 3605 if ( strlen( $email ) < 6 ) { 3604 3606 /** … … 3618 3620 } 3619 3621 3620 // Test for an @ character after the first position 3622 // Test for an @ character after the first position. 3621 3623 if ( strpos( $email, '@', 1 ) === false ) { 3622 3624 /** This filter is documented in wp-includes/formatting.php */ … … 3624 3626 } 3625 3627 3626 // Split out the local and domain parts 3628 // Split out the local and domain parts. 3627 3629 list( $local, $domain ) = explode( '@', $email, 2 ); 3628 3630 3629 3631 // LOCAL PART 3630 // Test for invalid characters 3632 // Test for invalid characters. 3631 3633 $local = preg_replace( '/[^a-zA-Z0-9!#$%&\'*+\/=?^_`{|}~\.-]/', '', $local ); 3632 3634 if ( '' === $local ) { … … 3636 3638 3637 3639 // DOMAIN PART 3638 // Test for sequences of periods 3640 // Test for sequences of periods. 3639 3641 $domain = preg_replace( '/\.{2,}/', '', $domain ); 3640 3642 if ( '' === $domain ) { … … 3643 3645 } 3644 3646 3645 // Test for leading and trailing periods and whitespace 3647 // Test for leading and trailing periods and whitespace. 3646 3648 $domain = trim( $domain, " \t\n\r\0\x0B." ); 3647 3649 if ( '' === $domain ) { … … 3650 3652 } 3651 3653 3652 // Split the domain into subs 3654 // Split the domain into subs. 3653 3655 $subs = explode( '.', $domain ); 3654 3656 3655 // Assume the domain will have at least two subs 3657 // Assume the domain will have at least two subs. 3656 3658 if ( 2 > count( $subs ) ) { 3657 3659 /** This filter is documented in wp-includes/formatting.php */ … … 3659 3661 } 3660 3662 3661 // Create an array that will contain valid subs 3663 // Create an array that will contain valid subs. 
3662 3664 $new_subs = array(); 3663 3665 3664 // Loop through each sub 3666 // Loop through each sub. 3665 3667 foreach ( $subs as $sub ) { 3666 // Test for leading and trailing hyphens 3668 // Test for leading and trailing hyphens. 3667 3669 $sub = trim( $sub, " \t\n\r\0\x0B-" ); 3668 3670 3669 // Test for invalid characters 3671 // Test for invalid characters. 3670 3672 $sub = preg_replace( '/[^a-z0-9-]+/i', '', $sub ); 3671 3673 3672 // If there's anything left, add it to the valid subs 3674 // If there's anything left, add it to the valid subs. 3673 3675 if ( '' !== $sub ) { 3674 3676 $new_subs[] = $sub; … … 3676 3678 } 3677 3679 3678 // If there aren't 2 or more valid subs 3680 // If there aren't 2 or more valid subs. 3679 3681 if ( 2 > count( $new_subs ) ) { 3680 3682 /** This filter is documented in wp-includes/formatting.php */ … … 3682 3684 } 3683 3685 3684 // Join valid subs into the new domain 3686 // Join valid subs into the new domain. 3685 3687 $domain = join( '.', $new_subs ); 3686 3688 3687 // Put the email back together 3689 // Put the email back together. 3688 3690 $sanitized_email = $local . '@' . $domain; 3689 3691 3690 // Congratulations your email made it!3692 // Congratulations, your email made it! 3691 3693 /** This filter is documented in wp-includes/formatting.php */ 3692 3694 return apply_filters( 'sanitize_email', $sanitized_email, $email, null ); … … 4305 4307 4306 4308 $url = str_replace( ';//', '://', $url ); 4307 /* If the URL doesn't appear to contain a scheme, we 4308 * presume it needs http:// prepended (unless a relative 4309 * link starting with /, # or ? or a php file). 4309 /* 4310 * If the URL doesn't appear to contain a scheme, we presume 4311 * it needs http:// prepended (unless it's a relative link 4312 * starting with /, # or ?, or a PHP file). 4310 4313 */ 4311 4314 if ( strpos( $url, ':' ) === false && ! in_array( $url[0], array( '/', '#', '?' 
) ) && … … 4623 4626 case 'default_ping_status': 4624 4627 case 'default_comment_status': 4625 // Options that if not there have 0 value but need to be something like "closed" 4628 // Options that if not there have 0 value but need to be something like "closed". 4626 4629 if ( $value == '0' || $value == '' ) { 4627 4630 $value = 'closed'; … … 4644 4647 4645 4648 case 'blog_charset': 4646 $value = preg_replace( '/[^a-zA-Z0-9_-]/', '', $value ); // strips slashes4649 $value = preg_replace( '/[^a-zA-Z0-9_-]/', '', $value ); // Strips slashes. 4647 4650 break; 4648 4651 … … 4679 4682 4680 4683 case 'gmt_offset': 4681 $value = preg_replace( '/[^0-9:.-]/', '', $value ); // strips slashes4684 $value = preg_replace( '/[^0-9:.-]/', '', $value ); // Strips slashes. 4682 4685 break; 4683 4686 … … 4949 4952 $arg_index = 0; 4950 4953 while ( $len > $start ) { 4951 // Last character: append and break 4954 // Last character: append and break. 4952 4955 if ( strlen( $pattern ) - 1 == $start ) { 4953 4956 $result .= substr( $pattern, -1 ); … … 4955 4958 } 4956 4959 4957 // Literal %: append and continue 4960 // Literal %: append and continue. 4958 4961 if ( substr( $pattern, $start, 2 ) == '%%' ) { 4959 4962 $start += 2; … … 4962 4965 } 4963 4966 4964 // Get fragment before next % 4967 // Get fragment before next %. 4965 4968 $end = strpos( $pattern, '%', $start + 1 ); 4966 4969 if ( false === $end ) { … … 4969 4972 $fragment = substr( $pattern, $start, $end - $start ); 4970 4973 4971 // Fragment has a specifier 4974 // Fragment has a specifier. 4972 4975 if ( $pattern[ $start ] == '%' ) { 4973 // Find numbered arguments or take the next one in order 4976 // Find numbered arguments or take the next one in order. 4974 4977 if ( preg_match( '/^%(\d+)\$/', $fragment, $matches ) ) { 4975 $index = $matches[1] - 1; // 0-based array vs 1-based sprintf arguments.4978 $index = $matches[1] - 1; // 0-based array vs 1-based sprintf() arguments. 4976 4979 $arg = isset( $args[ $index ] ) ? 
$args[ $index ] : ''; 4977 4980 $fragment = str_replace( "%{$matches[1]}$", '%', $fragment ); … … 4999 5002 } 5000 5003 5001 // Append to result and move to next fragment 5004 // Append to result and move to next fragment. 5002 5005 $result .= $fragment; 5003 5006 $start = $end; 5004 5007 } 5008 5005 5009 return $result; 5006 5010 } … … 5020 5024 */ 5021 5025 function wp_sprintf_l( $pattern, $args ) { 5022 // Not a match 5026 // Not a match. 5023 5027 if ( substr( $pattern, 0, 2 ) != '%l' ) { 5024 5028 return $pattern; 5025 5029 } 5026 5030 5027 // Nothing to work with 5031 // Nothing to work with. 5028 5032 if ( empty( $args ) ) { 5029 5033 return ''; … … 5058 5062 $result .= $l['between_only_two'] . array_shift( $args ); 5059 5063 } 5060 // Loop when more than two args 5064 5065 // Loop when more than two args. 5061 5066 $i = count( $args ); 5062 5067 while ( $i ) { … … 5069 5074 } 5070 5075 } 5076 5071 5077 return $result . substr( $pattern, 2 ); 5072 5078 } … … 5090 5096 $more = ''; 5091 5097 } 5098 5092 5099 $str = wp_strip_all_tags( $str, true ); 5093 5100 $excerpt = mb_substr( $str, 0, $count ); 5094 // remove part of an entity at the end 5101 5102 // Remove part of an entity at the end. 5095 5103 $excerpt = preg_replace( '/&[^;\s]{0,6}$/', '', $excerpt ); 5096 5104 if ( $str != $excerpt ) { 5097 5105 $excerpt = trim( $excerpt ) . $more; 5098 5106 } 5107 5099 5108 return $excerpt; 5100 5109 } … … 5135 5144 function _links_add_base( $m ) { 5136 5145 global $_links_add_base; 5137 // 1 = attribute name 2 = quotation mark 3 = URL5146 // 1 = attribute name 2 = quotation mark 3 = URL. 5138 5147 return $m[1] . '=' . $m[2] . 5139 5148 ( preg_match( '#^(\w{1,20}):#', $m[3], $protocol ) && in_array( $protocol[1], wp_allowed_protocols() ) ? 
… … 5309 5318 $filtered = wp_strip_all_tags( $filtered, false ); 5310 5319 5311 // Use htmlentities in a special case to make sure no later5312 // newline stripping stage could lead to a functional tag 5320 // Use HTML entities in a special case to make sure no later 5321 // newline stripping stage could lead to a functional tag. 5313 5322 $filtered = str_replace( "<\n", "&lt;\n", $filtered ); 5314 5323 } … … 5360 5369 */ 5361 5370 function capital_P_dangit( $text ) { 5362 // Simple replacement for titles 5371 // Simple replacement for titles. 5363 5372 $current_filter = current_filter(); 5364 5373 if ( 'the_title' === $current_filter || 'wp_title' === $current_filter ) { 5365 5374 return str_replace( 'Wordpress', 'WordPress', $text ); 5366 5375 } 5367 // Still here? Use the more judicious replacement 5376 // Still here? Use the more judicious replacement. 5368 5377 static $dblq = false; 5369 5378 if ( false === $dblq ) { … … 5523 5532 * 5524 5533 * By default, spaces include new lines, tabs, nbsp entities, and the UTF-8 nbsp. 5525 * This is designed to replace the PCRE \s sequence. In ticket #22692, that5534 * This is designed to replace the PCRE \s sequence. In ticket #22692, that 5526 5535 * sequence was found to be unreliable due to random inclusion of the A0 byte. 5527 5536 * … … 5799 5808 } 5800 5809 5801 // Did we exit ignore block .5810 // Did we exit ignore block? 5802 5811 if ( '' != $ignore_block_element && '</' . $ignore_block_element . '>' == $content ) { 5803 5812 $ignore_block_element = ''; … … 5807 5816 } 5808 5817 5809 // Finally, remove any stray U+FE0F characters 5818 // Finally, remove any stray U+FE0F characters. 5810 5819 $output = str_replace( '&#xfe0f;', '', $output ); 5811 5820
Note: See TracChangeset
for help on using the changeset viewer.