Changeset 38358
- Timestamp:
- 08/26/2016 09:24:34 AM (8 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/wp-admin/js/word-count.js
r33440 r38358 1 /** 2 * Word or character counting functionality. Count words or characters in a provided text string. 3 * 4 * @summary Count words or characters in a text. 5 * 6 * @namespace wp.utils 7 * @since 2.6 8 */ 9 1 10 ( function() { 11 /** 12 * Word counting utility 13 * 14 * @namespace wp.utils.wordcounter 15 * @memberof wp.utils 16 * 17 * @class 18 * 19 * @param {Object} settings Optional. Key-value object containing overrides for 20 * settings. 21 * @param {RegExp} settings.HTMLRegExp Optional. Regular expression to find HTML elements. 22 * @param {RegExp} settings.HTMLcommentRegExp Optional. Regular expression to find HTML comments. 23 * @param {RegExp} settings.spaceRegExp Optional. Regular expression to find irregular space 24 * characters. 25 * @param {RegExp} settings.HTMLEntityRegExp Optional. Regular expression to find HTML entities. 26 * @param {RegExp} settings.connectorRegExp Optional. Regular expression to find connectors that 27 * split words. 28 * @param {RegExp} settings.removeRegExp Optional. Regular expression to find remove unwanted 29 * characters to reduce false-positives. 30 * @param {RegExp} settings.astralRegExp Optional. Regular expression to find unwanted 31 * characters when searching for non-words. 32 * @param {RegExp} settings.wordsRegExp Optional. Regular expression to find words by spaces. 33 * @param {RegExp} settings.characters_excluding_spacesRegExp Optional. Regular expression to find characters which 34 * are non-spaces. 35 * @param {RegExp} settings.characters_including_spacesRegExp Optional. Regular expression to find characters 36 * including spaces. 37 * @param {RegExp} settings.shortcodesRegExp Optional. Regular expression to find shortcodes. 38 * @param {Object} settings.l10n Optional. Localization object containing specific 39 * configuration for the current localization. 40 * @param {String} settings.l10n.type Optional. Method of finding words to count. 41 * @param {Array} settings.l10n.shortcodes Optional. 
Array of shortcodes that should be removed 42 * from the text. 43 * 44 * @return void 45 */ 2 46 function WordCounter( settings ) { 3 47 var key, 4 48 shortcodes; 5 49 50 // Apply provided settings to object settings. 6 51 if ( settings ) { 7 52 for ( key in settings ) { 53 54 // Only apply valid settings. 8 55 if ( settings.hasOwnProperty( key ) ) { 9 56 this.settings[ key ] = settings[ key ]; … … 14 61 shortcodes = this.settings.l10n.shortcodes; 15 62 63 // If there are any localization shortcodes, add this as type in the settings. 16 64 if ( shortcodes && shortcodes.length ) { 17 65 this.settings.shortcodesRegExp = new RegExp( '\\[\\/?(?:' + shortcodes.join( '|' ) + ')[^\\]]*?\\]', 'g' ); … … 19 67 } 20 68 69 // Default settings. 21 70 WordCounter.prototype.settings = { 22 71 HTMLRegExp: /<\/?[a-z][^>]*?>/gi, … … 24 73 spaceRegExp: /&nbsp;|&#160;/gi, 25 74 HTMLEntityRegExp: /&\S+?;/g, 75 76 // \u2014 = em-dash 26 77 connectorRegExp: /--|\u2014/g, 78 79 // Characters to be removed from input text. 
27 80 removeRegExp: new RegExp( [ 28 81 '[', 82 29 83 // Basic Latin (extract) 30 84 '\u0021-\u0040\u005B-\u0060\u007B-\u007E', 85 31 86 // Latin-1 Supplement (extract) 32 87 '\u0080-\u00BF\u00D7\u00F7', 33 // General Punctuation 34 // Superscripts and Subscripts 35 // Currency Symbols 36 // Combining Diacritical Marks for Symbols 37 // Letterlike Symbols 38 // Number Forms 39 // Arrows 40 // Mathematical Operators 41 // Miscellaneous Technical 42 // Control Pictures 43 // Optical Character Recognition 44 // Enclosed Alphanumerics 45 // Box Drawing 46 // Block Elements 47 // Geometric Shapes 48 // Miscellaneous Symbols 49 // Dingbats 50 // Miscellaneous Mathematical Symbols-A 51 // Supplemental Arrows-A 52 // Braille Patterns 53 // Supplemental Arrows-B 54 // Miscellaneous Mathematical Symbols-B 55 // Supplemental Mathematical Operators 56 // Miscellaneous Symbols and Arrows 88 89 /* 90 * The following range consists of: 91 * General Punctuation 92 * Superscripts and Subscripts 93 * Currency Symbols 94 * Combining Diacritical Marks for Symbols 95 * Letterlike Symbols 96 * Number Forms 97 * Arrows 98 * Mathematical Operators 99 * Miscellaneous Technical 100 * Control Pictures 101 * Optical Character Recognition 102 * Enclosed Alphanumerics 103 * Box Drawing 104 * Block Elements 105 * Geometric Shapes 106 * Miscellaneous Symbols 107 * Dingbats 108 * Miscellaneous Mathematical Symbols-A 109 * Supplemental Arrows-A 110 * Braille Patterns 111 * Supplemental Arrows-B 112 * Miscellaneous Mathematical Symbols-B 113 * Supplemental Mathematical Operators 114 * Miscellaneous Symbols and Arrows 115 */ 57 116 '\u2000-\u2BFF', 117 58 118 // Supplemental Punctuation 59 119 '\u2E00-\u2E7F', 60 120 ']' 61 121 ].join( '' ), 'g' ), 122 123 // Remove UTF-16 surrogate points, see https://en.wikipedia.org/wiki/UTF-16#U.2BD800_to_U.2BDFFF 62 124 astralRegExp: /[\uD800-\uDBFF][\uDC00-\uDFFF]/g, 63 125 wordsRegExp: /\S\s+/g, 64 126 characters_excluding_spacesRegExp: /\S/g, 127 128 /* 129 * 
Match anything that is not a formatting character, excluding: 130 * \f = form feed 131 * \n = new line 132 * \r = carriage return 133 * \t = tab 134 * \v = vertical tab 135 * \u00AD = soft hyphen 136 * \u2028 = line separator 137 * \u2029 = paragraph separator 138 */ 65 139 characters_including_spacesRegExp: /[^\f\n\r\t\v\u00AD\u2028\u2029]/g, 66 140 l10n: window.wordCountL10n || {} 67 141 }; 68 142 143 /** 144 * Counts the number of words (or other specified type) in the specified text. 145 * 146 * @summary Count the number of elements in a text. 147 * 148 * @since 2.6 149 * @memberof wp.utils.wordcounter 150 * 151 * @param {String} text Text to count elements in. 152 * @param {String} type Optional. Specify type to use. 153 * 154 * @return {Number} The number of items counted. 155 */ 69 156 WordCounter.prototype.count = function( text, type ) { 70 157 var count = 0; 71 158 159 // Use default type if none was provided. 72 160 type = type || this.settings.l10n.type; 73 161 162 // Sanitize type to one of three possibilities: 'words', 'characters_excluding_spaces' or 'characters_including_spaces'. 74 163 if ( type !== 'characters_excluding_spaces' && type !== 'characters_including_spaces' ) { 75 164 type = 'words'; 76 165 } 77 166 167 // If we have any text at all. 78 168 if ( text ) { 79 169 text = text + '\n'; 80 170 171 // Replace all HTML with a new-line. 81 172 text = text.replace( this.settings.HTMLRegExp, '\n' ); 173 174 // Remove all HTML comments. 82 175 text = text.replace( this.settings.HTMLcommentRegExp, '' ); 83 176 177 // If a shortcode regular expression has been provided use it to remove shortcodes. 84 178 if ( this.settings.shortcodesRegExp ) { 85 179 text = text.replace( this.settings.shortcodesRegExp, '\n' ); 86 180 } 87 181 182 // Normalize non-breaking space to a normal space. 88 183 text = text.replace( this.settings.spaceRegExp, ' ' ); 89 184 90 185 if ( type === 'words' ) { 186 187 // Remove HTML Entities. 
91 188 text = text.replace( this.settings.HTMLEntityRegExp, '' ); 189 190 // Convert connectors to spaces to count attached text as words. 92 191 text = text.replace( this.settings.connectorRegExp, ' ' ); 192 193 // Remove unwanted characters. 93 194 text = text.replace( this.settings.removeRegExp, '' ); 94 195 } else { 196 197 // Convert HTML Entities to "a". 95 198 text = text.replace( this.settings.HTMLEntityRegExp, 'a' ); 199 200 // Remove surrogate points. 96 201 text = text.replace( this.settings.astralRegExp, 'a' ); 97 202 } 98 203 204 // Match with the selected type regular expression to count the items. 99 205 text = text.match( this.settings[ type + 'RegExp' ] ); 100 206 207 // If we have any matches, set the count to the number of items found. 101 208 if ( text ) { 102 209 count = text.length; … … 107 214 }; 108 215 216 // Add the WordCounter to the WP Utils. 109 217 window.wp = window.wp || {}; 110 218 window.wp.utils = window.wp.utils || {};
Note: See TracChangeset for help on using the changeset viewer.