| 1 | <?php |
|---|
| 2 | /* |
|---|
| 3 | Plugin Name: WP Autop |
|---|
| 4 | Plugin URI: http://wordpress.org/plugins/ |
|---|
| 5 | Description: Feature plugin to improve the wpautop() functionality. |
|---|
| 6 | Version: 0.1 |
|---|
| 7 | |
|---|
| 8 | Released under the GPL v.2, http://www.gnu.org/licenses/gpl-2.0.html |
|---|
| 9 | |
|---|
| 10 | This program is distributed in the hope that it will be useful, |
|---|
| 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
|---|
| 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|---|
| 13 | GNU General Public License for more details. |
|---|
| 14 | */ |
|---|
| 15 | |
|---|
/**
 * Experimental replacement for wpautop().
 *
 * The input is parsed into a small tree of nodes (text, space, phrasing,
 * flow, comment, container, grouping) by parse(), and the tree is then
 * serialized back to HTML with paragraphs added by treeToString().
 *
 * Known limitation: characters that the pattern from getRegex() cannot match
 * (for example a lone "<" followed by whitespace) are skipped by
 * preg_match_all() and therefore do not appear in the output.
 */
class WP_Autop {
	/**
	 * Most elements that are used in the body of documents
	 * and applications are categorized as flow content.
	 *
	 * @see http://www.w3.org/TR/html5/dom.html#flow-content
	 */
	protected static $flowContent = array(
		'a', 'abbr', 'address', 'area', 'article', 'aside', 'audio', 'b', 'bdi',
		'bdo', 'blockquote', 'br', 'button', 'canvas', 'cite', 'code', 'data',
		'datalist', 'del', 'dfn', 'div', 'dl', 'em', 'embed', 'fieldset', 'figure',
		'footer', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hr', 'i',
		'iframe', 'img', 'input', 'ins', 'kbd', 'keygen', 'label', 'main', 'map',
		'mark', 'math', 'meter', 'nav', 'noscript', 'object', 'ol', 'output', 'p',
		'pre', 'progress', 'q', 'ruby', 's', 'samp', 'script', 'section', 'select',
		'small', 'span', 'strong', 'sub', 'sup', 'svg', 'table', 'template',
		'textarea', 'time', 'u', 'ul', 'var', 'video', 'wbr',
	);

	/**
	 * Phrasing content is the text of the document,
	 * as well as elements that mark up that text at the intra-paragraph level.
	 *
	 * @see http://www.w3.org/TR/html5/dom.html#phrasing-content
	 */
	protected static $phrasingContent = array(
		'a', 'abbr', 'area', 'audio', 'b', 'bdi', 'bdo', 'br', 'button', 'canvas',
		'cite', 'code', 'data', 'datalist', 'del', 'dfn', 'em', 'embed', 'i',
		'iframe', 'img', 'input', 'ins', 'kbd', 'keygen', 'label', 'map', 'mark',
		'math', 'meter', 'noscript', 'object', 'output', 'progress', 'q', 'ruby',
		's', 'samp', 'script', 'select', 'small', 'span', 'strong', 'sub', 'sup',
		'svg', 'template', 'textarea', 'time', 'u', 'var', 'video', 'wbr',
	);

	/**
	 * Grouping of elements where flow content is expected.
	 *
	 * Text found directly inside these elements is never wrapped in <p>.
	 *
	 * @see http://www.w3.org/TR/html5/grouping-content.html
	 * @see http://www.w3.org/TR/html5/tabular-data.html
	 */
	protected static $groupingContent = array(
		// Grouping content
		'ol', 'ul', 'dl',
		// Tabular data
		'table', 'tbody', 'thead', 'tfoot', 'tr',
	);

	/**
	 * Where flow content is expected.
	 *
	 * The inner markup of these elements is parsed recursively.
	 *
	 * @see http://www.w3.org/TR/html5/grouping-content.html#the-p-element
	 */
	protected static $flowContainer = array(
		// Flow content
		/*'a',*/ 'address', 'article', 'aside', 'audio', 'blockquote', 'canvas', 'del',
		'div', 'fieldset', 'figure', 'footer', 'form', 'header', 'iframe', 'ins',
		'main', 'map', 'nav', 'noscript', 'object', 'section', 'video',
		// Grouping content
		'li', 'dt', 'dd', 'figcaption',
		// Tabular data
		'td', 'th', 'caption'
	);

	/**
	 * Containers whose content is wrapped in <p> even when it is a single run.
	 */
	protected static $forceParagraph = array(
		'blockquote',
	);

	/**
	 * Whether the node's tag is listed as flow content.
	 *
	 * @param array $el Node or regex match; only the 'tag' key is read.
	 * @return bool
	 */
	protected function isFlowContent( $el ) {
		return isset( $el['tag'] ) && in_array( $el['tag'], self::$flowContent, true );
	}

	/**
	 * Whether the node's tag is listed as phrasing content.
	 *
	 * @param array $el Node or regex match; only the 'tag' key is read.
	 * @return bool
	 */
	protected function isPhrasingContent( $el ) {
		return isset( $el['tag'] ) && in_array( $el['tag'], self::$phrasingContent, true );
	}

	/**
	 * Whether the node's tag is listed as grouping content.
	 *
	 * @param array $el Node or regex match; only the 'tag' key is read.
	 * @return bool
	 */
	protected function isGroupingContent( $el ) {
		return isset( $el['tag'] ) && in_array( $el['tag'], self::$groupingContent, true );
	}

	/**
	 * Whether the node's tag is listed as a flow container.
	 *
	 * @param array $el Node or regex match; only the 'tag' key is read.
	 * @return bool
	 */
	protected function isFlowContainer( $el ) {
		return isset( $el['tag'] ) && in_array( $el['tag'], self::$flowContainer, true );
	}

	/**
	 * Whether the node's tag always gets paragraphs.
	 *
	 * @param array $el Node; only the 'tag' key is read (the root node has none).
	 * @return bool
	 */
	protected function isForceParagraph( $el ) {
		return isset( $el['tag'] ) && in_array( $el['tag'], self::$forceParagraph, true );
	}

	/**
	 * Parses the text and serializes it again with paragraphs added.
	 *
	 * @param string $text Markup to process.
	 * @param bool   $br   Whether single newlines inside a paragraph become "<br>".
	 * @return string
	 */
	public function autop( $text, $br = true ) {
		return $this->treeToString( $this->parse( $text ), $br );
	}

	/**
	 * Builds the node tree for a piece of markup.
	 *
	 * Every node is an array with a 'type' key:
	 *  - 'root':      'children'
	 *  - 'text':      'text'
	 *  - 'space':     'space' (whitespace next to a text run, no blank line)
	 *  - 'phrasing':  'raw'
	 *  - 'flow':      'raw'
	 *  - 'comment':   'raw'
	 *  - 'container': 'tag', 'attrs', 'children'
	 *  - 'grouping':  'tag', 'attrs', 'children'
	 *
	 * @param string $text Markup to parse.
	 * @return array Root node.
	 */
	public function parse( $text ) {
		$root = array(
			'type' => 'root', 'children' => array(),
		);

		$normalized = preg_replace( '%\R%u', "\n", $text );

		if ( $normalized === null ) {
			// preg_replace() failed (e.g. invalid UTF-8). Keep the input as one
			// verbatim node instead of silently producing an empty tree.
			$root['children'][] = array(
				'type' => 'flow',
				'raw'  => $text,
			);
			return $root;
		}

		// Two or more line breaks, optionally surrounded by other whitespace.
		$spacePattern = '%([[:space:]]*(\R)[[:space:]]*){2,}%u';

		// Work list of ( markup chunk, reference to the node receiving its children ).
		$stack = array(
			array( $normalized, &$root ),
		);

		/** -----------------------------
		 * While stack
		 */

		while ( $_s = array_pop( $stack ) ) {
			$el = &$_s[1];
			$c  = -1;

			if ( strpos( $_s[0], '<' ) !== false ) {
				if ( preg_match_all( self::getRegex(), $_s[0], $_m, PREG_SET_ORDER ) === false ) {
					// PCRE gave up on this chunk; emit it untouched rather than lose it.
					$el['children'][++$c] = array(
						'type' => 'flow',
						'raw'  => $_s[0],
					);
					continue;
				}
			} else {
				$_m = array( array( 'text' => $_s[0] ) );
			}

			/** -----------------------------
			 * For each element ( text, tags, comments )
			 */

			for ( $i = 0, $ilen = count( $_m ); $i < $ilen; $i++ ) {
				$m = $_m[$i];

				/** ---------------------------
				 * Text
				 */

				$m['text'] = isset( $m['text'] ) ? $m['text'] : null;

				if ( $m['text'] !== null && $m['text'] !== '' ) {

					// $s['a'] / $s['b']: leading / trailing whitespace of the run.
					preg_match( '%^(?<a>[[:space:]]*).*?(?<b>[[:space:]]*)$%su', $m['text'], $s );

					$isFirst = ( $i === 0 );
					$isLast  = ( $i === $ilen - 1 );
					$trimmed = trim( $m['text'] );

					// Record the leading whitespace so the serializer can join this
					// run to the preceding inline node. A whitespace-only run at the
					// very end of the chunk is still ignored.
					if ( ! $isFirst && ( ! $isLast || $trimmed !== '' ) && ! preg_match( $spacePattern, $s['a'] ) ) {
						$el['children'][++$c] = array(
							'type'  => 'space',
							'space' => $s['a'],
						);
					}

					if ( $trimmed !== '' ) {
						// Blank lines separate paragraphs: one text node per paragraph.
						foreach ( preg_split( $spacePattern, $trimmed ) as $p ) {
							$el['children'][++$c] = array(
								'type' => 'text',
								'text' => $p,
							);
						}

						if ( ! $isLast && ! preg_match( $spacePattern, $s['b'] ) ) {
							$el['children'][++$c] = array(
								'type'  => 'space',
								'space' => $s['b'],
							);
						}
					}
					continue;
				} // Text

				/** ---------------------------
				 * Tag
				 */

				// With PREG_SET_ORDER, groups that did not take part in the match
				// may be missing from $m, hence the isset() guards.
				$m['raw']   = isset( $m['raw'] ) ? $m['raw'] : null;
				$m['tag']   = isset( $m['tag'] ) ? $m['tag'] : null;
				$m['attrs'] = isset( $m['attrs'] ) ? $m['attrs'] : null;
				$m['inner'] = isset( $m['inner'] ) ? $m['inner'] : null;

				if ( $m['tag'] !== null && $m['tag'] !== '' ) {

					$m['tag'] = strtolower( $m['tag'] );

					if ( $this->isFlowContainer( $m ) ) {
						$type = 'container';
					} elseif ( $this->isGroupingContent( $m ) ) {
						$type = 'grouping';
					} elseif ( $this->isPhrasingContent( $m ) ) {
						$type = 'phrasing';
					} else {
						$type = 'flow';
					}

					if ( $type === 'container' || $type === 'grouping' ) {

						$el['children'][++$c] = array(
							'type'     => $type,
							'tag'      => $m['tag'],
							'attrs'    => $m['attrs'],
							'children' => array(),
						);

						// Queue the inner markup; its nodes become this node's children.
						if ( $m['inner'] !== null && $m['inner'] !== '' ) {
							array_push( $stack, array( $m['inner'], &$el['children'][$c] ) );
						}

					} else {

						// Phrasing and other flow elements are kept verbatim.
						$el['children'][++$c] = array(
							'type' => $type,
							'raw'  => $m['raw'],
						);
					}
					continue;
				} // Tag

				/** ---------------------------
				 * Comment
				 */

				// Neither text nor tag: the only remaining alternative of the
				// pattern is a comment. Testing 'raw' keeps empty comments too.
				if ( $m['raw'] !== null && $m['raw'] !== '' ) {

					$el['children'][++$c] = array(
						'type' => 'comment',
						'raw'  => $m['raw'],
					);
					continue;
				} // Comment

			} // For each element
		} // While stack

		return $root;
	}

	/**
	 * Serializes a node tree produced by parse().
	 *
	 * Consecutive text, phrasing and comment nodes that are separated by
	 * 'space' nodes are joined into one run. A run is wrapped in <p> unless its
	 * parent is a grouping element, or it is the parent's only child and the
	 * parent is not listed in $forceParagraph.
	 *
	 * @param array $root Root node from parse().
	 * @param bool  $br   Whether newlines inside a run become "<br>" + newline.
	 * @return string
	 */
	protected function treeToString( $root, $br = true ) {
		// With $br disabled a newline is replaced by itself, i.e. left alone.
		$lineBreak = $br ? "<br>\n" : "\n";

		// Work list of ( reference to parent node, index of the child to resume at ).
		$stack = array( array( &$root, 0 ) );
		$root['out'] = '';

		/** -----------------------------
		 * While stack
		 */

		while ( $_s = array_pop( $stack ) ) {
			$parent = &$_s[0];
			$c      = $_s[1];
			$out    = &$parent['out'];

			/** -----------------------------
			 * For each element ( text, tags, comments )
			 */

			for ( $i = $c, $len = count( $parent['children'] ); $i < $len; $i++ ) {
				$el = &$parent['children'][$i];

				/** ---------------------------
				 * Text, Phrasing
				 */

				if ( $el['type'] === 'text' || $el['type'] === 'phrasing' ) {
					$type = $el['type'];

					if ( $type === 'text' ) {
						$content = str_replace( "\n", $lineBreak, $el['text'] );
					} else {
						$content = $el['raw'];
					}

					// Absorb following ( space, node ) pairs into the same run.
					// $type tracks the last absorbed node: two text nodes are
					// never joined, because they come from separate paragraphs.
					while ( isset( $parent['children'][$i + 2] ) ) {
						$a = $parent['children'][$i + 1];
						$b = $parent['children'][$i + 2];

						if ( $a['type'] === 'space' ) {
							if ( $b['type'] === 'text' ) {
								if ( $type !== 'text' ) {
									$type     = $b['type'];
									$tmp      = $a['space'] . $b['text'];
									$content .= str_replace( "\n", $lineBreak, $tmp );
									$i       += 2;
								} else {
									break;
								}
							} elseif ( $b['type'] === 'phrasing' ) {
								$type     = $b['type'];
								$tmp      = $a['space'] . $b['raw'];
								$content .= str_replace( "\n", $lineBreak, $tmp );
								$i       += 2;
							} elseif ( $b['type'] === 'comment' ) {
								$type     = $b['type'];
								$content .= $a['space'] . $b['raw'];
								$i       += 2;
							} else {
								break;
							}
						} else {
							break;
						}
					}

					// Put the run on its own line(s), indented by one space.
					$content = str_replace( "\n", "\n ", "\n" . $content );

					if ( $parent['type'] === 'grouping' ) {
						$out .= "\n" . $content . "\n";
					} else {
						if ( $len === 1 && ! $this->isForceParagraph( $parent ) ) {
							$out .= "\n" . $content . "\n";
						} else {
							$out .= "\n<p>" . $content . "\n</p>\n";
						}
					}
					continue;
				} // Text, Phrasing

				/** ---------------------------
				 * Container, Grouping
				 */

				if ( $el['type'] === 'container' || $el['type'] === 'grouping' ) {
					if ( isset( $el['revisit'] ) && $el['revisit'] ) {
						// Second visit: the children have been rendered into $el['out'].
						// 'attrs' is null when the tag had no attributes and no matched
						// closing tag; treat that like an empty attribute string.
						$attrs = ( isset( $el['attrs'] ) && $el['attrs'] !== '' ) ? ' ' . $el['attrs'] : '';

						$out .= "\n" . '<' . $el['tag'] . $attrs . '>';
						$out .= str_replace( "\n", "\n ", "\n". trim( $el['out'] ) );
						$out .= "\n" . '</' . $el['tag'] . '>' . "\n";
					} else {
						// First visit: render the children first, then come back
						// to this same index of the parent.
						$stack[] = array( &$parent, $i );
						$stack[] = array( &$el, 0 );
						$el['revisit'] = true;
						$el['out'] = '';
						break;
					}
					continue;
				} // Container, Grouping

				/** ---------------------------
				 * Flow
				 */

				if ( $el['type'] === 'flow' ) {
					$out .= "\n" . $el['raw'] . "\n";
					continue;
				} // Flow

				/** ---------------------------
				 * Comment
				 */

				if ( $el['type'] === 'comment' ) {
					$out .= "\n" . $el['raw'] . "\n";
					continue;
				} // Comment

				// 'space' nodes that were not absorbed into a run produce no output.

			} // For each element
		} // While stack

		return trim( $out );
	}

	/**
	 * Returns the recursive pattern that tokenizes markup.
	 *
	 * Each match is one of: a run of text, a comment, or a tag. A tag match
	 * extends to its closing tag when one can be matched, in which case
	 * 'inner' holds the markup in between.
	 *
	 * Named groups: raw, text, comment, tag, attrs, inner.
	 *
	 * @return string
	 */
	protected static function getRegex() {
		return '%'
			. '(?<raw>'
			.   '(?<text>'
			.     '[^<]+'
			.   ')'
			. '|'
			.   '<!--(?<comment>.*?)-->'
			. '|'
			.   '<(?<tag>'
			      // A "/" belongs to the name unless it starts "/>", so that
			      // "<br/>" yields the tag "br" while "</div>" still yields "/div".
			.     '(?:[^[:space:]/>]|/(?!>))+'
			.   ')'
			.   '(?:'
			.     '[[:space:]]+'
			.     '(?<attrs>'
			.       '(?:'
			.         '"(?:\\\"|[^"])*"'
			.       '|'
			.         "'(?:\\\'|[^'])*'"
			.       '|'
			.         '(?:[^/>"\']|/(?!>))'
			.       ')*'
			.     ')'
			.   ')?'
			.   '[[:space:]]*'
			.   '(?:'
			.     '/>'
			.   '|'
			.     '>'
			.     '(?:'
			.       '(?<inner>'
			.         '(?R)*?'
			.       ')'
			.       '</\k<tag>>'
			.     ')?'
			.   ')'
			. ')'
			. '%isuS';
	}
}
|---|
| 430 | |
|---|
/**
 * Filter callback: runs the text through WP_Autop instead of wpautop().
 *
 * One WP_Autop instance is created on first use and reused on later calls.
 *
 * @param mixed  $output Value passed in by the filter; it is not used.
 * @param string $text   Text to add paragraphs to.
 * @param bool   $br     Forwarded to WP_Autop::autop().
 * @return string Result of WP_Autop::autop().
 */
function wpautop_replacement( $output, $text, $br = true ) {
	static $instance = null;

	if ( null === $instance ) {
		$instance = new WP_Autop();
	}

	$result = $instance->autop( $text, $br );

	return $result;
}
|---|
| 443 | |
|---|
// Register for all three parameters the callback declares ( $output, $text, $br ).
// With only two accepted arguments a $br value supplied by the code applying
// '_temp_wpautop' could never reach the callback.
// NOTE(review): the apply_filters() call for this hook is not in this file — confirm it passes $br.
add_filter( '_temp_wpautop', 'wpautop_replacement', 10, 3 );
|---|
| 445 | |
|---|