1 | <?php |
---|
2 | /* |
---|
3 | Plugin Name: WP Autop |
---|
4 | Plugin URI: http://wordpress.org/plugins/ |
---|
5 | Description: Feature plugin to improve the wpautop() functionality. |
---|
6 | Version: 0.1 |
---|
7 | |
---|
8 | Released under the GPL v.2, http://www.gnu.org/licenses/gpl-2.0.html |
---|
9 | |
---|
10 | This program is distributed in the hope that it will be useful, |
---|
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
13 | GNU General Public License for more details. |
---|
14 | */ |
---|
15 | |
---|
/**
 * Converts plain text mixed with HTML into paragraph-wrapped markup.
 *
 * The work is done in two passes: parse() turns the input into a tree of
 * typed nodes, and treeToString() renders that tree, wrapping runs of text
 * and phrasing elements in <p> elements.
 */
class WP_Autop {
	/**
	 * Most elements that are used in the body of documents
	 * and applications are categorized as flow content.
	 *
	 * @see http://www.w3.org/TR/html5/dom.html#flow-content
	 */
	protected static $flowContent = array(
		'a', 'abbr', 'address', 'area', 'article', 'aside', 'audio', 'b', 'bdi',
		'bdo', 'blockquote', 'br', 'button', 'canvas', 'cite', 'code', 'data',
		'datalist', 'del', 'dfn', 'div', 'dl', 'em', 'embed', 'fieldset', 'figure',
		'footer', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hr', 'i',
		'iframe', 'img', 'input', 'ins', 'kbd', 'keygen', 'label', 'main', 'map',
		'mark', 'math', 'meter', 'nav', 'noscript', 'object', 'ol', 'output', 'p',
		'pre', 'progress', 'q', 'ruby', 's', 'samp', 'script', 'section', 'select',
		'small', 'span', 'strong', 'sub', 'sup', 'svg', 'table', 'template',
		'textarea', 'time', 'u', 'ul', 'var', 'video',
	);

	/**
	 * Phrasing content is the text of the document,
	 * as well as elements that mark up that text at the intra-paragraph level.
	 *
	 * @see http://www.w3.org/TR/html5/dom.html#phrasing-content
	 */
	protected static $phrasingContent = array(
		'a', 'abbr', 'area', 'audio', 'b', 'bdi', 'bdo', 'br', 'button', 'canvas',
		'cite', 'code', 'data', 'datalist', 'del', 'dfn', 'em', 'embed', 'i',
		'iframe', 'img', 'input', 'ins', 'kbd', 'keygen', 'label', 'map', 'mark',
		'math', 'meter', 'noscript', 'object', 'output', 'progress', 'q', 'ruby',
		's', 'samp', 'script', 'select', 'small', 'span', 'strong', 'sub', 'sup',
		'svg', 'template', 'textarea', 'time', 'u', 'var', 'video', 'wbr',
	);

	/**
	 * Grouping of elements where flow content is expected.
	 *
	 * @see http://www.w3.org/TR/html5/grouping-content.html
	 * @see http://www.w3.org/TR/html5/tabular-data.html
	 */
	protected static $groupingContent = array(
		// Grouping content
		'ol', 'ul', 'dl',
		// Tabular data
		'table', 'tbody', 'thead', 'tfoot', 'tr',
	);

	/**
	 * Where flow content is expected.
	 *
	 * @see http://www.w3.org/TR/html5/grouping-content.html#the-p-element
	 */
	protected static $flowContainer = array(
		// Flow content
		/*'a',*/ 'address', 'article', 'aside', 'audio', 'blockquote', 'canvas', 'del',
		'div', 'fieldset', 'figure', 'footer', 'form', 'header', 'iframe', 'ins',
		'main', 'map', 'nav', 'noscript', 'object', 'section', 'video',
		// Grouping content
		'li', 'dt', 'dd', 'figcaption',
		// Tabular data
		'td', 'th', 'caption'
	);

	/**
	 * Containers whose text content is wrapped in a paragraph even when it
	 * is the container's only child.
	 */
	protected static $forceParagraph = array(
		'blockquote',
	);

	/**
	 * Whether the element's tag is listed as flow content.
	 *
	 * @param array $el Element array; only its 'tag' key is inspected.
	 * @return bool
	 */
	protected function isFlowContent( $el ) {
		return isset( $el['tag'] ) && in_array( $el['tag'], self::$flowContent, true );
	}

	/**
	 * Whether the element's tag is listed as phrasing content.
	 *
	 * @param array $el Element array; only its 'tag' key is inspected.
	 * @return bool
	 */
	protected function isPhrasingContent( $el ) {
		return isset( $el['tag'] ) && in_array( $el['tag'], self::$phrasingContent, true );
	}

	/**
	 * Whether the element's tag is listed as grouping content.
	 *
	 * @param array $el Element array; only its 'tag' key is inspected.
	 * @return bool
	 */
	protected function isGroupingContent( $el ) {
		return isset( $el['tag'] ) && in_array( $el['tag'], self::$groupingContent, true );
	}

	/**
	 * Whether the element's tag is listed as a flow container.
	 *
	 * @param array $el Element array; only its 'tag' key is inspected.
	 * @return bool
	 */
	protected function isFlowContainer( $el ) {
		return isset( $el['tag'] ) && in_array( $el['tag'], self::$flowContainer, true );
	}

	/**
	 * Whether the element's tag always gets paragraph-wrapped content.
	 *
	 * @param array $el Element array; only its 'tag' key is inspected.
	 * @return bool
	 */
	protected function isForceParagraph( $el ) {
		return isset( $el['tag'] ) && in_array( $el['tag'], self::$forceParagraph, true );
	}

	/**
	 * Parses the text and renders it back with paragraph markup.
	 *
	 * @param string $text Text (optionally containing HTML) to format.
	 * @param bool   $br   Whether single line breaks inside a paragraph are
	 *                     turned into "<br>" followed by a newline.
	 * @return string Formatted markup.
	 */
	public function autop( $text, $br = true ) {
		return $this->treeToString( $this->parse( $text ), $br );
	}

	/**
	 * Builds a tree of typed nodes from the text.
	 *
	 * Node types produced: 'space' (whitespace between siblings), 'text',
	 * 'container' and 'grouping' (both carry 'tag', 'attrs', 'children'),
	 * 'phrasing', 'flow' and 'comment' (these carry the untouched 'raw'
	 * markup). Nested markup is handled with an explicit stack rather than
	 * recursion: each stack entry pairs a string still to be parsed with a
	 * reference to the node that receives the resulting children.
	 *
	 * @param string $text Text (optionally containing HTML) to parse.
	 * @return array Root node: array( 'type' => 'root', 'children' => array ).
	 */
	public function parse( $text ) {
		$root = array(
			'type' => 'root', 'children' => array(),
		);

		// Normalize every line-break sequence to "\n".
		$normalized = preg_replace( '%\R%u', "\n", $text );

		if ( $normalized === null ) {
			// PCRE failed (for instance on invalid UTF-8). Normalize at byte
			// level instead of silently discarding the whole input.
			$normalized = str_replace( array( "\r\n", "\r" ), "\n", (string) $text );
		}

		$stack = array(
			array( $normalized, &$root ),
		);

		// Two or more line breaks (with optional surrounding whitespace)
		// mark a paragraph boundary.
		$spacePattern = '%([[:space:]]*(\R)[[:space:]]*){2,}%u';

		/** -----------------------------
		 * While stack
		 */

		while ( $_s = array_pop( $stack ) ) {
			$el = &$_s[1];
			$c  = -1;

			if ( strpos( $_s[0], '<' ) === false ) {
				// No markup at all: the whole string is one text run.
				$_m = array( array( 'text' => $_s[0] ) );
			} elseif ( preg_match_all( self::getRegex(), $_s[0], $_m, PREG_SET_ORDER ) === false ) {
				// The tokenizer itself failed; keep the markup verbatim
				// rather than dropping it.
				$el['children'][++$c] = array(
					'type' => 'flow',
					'raw'  => $_s[0],
				);
				continue;
			}

			/** -----------------------------
			 * For each element ( text, tags, comments )
			 */

			for ( $i = 0, $ilen = count( $_m ); $i < $ilen; $i++ ) {
				$m       = $_m[$i];
				$isFirst = ( $i === 0 );
				$isLast  = ( $i === $ilen - 1 );

				/** ---------------------------
				 * Text
				 */

				$textPart = isset( $m['text'] ) ? $m['text'] : null;

				if ( $textPart !== null && $textPart !== '' ) {

					// 'a' is the leading whitespace, 'b' the trailing whitespace.
					if ( ! preg_match( '%^(?<a>[[:space:]]*).*?(?<b>[[:space:]]*)$%su', $textPart, $s ) ) {
						$s = array( 'a' => '', 'b' => '' );
					}

					$trimmed = trim( $textPart );

					// Record the whitespace separating this text from the previous
					// sibling unless it is a paragraph boundary. A whitespace-only
					// run at the very end adds no node, so it does not change the
					// parent's child count; a final run with real text does keep
					// its leading space so it can join the preceding inline content.
					if ( ! $isFirst && ( ! $isLast || $trimmed !== '' ) && ! preg_match( $spacePattern, $s['a'] ) ) {
						$el['children'][++$c] = array(
							'type'  => 'space',
							'space' => $s['a'],
						);
					}

					if ( $trimmed !== '' ) {
						$paragraphs = preg_split( $spacePattern, $trimmed );

						if ( $paragraphs === false ) {
							// PCRE failure: keep the text as a single paragraph.
							$paragraphs = array( $trimmed );
						}

						foreach ( $paragraphs as $paragraph ) {
							$el['children'][++$c] = array(
								'type' => 'text',
								'text' => $paragraph,
							);
						}

						if ( ! $isLast && ! preg_match( $spacePattern, $s['b'] ) ) {
							$el['children'][++$c] = array(
								'type'  => 'space',
								'space' => $s['b'],
							);
						}
					}
					continue;
				} // Text

				/** ---------------------------
				 * Tag
				 */

				$raw   = isset( $m['raw'] ) ? $m['raw'] : null;
				$tag   = isset( $m['tag'] ) ? $m['tag'] : null;
				$attrs = isset( $m['attrs'] ) ? $m['attrs'] : null;
				$inner = isset( $m['inner'] ) ? $m['inner'] : null;

				if ( $tag !== null && $tag !== '' ) {

					$tag    = strtolower( $tag );
					$lookup = array( 'tag' => $tag );

					if ( $this->isFlowContainer( $lookup ) ) {
						$type = 'container';
					} elseif ( $this->isGroupingContent( $lookup ) ) {
						$type = 'grouping';
					} elseif ( $this->isPhrasingContent( $lookup ) ) {
						$type = 'phrasing';
					} else {
						$type = 'flow';
					}

					if ( $type === 'container' || $type === 'grouping' ) {

						$el['children'][++$c] = array(
							'type'     => $type,
							'tag'      => $tag,
							'attrs'    => $attrs,
							'children' => array(),
						);

						// Queue the inner markup; its nodes land in the child just added.
						if ( $inner !== null && $inner !== '' ) {
							array_push( $stack, array( $inner, &$el['children'][$c] ) );
						}

					} else {

						$el['children'][++$c] = array(
							'type' => $type,
							'raw'  => $raw,
						);
					}
					continue;
				} // Tag

				/** ---------------------------
				 * Comment
				 */

				// The 'comment' capture is empty for "<!---->", so the raw
				// match is inspected to keep empty comments too.
				if ( $raw !== null && strpos( $raw, '<!--' ) === 0 ) {

					$el['children'][++$c] = array(
						'type' => 'comment',
						'raw'  => $raw,
					);
					continue;
				} // Comment

			} // For each element
		} // While stack

		return $root;
	}

	/**
	 * Renders a tree produced by parse() as indented markup.
	 *
	 * Containers are visited twice: the first visit pushes the container
	 * onto the stack so its children are rendered into its own 'out'
	 * buffer; the second visit ('revisit') wraps that buffer in the tag and
	 * appends it to the parent's buffer. Note that a closing tag is emitted
	 * for every container and grouping node.
	 *
	 * @param array $root Root node as returned by parse().
	 * @param bool  $br   Whether line breaks inside text are rendered as
	 *                    "<br>" followed by a newline (true) or kept as
	 *                    plain newlines (false).
	 * @return string Rendered markup, trimmed.
	 */
	protected function treeToString( $root, $br = true ) {
		$stack       = array( array( &$root, 0 ) );
		$root['out'] = '';

		// Replacement for a newline inside paragraph content.
		$lineBreak = $br ? "<br>\n" : "\n";

		/** -----------------------------
		 * While stack
		 */

		while ( $_s = array_pop( $stack ) ) {
			$parent = &$_s[0];
			$c      = $_s[1];
			$out    = &$parent['out'];

			/** -----------------------------
			 * For each element ( text, tags, comments )
			 */

			for ( $i = $c, $len = count( $parent['children'] ); $i < $len; $i++ ) {
				$el = &$parent['children'][$i];

				/** ---------------------------
				 * Text, Phrasing
				 */

				if ( $el['type'] === 'text' || $el['type'] === 'phrasing' ) {
					$type = $el['type'];

					if ( $type === 'text' ) {
						$content = str_replace( "\n", $lineBreak, $el['text'] );
					} else {
						$content = $el['raw'];
					}

					// Absorb following "space + inline node" pairs into the same
					// paragraph. Two text nodes are never joined: consecutive text
					// nodes come from a paragraph split.
					while ( isset( $parent['children'][$i + 2] ) ) {
						$a = $parent['children'][$i + 1];
						$b = $parent['children'][$i + 2];

						if ( $a['type'] !== 'space' ) {
							break;
						}

						if ( $b['type'] === 'text' ) {
							if ( $type === 'text' ) {
								break;
							}
							$content .= str_replace( "\n", $lineBreak, $a['space'] . $b['text'] );
						} elseif ( $b['type'] === 'phrasing' ) {
							$content .= str_replace( "\n", $lineBreak, $a['space'] . $b['raw'] );
						} elseif ( $b['type'] === 'comment' ) {
							$content .= $a['space'] . $b['raw'];
						} else {
							break;
						}

						$type = $b['type'];
						$i   += 2;
					}

					// Indent every line of the paragraph by one space.
					$content = str_replace( "\n", "\n ", "\n" . $content );

					if ( $parent['type'] === 'grouping' ) {
						$out .= "\n" . $content . "\n";
					} elseif ( $len === 1 && ! $this->isForceParagraph( $parent ) ) {
						// A sole child is left unwrapped unless the parent forces a paragraph.
						$out .= "\n" . $content . "\n";
					} else {
						$out .= "\n<p>" . $content . "\n</p>\n";
					}
					continue;
				} // Text, Phrasing

				/** ---------------------------
				 * Container, Grouping
				 */

				if ( $el['type'] === 'container' || $el['type'] === 'grouping' ) {
					if ( isset( $el['revisit'] ) && $el['revisit'] ) {
						// 'attrs' is null when the tag had neither attributes nor a
						// matched closing tag; treat that like an empty string so no
						// stray space ends up inside the opening tag.
						$attrs = ( isset( $el['attrs'] ) && $el['attrs'] !== '' ) ? ' ' . $el['attrs'] : '';

						$out .= "\n" . '<' . $el['tag'] . $attrs . '>';
						$out .= str_replace( "\n", "\n ", "\n" . trim( $el['out'] ) );
						$out .= "\n" . '</' . $el['tag'] . '>' . "\n";
					} else {
						// Resume the parent at this index once the child is rendered.
						$stack[]       = array( &$parent, $i );
						$stack[]       = array( &$el, 0 );
						$el['revisit'] = true;
						$el['out']     = '';
						break;
					}
					continue;
				} // Container, Grouping

				/** ---------------------------
				 * Flow, Comment
				 */

				if ( $el['type'] === 'flow' || $el['type'] === 'comment' ) {
					$out .= "\n" . $el['raw'] . "\n";
					continue;
				} // Flow, Comment

				// 'space' nodes that were not absorbed above produce no output.

			} // For each element
		} // While stack

		// The root entry is always the last one popped, so its buffer is complete.
		return trim( $root['out'] );
	}

	/**
	 * Returns the tokenizer pattern used by parse().
	 *
	 * Each match is one of: a run of text, an HTML comment, or a tag. A tag
	 * match includes its content and closing tag when a matching closing
	 * tag is found (the pattern recurses into itself for the content).
	 * Named groups: 'raw', 'text', 'comment', 'tag', 'attrs', 'inner'.
	 *
	 * @return string PCRE pattern including delimiters and modifiers.
	 */
	protected static function getRegex() {
		return '%'
			. '(?<raw>'
				// Text up to the next "<".
				. '(?<text>'
					. '[^<]+'
				. ')'
			. '|'
				// Comment.
				. '<!--(?<comment>.*?)-->'
			. '|'
				// Tag name.
				. '<(?<tag>'
					. '[^[:space:]>]+'
				. ')'
				// Optional attributes: quoted values or any character that
				// does not end the tag.
				. '(?:'
					. '[[:space:]]+'
					. '(?<attrs>'
						. '(?:'
							. '"(?:\\\"|[^"])*"'
						. '|'
							. "'(?:\\\'|[^'])*'"
						. '|'
							. '(?:[^/>"\']|/(?!>))'
						. ')*'
					. ')'
				. ')?'
				. '[[:space:]]*'
				. '(?:'
					// Self-closing.
					. '/>'
				. '|'
					// Opening tag, optionally followed by content and the
					// matching closing tag.
					. '>'
					. '(?:'
						. '(?<inner>'
							. '(?R)*?'
						. ')'
						. '</\k<tag>>'
					. ')?'
				. ')'
			. ')'
		. '%isuS';
	}
}
---|
430 | |
---|
/**
 * Filter callback that formats text with WP_Autop.
 *
 * The incoming filtered value is ignored; the result is produced from
 * $text alone. A single WP_Autop instance is created on first use and
 * reused by later calls.
 *
 * @param mixed  $output Value supplied by the filter; not used.
 * @param string $text   Text to format.
 * @param bool   $br     Passed through to WP_Autop::autop().
 * @return string Result of WP_Autop::autop().
 */
function wpautop_replacement( $output, $text, $br = true ) {
	static $formatter = null;

	$formatter = $formatter ? $formatter : new WP_Autop();

	return $formatter->autop( $text, $br );
}
---|
443 | |
---|
// Accept three arguments so the callback's $br parameter can be received;
// with only two accepted arguments it could never be anything but its default.
// NOTE(review): confirm the '_temp_wpautop' call site passes $br as its third
// argument. If it passes fewer, the callback's default is used.
add_filter( '_temp_wpautop', 'wpautop_replacement', 10, 3 );
---|
445 | |
---|