1 | <?php |
---|
/**
 * WP_Formatting_AutoP
 *
 * Turns loosely formatted text/HTML into paragraph markup in two passes:
 * parse() tokenises the input into a tree of nodes and treeToString()
 * serialises that tree, wrapping runs of text and phrasing (inline)
 * elements in <p> tags.
 */
class WP_Formatting_AutoP
{
    /**
     * Most elements that are used in the body of documents
     * and applications are categorized as flow content.
     *
     * @see http://www.w3.org/TR/html5/dom.html#flow-content
     */
    protected static $flowContent = array(
        'a', 'abbr', 'address', 'area', 'article', 'aside', 'audio', 'b', 'bdi',
        'bdo', 'blockquote', 'br', 'button', 'canvas', 'cite', 'code', 'data',
        'datalist', 'del', 'dfn', 'div', 'dl', 'em', 'embed', 'fieldset', 'figure',
        'footer', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hr', 'i',
        'iframe', 'img', 'input', 'ins', 'kbd', 'keygen', 'label', 'main', 'map',
        'mark', 'math', 'meter', 'nav', 'noscript', 'object', 'ol', 'output', 'p',
        'pre', 'progress', 'q', 'ruby', 's', 'samp', 'script', 'section', 'select',
        'small', 'span', 'strong', 'sub', 'sup', 'svg', 'table', 'template',
        'textarea', 'time', 'u', 'ul', 'var', 'video',
    );

    /**
     * Phrasing content is the text of the document,
     * as well as elements that mark up that text at the intra-paragraph level.
     *
     * @see http://www.w3.org/TR/html5/dom.html#phrasing-content
     */
    protected static $phrasingContent = array(
        'a', 'abbr', 'area', 'audio', 'b', 'bdi', 'bdo', 'br', 'button', 'canvas',
        'cite', 'code', 'data', 'datalist', 'del', 'dfn', 'em', 'embed', 'i',
        'iframe', 'img', 'input', 'ins', 'kbd', 'keygen', 'label', 'map', 'mark',
        'math', 'meter', 'noscript', 'object', 'output', 'progress', 'q', 'ruby',
        's', 'samp', 'script', 'select', 'small', 'span', 'strong', 'sub', 'sup',
        'svg', 'template', 'textarea', 'time', 'u', 'var', 'video', 'wbr',
    );

    /**
     * Grouping of elements where flow content is expected.
     *
     * Text found directly inside one of these elements is never wrapped
     * in <p> by treeToString().
     *
     * @see http://www.w3.org/TR/html5/grouping-content.html
     * @see http://www.w3.org/TR/html5/tabular-data.html
     */
    protected static $groupingContent = array(
        // Grouping content
        'ol', 'ul', 'dl',
        // Tabular data
        'table', 'tbody', 'thead', 'tfoot', 'tr',
    );

    /**
     * Where flow content is expected.
     *
     * The inner markup of these elements is parsed recursively.
     *
     * @see http://www.w3.org/TR/html5/grouping-content.html#the-p-element
     */
    protected static $flowContainer = array(
        // Flow content
        /*'a',*/ 'address', 'article', 'aside', 'audio', 'blockquote', 'canvas', 'del',
        'div', 'fieldset', 'figure', 'footer', 'form', 'header', 'iframe', 'ins',
        'main', 'map', 'nav', 'noscript', 'object', 'section', 'video',
        // Grouping content
        'li', 'dt', 'dd', 'figcaption',
        // Tabular data
        'td', 'th', 'caption'
    );

    /**
     * Containers whose content is wrapped in <p> even when it is
     * the container's only child.
     */
    protected static $forceParagraph = array(
        'blockquote',
    );

    /**
     * @param array $el Node or regex match; only the 'tag' key is inspected.
     * @return bool True when the tag is listed in self::$flowContent.
     */
    protected function isFlowContent($el)
    {
        return isset($el['tag']) && in_array($el['tag'], self::$flowContent, true);
    }

    /**
     * @param array $el Node or regex match; only the 'tag' key is inspected.
     * @return bool True when the tag is listed in self::$phrasingContent.
     */
    protected function isPhrasingContent($el)
    {
        return isset($el['tag']) && in_array($el['tag'], self::$phrasingContent, true);
    }

    /**
     * @param array $el Node or regex match; only the 'tag' key is inspected.
     * @return bool True when the tag is listed in self::$groupingContent.
     */
    protected function isGroupingContent($el)
    {
        return isset($el['tag']) && in_array($el['tag'], self::$groupingContent, true);
    }

    /**
     * @param array $el Node or regex match; only the 'tag' key is inspected.
     * @return bool True when the tag is listed in self::$flowContainer.
     */
    protected function isFlowContainer($el)
    {
        return isset($el['tag']) && in_array($el['tag'], self::$flowContainer, true);
    }

    /**
     * @param array $el Node or regex match; only the 'tag' key is inspected.
     * @return bool True when the tag is listed in self::$forceParagraph.
     */
    protected function isForceParagraph($el)
    {
        return isset($el['tag']) && in_array($el['tag'], self::$forceParagraph, true);
    }

    /**
     * Converts text into paragraph markup.
     *
     * @param string $text Raw text/HTML.
     * @param bool   $br   When true (default) single line breaks inside a
     *                     paragraph become "<br>\n"; when false they are
     *                     left untouched.
     * @return string The formatted markup, trimmed.
     */
    public function autop($text, $br = true)
    {
        return $this->treeToString($this->parse($text), $br);
    }

    /**
     * Tokenises the input and builds the node tree.
     *
     * Node shapes produced:
     *  - array('type' => 'root', 'children' => array(...))
     *  - array('type' => 'container'|'grouping', 'tag', 'attrs', 'children')
     *  - array('type' => 'phrasing'|'flow'|'comment', 'raw')
     *  - array('type' => 'text', 'text')
     *  - array('type' => 'space', 'space')
     *
     * @param string $text Raw text/HTML.
     * @return array The root node.
     */
    public function parse($text)
    {
        $root = array(
            'type' => 'root', 'children' => array(),
        );

        // Normalise every line-break sequence to "\n".
        $normalized = preg_replace('%\R%u', "\n", $text);

        if ($normalized === null) {
            // preg_replace() yields null on failure (e.g. the subject is not
            // valid UTF-8). Pass the input through verbatim as a raw node
            // rather than silently producing an empty document.
            $root['children'][] = array(
                'type' => 'flow',
                'raw'  => (string) $text,
            );

            return $root;
        }

        // Two or more line breaks (with optional surrounding whitespace)
        // separate paragraphs.
        $spacePattern = '%([[:space:]]*(\R)[[:space:]]*){2,}%u';

        // Each frame is array(markup still to tokenise, reference to the node
        // that receives the resulting children).
        $stack = array(
            array($normalized, &$root),
        );

        /** -----------------------------
         * While stack
         */

        while ($frame = array_pop($stack)) {
            $el = &$frame[1];
            $c = -1; // index of the last child appended to $el

            if (strpos($frame[0], '<') !== false) {
                $found = preg_match_all(self::getRegex(), $frame[0], $tokens, PREG_SET_ORDER);

                if ($found === false) {
                    // The tokenizer failed (e.g. a PCRE limit was hit): keep
                    // the chunk as-is instead of dropping it.
                    $el['children'][++$c] = array(
                        'type' => 'flow',
                        'raw'  => $frame[0],
                    );
                    continue;
                }
            } else {
                // No markup at all: the whole chunk is a single text token.
                $tokens = array(array('text' => $frame[0]));
            }

            /** -----------------------------
             * For each element (text, tags, comments)
             */

            for ($i = 0, $ilen = count($tokens); $i < $ilen; $i++) {
                $m = $tokens[$i];

                /** ---------------------------
                 * Text
                 */

                $chunk = isset($m['text']) ? $m['text'] : null;

                if ($chunk !== null && $chunk !== '') {

                    // $s['a'] / $s['b']: leading / trailing whitespace.
                    preg_match('%^(?<a>[[:space:]]*).*?(?<b>[[:space:]]*)$%su', $chunk, $s);

                    // Whitespace shorter than a paragraph break that follows a
                    // previous sibling is recorded so treeToString() can join
                    // the neighbours into one paragraph. This must also happen
                    // for the last token, otherwise text trailing an inline
                    // element is split off into a paragraph of its own.
                    if ($i !== 0 && !preg_match($spacePattern, $s['a'])) {
                        $el['children'][++$c] = array(
                            'type'  => 'space',
                            'space' => $s['a'],
                        );
                    }

                    $chunk = trim($chunk);

                    if ($chunk !== '') {
                        // One text node per paragraph.
                        foreach (preg_split($spacePattern, $chunk) as $p) {
                            $el['children'][++$c] = array(
                                'type' => 'text',
                                'text' => $p,
                            );
                        }

                        if ($i !== $ilen - 1 && !preg_match($spacePattern, $s['b'])) {
                            $el['children'][++$c] = array(
                                'type'  => 'space',
                                'space' => $s['b'],
                            );
                        }
                    }
                    continue;
                } // Text

                /** ---------------------------
                 * Tag
                 */

                // With PREG_SET_ORDER trailing unmatched groups are missing
                // from the match array, so default them to null.
                foreach (array('raw', 'tag', 'attrs', 'inner') as $key) {
                    if (!isset($m[$key])) {
                        $m[$key] = null;
                    }
                }

                if ($m['tag'] !== null && $m['tag'] !== '') {

                    $m['tag'] = strtolower($m['tag']);

                    if ($this->isFlowContainer($m)) {
                        $type = 'container';
                    } elseif ($this->isGroupingContent($m)) {
                        $type = 'grouping';
                    } elseif ($this->isPhrasingContent($m)) {
                        $type = 'phrasing';
                    } else {
                        $type = 'flow';
                    }

                    if ($type === 'container' || $type === 'grouping') {

                        $el['children'][++$c] = array(
                            'type'     => $type,
                            'tag'      => $m['tag'],
                            'attrs'    => $m['attrs'],
                            'children' => array(),
                        );

                        // Tokenise the inner markup later, into the new node.
                        if ($m['inner'] !== null && $m['inner'] !== '') {
                            $stack[] = array($m['inner'], &$el['children'][$c]);
                        }

                    } else {

                        // Phrasing and other flow elements are kept verbatim.
                        $el['children'][++$c] = array(
                            'type' => $type,
                            'raw'  => $m['raw'],
                        );
                    }
                    continue;
                } // Tag

                /** ---------------------------
                 * Comment
                 */

                // Detected through the raw match rather than the 'comment'
                // capture so that an empty comment ("<!---->") is kept too.
                if ($m['raw'] !== null && strncmp($m['raw'], '<!--', 4) === 0) {

                    $el['children'][++$c] = array(
                        'type' => 'comment',
                        'raw'  => $m['raw'],
                    );
                    continue;
                } // Comment

            } // For each element
        } // While stack

        return $root;
    }

    /**
     * Serialises a tree produced by parse().
     *
     * @param array $root Root node as returned by parse().
     * @param bool  $br   Whether single line breaks become "<br>\n".
     * @return string The formatted markup, trimmed.
     */
    protected function treeToString($root, $br = true)
    {
        // Each frame is array(reference to a parent node, index of the child
        // at which to (re)start). A parent is re-queued beneath a container
        // child so the child's output is complete when the parent resumes.
        $stack = array(array(&$root, 0));
        $root['out'] = '';

        /** -----------------------------
         * While stack
         */

        while ($frame = array_pop($stack)) {
            $parent = &$frame[0];
            $out = &$parent['out'];

            /** -----------------------------
             * For each element (text, tags, comments)
             */

            for ($i = $frame[1], $len = count($parent['children']); $i < $len; $i++) {
                $el = &$parent['children'][$i];

                /** ---------------------------
                 * Text, Phrasing
                 */

                if ($el['type'] === 'text' || $el['type'] === 'phrasing') {
                    $type = $el['type'];
                    $content = $type === 'text'
                        ? $this->breakLines($el['text'], $br)
                        : $el['raw'];

                    // Absorb following "space + sibling" pairs into the same
                    // paragraph. Two text nodes are never joined: they were
                    // split apart as separate paragraphs by parse().
                    while (isset($parent['children'][$i + 2])) {
                        $a = $parent['children'][$i + 1];
                        $b = $parent['children'][$i + 2];

                        if ($a['type'] !== 'space') {
                            break;
                        }

                        if ($b['type'] === 'text') {
                            if ($type === 'text') {
                                break;
                            }
                            $content .= $this->breakLines($a['space'] . $b['text'], $br);
                        } elseif ($b['type'] === 'phrasing') {
                            // NOTE(review): line breaks inside the raw markup
                            // of a merged phrasing element are converted as
                            // well, unlike those of a leading phrasing
                            // element - confirm this asymmetry is intended.
                            $content .= $this->breakLines($a['space'] . $b['raw'], $br);
                        } elseif ($b['type'] === 'comment') {
                            $content .= $a['space'] . $b['raw'];
                        } else {
                            break;
                        }

                        $type = $b['type'];
                        $i += 2;
                    }

                    // Put the content on its own line(s), indented one space.
                    $content = str_replace("\n", "\n ", "\n" . $content);

                    // No <p> inside grouping elements, nor around an only
                    // child unless the parent forces paragraphs.
                    $bare = $parent['type'] === 'grouping'
                        || ($len === 1 && !$this->isForceParagraph($parent));

                    if ($bare) {
                        $out .= "\n" . $content . "\n";
                    } else {
                        $out .= "\n<p>" . $content . "\n</p>\n";
                    }
                    continue;
                } // Text, Phrasing

                /** ---------------------------
                 * Container, Grouping
                 */

                if ($el['type'] === 'container' || $el['type'] === 'grouping') {
                    if (isset($el['revisit']) && $el['revisit']) {
                        // 'attrs' is null when the tag had neither attributes
                        // nor matched inner content; treat that like '' so no
                        // stray space ends up in the opening tag.
                        $attrs = isset($el['attrs']) && $el['attrs'] !== ''
                            ? ' ' . $el['attrs']
                            : '';

                        $out .= "\n" . '<' . $el['tag'] . $attrs . '>';
                        $out .= str_replace("\n", "\n ", "\n" . trim($el['out']));
                        $out .= "\n" . '</' . $el['tag'] . '>' . "\n";
                    } else {
                        // First visit: render the children first, then come
                        // back to this same index to emit the wrapper.
                        $stack[] = array(&$parent, $i);
                        $stack[] = array(&$el, 0);
                        $el['revisit'] = true;
                        $el['out'] = '';
                        break;
                    }
                    continue;
                } // Container, Grouping

                /** ---------------------------
                 * Flow, Comment
                 */

                if ($el['type'] === 'flow' || $el['type'] === 'comment') {
                    $out .= "\n" . $el['raw'] . "\n";
                    continue;
                } // Flow, Comment

                // 'space' nodes that were not absorbed above produce no output.

            } // For each element
        } // While stack

        // The root frame sits at the bottom of the stack, so it is always
        // the last one completed.
        return trim($root['out']);
    }

    /**
     * Converts line breaks into "<br>\n" when requested.
     *
     * @param string $chunk Text to convert.
     * @param bool   $br    When false the text is returned unchanged.
     * @return string
     */
    protected function breakLines($chunk, $br)
    {
        return $br ? str_replace("\n", "<br>\n", $chunk) : $chunk;
    }

    /**
     * Returns the tokenizer pattern.
     *
     * Every match is one of: a run of text (named group "text"), a comment
     * ("comment"), or a tag ("tag", optional "attrs") that is either
     * self-closed, left open, or followed by recursively matched content
     * ("inner") and its closing tag. "raw" always holds the whole match.
     *
     * @return string PCRE pattern including delimiters and modifiers.
     */
    protected static function getRegex()
    {
        return '%'
            . '(?<raw>'
            .   '(?<text>'
            .     '[^<]+'
            .   ')'
            .   '|'
            .   '<!--(?<comment>.*?)-->'
            .   '|'
            .   '<(?<tag>'
            .     '[^[:space:]>]+'
            .   ')'
            .   '(?:'
            .     '[[:space:]]+'
            .     '(?<attrs>'
            .       '(?:'
            .         '"(?:\\\"|[^"])*"'
            .         '|'
            .         "'(?:\\\'|[^'])*'"
            .         '|'
            .         '(?:[^/>"\']|/(?!>))'
            .       ')*'
            .     ')'
            .   ')?'
            .   '[[:space:]]*'
            .   '(?:'
            .     '/>'
            .     '|'
            .     '>'
            .     '(?:'
            .       '(?<inner>'
            .         '(?R)*?'
            .       ')'
            .       '</\k<tag>>'
            .     ')?'
            .   ')'
            . ')'
            . '%isuS';
    }
}
422 | |
---|
/**
 * Replaces double line-breaks with paragraph elements.
 *
 * A single WP_Formatting_AutoP instance is created on first use and
 * reused by every later call.
 *
 * @param string $text The text to format.
 * @param bool   $br   Optional, default true. Forwarded unchanged as the
 *                     second argument of WP_Formatting_AutoP::autop().
 * @return string Whatever WP_Formatting_AutoP::autop() returns.
 */
function wpautop_new($text, $br = true) {
    static $autop = null;

    if ($autop === null) {
        $autop = new WP_Formatting_AutoP();
    }

    return $autop->autop($text, $br);
}