<?php
/*
Plugin Name: WP Autop
Plugin URI: http://wordpress.org/plugins/
Description: Feature plugin to improve the wpautop() functionality.
Version: 0.1

Released under the GPL v.2, http://www.gnu.org/licenses/gpl-2.0.html

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.
*/

/**
 * Converts loosely formatted text/HTML into paragraph-wrapped markup.
 *
 * The input is first parsed into a tree of nodes (text, whitespace, phrasing
 * elements, flow elements, containers, grouping elements and comments) and the
 * tree is then serialised back to a string, wrapping runs of text and phrasing
 * content in <p> elements.
 */
class WP_Autop {
	/**
	 * Most elements that are used in the body of documents
	 * and applications are categorized as flow content.
	 *
	 * @see http://www.w3.org/TR/html5/dom.html#flow-content
	 */
	protected static $flowContent = array(
		'a', 'abbr', 'address', 'area', 'article', 'aside', 'audio', 'b', 'bdi',
		'bdo', 'blockquote', 'br', 'button', 'canvas', 'cite', 'code', 'data',
		'datalist', 'del', 'dfn', 'div', 'dl', 'em', 'embed', 'fieldset', 'figure',
		'footer', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hr', 'i',
		'iframe', 'img', 'input', 'ins', 'kbd', 'keygen', 'label', 'main', 'map',
		'mark', 'math', 'meter', 'nav', 'noscript', 'object', 'ol', 'output', 'p',
		'pre', 'progress', 'q', 'ruby', 's', 'samp', 'script', 'section', 'select',
		'small', 'span', 'strong', 'sub', 'sup', 'svg', 'table', 'template',
		'textarea', 'time', 'u', 'ul', 'var', 'video',
	);

	/**
	 * Phrasing content is the text of the document,
	 * as well as elements that mark up that text at the intra-paragraph level.
	 *
	 * @see http://www.w3.org/TR/html5/dom.html#phrasing-content
	 */
	protected static $phrasingContent = array(
		'a', 'abbr', 'area', 'audio', 'b', 'bdi', 'bdo', 'br', 'button', 'canvas',
		'cite', 'code', 'data', 'datalist', 'del', 'dfn', 'em', 'embed', 'i',
		'iframe', 'img', 'input', 'ins', 'kbd', 'keygen', 'label', 'map', 'mark',
		'math', 'meter', 'noscript', 'object', 'output', 'progress', 'q', 'ruby',
		's', 'samp', 'script', 'select', 'small', 'span', 'strong', 'sub', 'sup',
		'svg', 'template', 'textarea', 'time', 'u', 'var', 'video', 'wbr',
	);

	/**
	 * Grouping of elements where flow content is expected.
	 *
	 * @see http://www.w3.org/TR/html5/grouping-content.html
	 * @see http://www.w3.org/TR/html5/tabular-data.html
	 */
	protected static $groupingContent = array(
		// Grouping content
		'ol', 'ul', 'dl',
		// Tabular data
		'table', 'tbody', 'thead', 'tfoot', 'tr',
	);

	/**
	 * Where flow content is expected.
	 *
	 * @see http://www.w3.org/TR/html5/grouping-content.html#the-p-element
	 */
	protected static $flowContainer = array(
		// Flow content
		/*'a',*/ 'address', 'article', 'aside', 'audio', 'blockquote', 'canvas', 'del',
		'div', 'fieldset', 'figure', 'footer', 'form', 'header', 'iframe', 'ins',
		'main', 'map', 'nav', 'noscript', 'object', 'section', 'video',
		// Grouping content
		'li', 'dt', 'dd', 'figcaption',
		// Tabular data
		'td', 'th', 'caption'
	);

	/**
	 * Tags whose text content is wrapped in a paragraph even when that text
	 * is the element's only child.
	 */
	protected static $forceParagraph = array(
		'blockquote',
	);

	/**
	 * Whether the node's tag is listed as flow content.
	 *
	 * @param array $el Node or regex match carrying an optional 'tag' key.
	 * @return bool
	 */
	protected function isFlowContent( $el ) {
		return isset( $el['tag'] ) && in_array( $el['tag'], self::$flowContent, true );
	}

	/**
	 * Whether the node's tag is listed as phrasing (inline) content.
	 *
	 * @param array $el Node or regex match carrying an optional 'tag' key.
	 * @return bool
	 */
	protected function isPhrasingContent( $el ) {
		return isset( $el['tag'] ) && in_array( $el['tag'], self::$phrasingContent, true );
	}

	/**
	 * Whether the node's tag is a grouping element (lists, table structure).
	 *
	 * @param array $el Node or regex match carrying an optional 'tag' key.
	 * @return bool
	 */
	protected function isGroupingContent( $el ) {
		return isset( $el['tag'] ) && in_array( $el['tag'], self::$groupingContent, true );
	}

	/**
	 * Whether the node's tag is an element whose children are parsed as flow content.
	 *
	 * @param array $el Node or regex match carrying an optional 'tag' key.
	 * @return bool
	 */
	protected function isFlowContainer( $el ) {
		return isset( $el['tag'] ) && in_array( $el['tag'], self::$flowContainer, true );
	}

	/**
	 * Whether text inside this node must always be wrapped in a paragraph.
	 *
	 * @param array $el Node carrying an optional 'tag' key (the root node has none).
	 * @return bool
	 */
	protected function isForceParagraph( $el ) {
		return isset( $el['tag'] ) && in_array( $el['tag'], self::$forceParagraph, true );
	}

	/**
	 * Parses the text and serialises the resulting tree with paragraphs added.
	 *
	 * @param string $text Raw text/HTML.
	 * @param bool   $br   Whether single line breaks inside a paragraph become "<br>".
	 * @return string
	 */
	public function autop( $text, $br = true ) {
		return $this->treeToString( $this->parse( $text ), $br );
	}

	/**
	 * Builds a node tree from the text.
	 *
	 * Every node is an array with a 'type' key: 'root', 'text', 'space',
	 * 'phrasing', 'flow', 'comment', 'container' or 'grouping'. Container and
	 * grouping nodes carry 'tag', 'attrs' and 'children'; their inner markup is
	 * parsed iteratively through an explicit stack instead of recursion.
	 *
	 * @param string $text Raw text/HTML.
	 * @return array The root node.
	 */
	public function parse( $text ) {
		$root = array(
			'type' => 'root', 'children' => array(),
		);

		// Each work item pairs a chunk of markup with a reference to the node
		// that receives the chunk's children. preg_replace() yields null on
		// failure; the cast keeps the following string calls well-defined.
		$stack = array(
			array( (string) preg_replace( '%\R%u', "\n", $text ), &$root ),
		);

		// Two or more line breaks (optionally padded with whitespace) are a
		// paragraph boundary.
		$spacePattern = '%([[:space:]]*(\R)[[:space:]]*){2,}%u';

		while ( $_s = array_pop( $stack ) ) {

			if ( strpos( $_s[0], '<' ) !== false ) {
				preg_match_all( self::getRegex(), $_s[0], $_m, PREG_SET_ORDER );
			} else {
				// No markup at all: treat the whole chunk as one text match.
				$_m = array( array( 'text' => $_s[0] ) );
			}

			$el = &$_s[1];
			$c  = -1;

			for ( $i = 0, $ilen = count( $_m ); $i < $ilen; $i++ ) {
				$m       = $_m[$i];
				$isFirst = ( 0 === $i );
				$isLast  = ( $i === $ilen - 1 );

				/** ---------------------------
				 * Text
				 */

				$chunk = isset( $m['text'] ) ? $m['text'] : null;

				if ( null !== $chunk && '' !== $chunk ) {

					// 'a' is the leading whitespace, 'b' the trailing whitespace.
					preg_match( '%^(?<a>[[:space:]]*).*?(?<b>[[:space:]]*)$%su', $chunk, $s );

					$trimmed = trim( $chunk );

					// Keep the whitespace that separates this text from the
					// previous sibling unless it is a paragraph boundary. A
					// whitespace-only chunk at the very end separates nothing
					// and is dropped; a non-empty last chunk must keep its
					// leading whitespace so it can join the preceding inline
					// content instead of starting a new paragraph.
					if ( ! $isFirst && ( ! $isLast || '' !== $trimmed ) && ! preg_match( $spacePattern, $s['a'] ) ) {
						$el['children'][++$c] = array(
							'type'  => 'space',
							'space' => $s['a'],
						);
					}

					if ( '' !== $trimmed ) {
						// Every paragraph-separated piece becomes its own text node.
						foreach ( preg_split( $spacePattern, $trimmed ) as $p ) {
							$el['children'][++$c] = array(
								'type' => 'text',
								'text' => $p,
							);
						}

						if ( ! $isLast && ! preg_match( $spacePattern, $s['b'] ) ) {
							$el['children'][++$c] = array(
								'type'  => 'space',
								'space' => $s['b'],
							);
						}
					}
					continue;
				} // Text

				/** ---------------------------
				 * Tag
				 */

				// Optional groups that did not take part in the match are
				// missing from the result, so normalise them to null.
				$m['raw']   = isset( $m['raw'] )   ? $m['raw']   : null;
				$m['tag']   = isset( $m['tag'] )   ? $m['tag']   : null;
				$m['attrs'] = isset( $m['attrs'] ) ? $m['attrs'] : null;
				$m['inner'] = isset( $m['inner'] ) ? $m['inner'] : null;

				if ( null !== $m['tag'] && '' !== $m['tag'] ) {

					$m['tag'] = strtolower( $m['tag'] );

					$isContainer = $this->isFlowContainer( $m );

					if ( $isContainer || $this->isGroupingContent( $m ) ) {

						$el['children'][++$c] = array(
							'type'     => $isContainer ? 'container' : 'grouping',
							'tag'      => $m['tag'],
							'attrs'    => $m['attrs'],
							'children' => array(),
						);

						// Defer the inner markup; it is parsed into the new node later.
						if ( null !== $m['inner'] && '' !== $m['inner'] ) {
							array_push( $stack, array( $m['inner'], &$el['children'][$c] ) );
						}

					} else {

						// Inline elements stay inside paragraphs; anything else
						// is emitted verbatim between paragraphs.
						$el['children'][++$c] = array(
							'type' => $this->isPhrasingContent( $m ) ? 'phrasing' : 'flow',
							'raw'  => $m['raw'],
						);
					}
					continue;
				} // Tag

				/** ---------------------------
				 * Comment
				 */

				$comment = isset( $m['comment'] ) ? $m['comment'] : null;

				if ( null !== $comment && '' !== $comment ) {
					$el['children'][++$c] = array(
						'type' => 'comment',
						'raw'  => $m['raw'],
					);
				} // Comment

			} // For each element
		} // While stack

		return $root;
	}

	/**
	 * Serialises a parsed tree, wrapping text and phrasing runs in paragraphs.
	 *
	 * Containers are handled in two passes through an explicit stack: on the
	 * first visit the container is pushed so its children are rendered into
	 * its own 'out' buffer; on the second visit ('revisit') the buffered
	 * output is wrapped in the container's tags and appended to the parent.
	 *
	 * @param array $root Root node as returned by parse().
	 * @param bool  $br   Whether single line breaks become "<br>".
	 * @return string
	 */
	protected function treeToString( $root, $br = true ) {
		$stack = array( array( &$root, 0 ) );
		$root['out'] = '';

		// With $br disabled the replacement is the newline itself, which
		// leaves single line breaks untouched.
		$lineBreak = $br ? "<br>\n" : "\n";

		while ( $_s = array_pop( $stack ) ) {
			$parent = &$_s[0];
			$out    = &$parent['out'];

			for ( $i = $_s[1], $len = count( $parent['children'] ); $i < $len; $i++ ) {
				$el = &$parent['children'][$i];

				/** ---------------------------
				 * Text, Phrasing
				 */

				if ( 'text' === $el['type'] || 'phrasing' === $el['type'] ) {
					$type = $el['type'];

					if ( 'text' === $type ) {
						$content = str_replace( "\n", $lineBreak, $el['text'] );
					} else {
						$content = $el['raw'];
					}

					// Absorb following "space + inline" pairs into the same
					// paragraph. $i is advanced past every absorbed pair.
					while ( isset( $parent['children'][$i + 2] ) ) {
						$a = $parent['children'][$i + 1];
						$b = $parent['children'][$i + 2];

						if ( 'space' !== $a['type'] ) {
							break;
						}

						if ( 'text' === $b['type'] ) {
							// Text is only appended after a non-text item.
							if ( 'text' === $type ) {
								break;
							}
							$content .= str_replace( "\n", $lineBreak, $a['space'] . $b['text'] );
						} elseif ( 'phrasing' === $b['type'] ) {
							$content .= str_replace( "\n", $lineBreak, $a['space'] . $b['raw'] );
						} elseif ( 'comment' === $b['type'] ) {
							$content .= $a['space'] . $b['raw'];
						} else {
							break;
						}

						$type = $b['type'];
						$i += 2;
					}

					// Indent every line of the paragraph body by two spaces.
					$content = str_replace( "\n", "\n  ", "\n" . $content );

					$isBare = ( 'grouping' === $parent['type'] )
						|| ( 1 === $len && ! $this->isForceParagraph( $parent ) );

					if ( $isBare ) {
						// Directly inside a grouping element, or the only
						// child of its parent: no paragraph wrapper.
						$out .= "\n" . $content . "\n";
					} else {
						$out .= "\n<p>" . $content . "\n</p>\n";
					}
					continue;
				} // Text, Phrasing

				/** ---------------------------
				 * Container, Grouping
				 */

				if ( 'container' === $el['type'] || 'grouping' === $el['type'] ) {
					if ( isset( $el['revisit'] ) && $el['revisit'] ) {
						// 'attrs' is null when the tag carried no attribute
						// section; only a non-empty value gets a separating space.
						$attrs = ( isset( $el['attrs'] ) && '' !== $el['attrs'] ) ? ' ' . $el['attrs'] : '';

						$out .= "\n" . '<' . $el['tag'] . $attrs . '>';
						$out .= str_replace( "\n", "\n\t", "\n" . trim( $el['out'] ) );
						$out .= "\n" . '</' . $el['tag'] . '>' . "\n";
					} else {
						// First visit: resume the parent at this index once
						// the element's children have been rendered.
						$stack[] = array( &$parent, $i );
						$stack[] = array( &$el, 0 );
						$el['revisit'] = true;
						$el['out'] = '';
						break;
					}
					continue;
				} // Container, Grouping

				/** ---------------------------
				 * Flow, Comment
				 */

				if ( 'flow' === $el['type'] || 'comment' === $el['type'] ) {
					$out .= "\n" . $el['raw'] . "\n";
				} // Flow, Comment

			} // For each element
		} // While stack

		return trim( $root['out'] );
	}

	/**
	 * Returns the tokenising pattern used by parse().
	 *
	 * Each match is one of: a run of text, an HTML comment, or a tag with its
	 * optional attributes and, when a matching closing tag exists, its inner
	 * markup (matched through pattern recursion). Named groups: raw, text,
	 * comment, tag, attrs, inner.
	 *
	 * @return string
	 */
	protected static function getRegex() {
		return '%'
			. '(?<raw>'
			.	 '(?<text>'
			.		 '[^<]+'
			.	 ')'
			. '|'
			.	 '<!--(?<comment>.*?)-->'
			. '|'
			.	 '<(?<tag>'
			// A slash belongs to the tag name only when it does not start the
			// self-closing "/>" sequence, so "<br/>" yields the tag "br".
			.		 '(?:[^[:space:]/>]|/(?!>))+'
			.	 ')'
			.	 '(?:'
			.		 '[[:space:]]+'
			.		 '(?<attrs>'
			.			 '(?:'
			.				 '"(?:\\\"|[^"])*"'
			.			 '|'
			.				 "'(?:\\\'|[^'])*'"
			.			 '|'
			.				 '(?:[^/>"\']|/(?!>))'
			.			 ')*'
			.		 ')'
			.	 ')?'
			.	 '[[:space:]]*'
			.	 '(?:'
			.		 '/>'
			.	 '|'
			.		 '>'
			.		 '(?:'
			.			 '(?<inner>'
			.				 '(?R)*?'
			.			 ')'
			.			 '</\k<tag>>'
			.		 ')?'
			.	 ')'
			. ')'
			. '%isuS';
	}
}

/**
 * Filter callback that runs the text through WP_Autop.
 *
 * The incoming $output is ignored; the result is always recomputed from
 * $text. A single WP_Autop instance is created on first use and reused by
 * later calls.
 *
 * @param mixed  $output Value supplied by the filter (unused).
 * @param string $text   Text to convert.
 * @param bool   $br     Passed through to WP_Autop::autop().
 * @return string
 */
function wpautop_replacement( $output, $text, $br = true ) {
	static $converter = null;

	if ( ! ( $converter instanceof WP_Autop ) ) {
		$converter = new WP_Autop();
	}

	$converted = $converter->autop( $text, $br );

	return $converted;
}

// wpautop_replacement() declares three parameters ($output, $text, $br);
// accept three arguments so $br can reach the callback when the filter
// supplies it. Filters that pass fewer values are unaffected.
add_filter( '_temp_wpautop', 'wpautop_replacement', 10, 3 );

