<?php
/*
Plugin Name: Content Tag Interpreter
Description: Can be used to replace buggy shortcodes
Version: 0.97
Author: Jacob Beauregard
*/

/**
 * content_tag public interface
*/
/**
 * Attaches a handler to a content tag.
 *
 * Delegates to add_filter() using the tag's internal hook name and
 * registers the handler as accepting three arguments.
 *
 * @param string $tag      content tag name
 * @param mixed  $handler  callback to attach
 * @param int    $priority priority forwarded to add_filter()
 * @return mixed whatever add_filter() returns
 */
function add_content_tag_handler($tag,$handler,$priority=10) {
	$hook = _content_tag_ref_name($tag);
	return add_filter($hook,$handler,$priority,3);
}
/**
 * Runs the handlers attached to a content tag.
 *
 * Delegates to apply_filters_ref_array() on the tag's internal hook name,
 * passing the tag name, content and attributes (in that order) as the
 * argument array.
 *
 * @param string $tag     content tag name
 * @param string $content content enclosed by the tag
 * @param array  $attrs   evaluated attributes of the tag
 * @return mixed whatever apply_filters_ref_array() returns
 */
function apply_content_tag_handlers($tag,$content,$attrs) {
	$hook = _content_tag_ref_name($tag);
	$args = array($tag,$content,$attrs);
	return apply_filters_ref_array($hook,$args);
}
/**
 * Evaluates every content tag found in $content.
 *
 * Public entry point; the work is done by _content_tag_eval().
 *
 * @param string $content text that may contain content tags
 * @return mixed result of _content_tag_eval()
 */
function eval_content_tags($content) {
	$evaluated = _content_tag_eval($content);
	return $evaluated;
}
/**
 * Checks whether a content tag has a handler attached.
 *
 * Delegates to has_filter() on the tag's internal hook name.
 *
 * @param string $tag     content tag name
 * @param mixed  $handler specific callback to look for, or false (default)
 * @return mixed whatever has_filter() returns
 */
function has_content_tag_handler($tag,$handler=false) {
	$hook = _content_tag_ref_name($tag);
	return has_filter($hook,$handler);
}
/**
 * Detaches all handlers from a content tag.
 *
 * Delegates to remove_all_filters() on the tag's internal hook name.
 *
 * @param string $tag      content tag name
 * @param mixed  $priority priority forwarded to remove_all_filters(), false by default
 * @return mixed whatever remove_all_filters() returns
 */
function remove_all_content_tag_handlers($tag,$priority=false) {
	$hook = _content_tag_ref_name($tag);
	return remove_all_filters($hook,$priority);
}
/**
 * Detaches one handler from a content tag.
 *
 * Delegates to remove_filter() on the tag's internal hook name, forwarding
 * the same argument count (3) that add_content_tag_handler() registers with.
 *
 * @param string $tag      content tag name
 * @param mixed  $handler  callback to detach
 * @param int    $priority priority the handler was attached with
 * @return mixed whatever remove_filter() returns
 */
function remove_content_tag_handler($tag,$handler,$priority=10) {
	$hook = _content_tag_ref_name($tag);
	return remove_filter($hook,$handler,$priority,3);
}

/**
 * Builds the filter hook name used internally for a content tag.
 *
 * The tag name is lower-cased before the "_content_tag_" prefix is added,
 * so tag names that differ only in case share one hook.
 *
 * @param string $tag content tag name
 * @return string hook name
 */
function _content_tag_ref_name($tag) {
	$normalized = strtolower($tag);
	return "_content_tag_{$normalized}";
}

/**
 * types of content_tag expressions
*/
/**
 * Lists the expression types known to the parser.
 *
 * @return array type names, in a fixed order
 */
function _content_tag_types() {
	$escapes = array('esc_lsqbr','esc_rsqbr');
	$tags = array('tag_inline','tag_open','tag_close');
	$parts = array('attr','literal','name','text');
	return array_merge($escapes,$tags,$parts);
}
/**
 * Returns all names that refer to the same expression type as $type.
 *
 * Each row of the alias table is a group of interchangeable names. When
 * $type belongs to a group the whole group is returned; otherwise the
 * result contains only $type itself.
 *
 * @param mixed $type type name to look up
 * @return array list of equivalent type names
 */
function _content_tag_type_aliases($type) {
	$groups = array(
		array('name','tag_name')
	);
	$result = array($type);
	for ($i = 0, $n = count($groups); $i < $n; $i++) {
		if (in_array($type,$groups[$i],true)) {
			$result = $groups[$i];
		}
	}
	return $result;
}

/*
 * content_tag expressions that are subtypes of other content_tag expressions
*/
/**
 * Subtypes of the root expression, i.e. the top-level expression types.
 *
 * @return array list of type names
 */
function _content_tag_sub_root() {
	$top_level = array(
		'esc_lsqbr',
		'esc_rsqbr',
		'tag_inline',
		'tag_open',
		'tag_close',
		'text'
	);
	return $top_level;
}
/**
 * Subtypes of an inline tag: its tag name and its attributes.
 *
 * @return array list of type names
 */
function _content_tag_sub_tag_inline() {
	$parts = array('tag_name','attr');
	return $parts;
}
/**
 * Subtypes of an opening tag: its tag name and its attributes.
 *
 * @return array list of type names
 */
function _content_tag_sub_tag_open() {
	$parts = array('tag_name','attr');
	return $parts;
}
/**
 * Subtypes of a closing tag: only its tag name.
 *
 * @return array list of type names
 */
function _content_tag_sub_tag_close() {
	$parts = array('tag_name');
	return $parts;
}
/**
 * Subtypes of a text expression: none.
 *
 * @return array empty list
 */
function _content_tag_sub_text() {
	$none = array();
	return $none;
}
/**
 * Subtypes of a name expression: none.
 *
 * @return array empty list
 */
function _content_tag_sub_name() {
	$none = array();
	return $none;
}
/**
 * Subtypes of an attribute: its name and its literal value.
 *
 * @return array list of type names
 */
function _content_tag_sub_attr() {
	$parts = array('name','literal');
	return $parts;
}
/**
 * Subtypes of a literal expression: none.
 *
 * @return array empty list
 */
function _content_tag_sub_literal() {
	$none = array();
	return $none;
}
/**
 * Subtypes of a character-string expression: none.
 *
 * @return array empty list
 */
function _content_tag_sub_char_str() {
	$none = array();
	return $none;
}
/**
 * Subtypes of a character-reference expression: none.
 *
 * @return array empty list
 */
function _content_tag_sub_char_ref() {
	$none = array();
	return $none;
}
/**
 * Subtypes of an entity-reference expression: none.
 *
 * @return array empty list
 */
function _content_tag_sub_entity_ref() {
	$none = array();
	return $none;
}
/**
 * Subtypes of an escaped left square bracket: none.
 *
 * @return array empty list
 */
function _content_tag_sub_esc_lsqbr() {
	$none = array();
	return $none;
}
/**
 * Subtypes of an escaped right square bracket: none.
 *
 * @return array empty list
 */
function _content_tag_sub_esc_rsqbr() {
	$none = array();
	return $none;
}

/**
 * Returns the subtypes of an expression type.
 *
 * Dispatches to the matching _content_tag_sub_{type}() function for 'root'
 * and for every type listed by _content_tag_types().
 *
 * @param string $type expression type, 'root' by default
 * @return array list of subtype names; empty when $type is not a known type
 */
function _content_tag_sub($type='root') {
	// Start from an array so that unknown types yield "no subtypes" instead of
	// an undefined variable, which would break count() in the callers.
	$subtypes = array();
	if ($type === 'root' || in_array($type,_content_tag_types(),true)) {
		$func = "_content_tag_sub_{$type}";
		if (is_callable($func)) {
			$subtypes = call_user_func($func);
		}
	}
	return $subtypes;
}

/**
 * regular expressions to match valid expressions
*/
/**
 * Regex fragment matching a literal left square bracket.
 *
 * @return string regex fragment (no delimiters)
 */
function _content_tag_re_lsqbr() {
	$pattern = '\[';
	return $pattern;
}
/**
 * Regex fragment matching a literal right square bracket.
 *
 * @return string regex fragment (no delimiters)
 */
function _content_tag_re_rsqbr() {
	$pattern = '\]';
	return $pattern;
}
/**
 * Regex fragment matching a forward slash, escaped for use between
 * "/" delimiters.
 *
 * @return string regex fragment (no delimiters)
 */
function _content_tag_re_fslash() {
	$pattern = '\/';
	return $pattern;
}
/**
 * Regex fragment matching the equals sign between an attribute name and value.
 *
 * @return string regex fragment (no delimiters)
 */
function _content_tag_re_eq() {
	$pattern = '=';
	return $pattern;
}
/**
 * Regex fragment matching a single whitespace character.
 *
 * @return string regex fragment (no delimiters)
 */
function _content_tag_re_ws() {
	$pattern = '\s';
	return $pattern;
}
/**
 * Regex fragment matching a name: an ASCII letter followed by any number of
 * letters, digits, "-", "_", ":" or ".".
 *
 * @param mixed $named when truthy, wrap the fragment in a capture group named "name"
 * @return string regex fragment (no delimiters)
 */
function _content_tag_re_name($named=false) {
	$pattern = '[A-Za-z][-A-Za-z0-9_:.]*';
	if ($named) {
		return "(?P<name>{$pattern})";
	}
	return $pattern;
}
/**
 * Regex fragment matching a tag name: a name anchored to the start of the
 * subject string.
 *
 * @param mixed $named forwarded to _content_tag_re_name()
 * @return string regex fragment (no delimiters)
 */
function _content_tag_re_tag_name($named=false) {
	return '^' . _content_tag_re_name($named);
}
/**
 * Regex fragment matching a run of one or more characters other than
 * "%" and "&".
 *
 * @param mixed $named when truthy, wrap the fragment in a capture group named "char_str"
 * @return string regex fragment (no delimiters)
 */
function _content_tag_re_char_str($named=false) {
	$pattern = "[^%&]+";
	if ($named) {
		return "(?P<char_str>{$pattern})";
	}
	return $pattern;
}
/**
 * Regex fragment matching a run of plain characters inside a single-quoted
 * literal: one or more characters other than "%", "&" and "'".
 *
 * The quantifier is possessive ("++"). The single-quoted literal pattern
 * repeats this fragment inside a "(...)*" group; with a plain "+" that
 * nesting backtracks exponentially on input that cannot match (for example
 * an unterminated quote, or a "%" inside the quotes). A possessive run
 * matches the same strings in that context, because none of the characters
 * that may follow a run ("&" or "'") belong to the class.
 *
 * @param mixed $named when truthy, wrap the fragment in a capture group named "char_str"
 * @return string regex fragment (no delimiters)
 */
function _content_tag_re_char_str_sq($named=false) {
	$expr =  "[^%&']++";
	return $named?"(?P<char_str>{$expr})":$expr;
}
/**
 * Regex fragment matching a run of plain characters inside a double-quoted
 * literal: one or more characters other than "%", "&" and '"'.
 *
 * The quantifier is possessive ("++"). The double-quoted literal pattern
 * repeats this fragment inside a "(...)*" group; with a plain "+" that
 * nesting backtracks exponentially on input that cannot match (for example
 * an unterminated quote, or a "%" inside the quotes). A possessive run
 * matches the same strings in that context, because none of the characters
 * that may follow a run ("&" or '"') belong to the class.
 *
 * @param mixed $named when truthy, wrap the fragment in a capture group named "char_str"
 * @return string regex fragment (no delimiters)
 */
function _content_tag_re_char_str_dq($named=false) {
	$expr = '[^%&"]++';
	return $named?"(?P<char_str>{$expr})":$expr;
}
/**
 * Regex fragment matching a numeric character reference, decimal
 * ("&#NNN;") or hexadecimal ("&#xHHH;").
 *
 * @param mixed $named when truthy, wrap the fragment in a capture group named "char_ref"
 * @return string regex fragment (no delimiters)
 */
function _content_tag_re_char_ref($named=false) {
	$pattern = "&#(?:[0-9]+|x[0-9a-fA-F]+);";
	if ($named) {
		return "(?P<char_ref>{$pattern})";
	}
	return $pattern;
}
/**
 * Regex fragment matching a named entity reference: "&", a name, then ";".
 *
 * @param mixed $named when truthy, wrap the fragment in a capture group named "entity_ref"
 * @return string regex fragment (no delimiters)
 */
function _content_tag_re_entity_ref($named=false) {
	$pattern = '&' . _content_tag_re_name() . ';';
	if ($named) {
		return "(?P<entity_ref>{$pattern})";
	}
	return $pattern;
}
/**
 * Regex fragment matching an attribute value in any of its three forms:
 * unquoted, single-quoted or double-quoted (tried in that order).
 *
 * @param mixed $named when truthy, wrap the fragment in a capture group named "literal"
 * @return string regex fragment (no delimiters)
 */
function _content_tag_re_literal($named=false) {
	$forms = array(
		_content_tag_re_literal_nq(),
		_content_tag_re_literal_sq(),
		_content_tag_re_literal_dq()
	);
	$pattern = '(?:' . implode('|',$forms) . ')';
	if ($named) {
		return "(?P<literal>{$pattern})";
	}
	return $pattern;
}
/**
 * Regex fragment matching an unquoted literal: one or more letters,
 * digits, "-", "_", ":" or ".".
 *
 * @return string regex fragment (no delimiters)
 */
function _content_tag_re_literal_nq() {
	return "[-a-zA-Z0-9_:.]+";
}
/**
 * Regex fragment matching a single-quoted literal: a "'" pair enclosing any
 * sequence of plain character runs, character references and entity
 * references.
 *
 * @return string regex fragment (no delimiters)
 */
function _content_tag_re_literal_sq() {
	$pieces = array(
		_content_tag_re_char_str_sq(),
		_content_tag_re_char_ref(),
		_content_tag_re_entity_ref()
	);
	return "'(?:" . implode('|',$pieces) . ")*'";
}
/**
 * Regex fragment matching a double-quoted literal: a '"' pair enclosing any
 * sequence of plain character runs, character references and entity
 * references.
 *
 * @return string regex fragment (no delimiters)
 */
function _content_tag_re_literal_dq() {
	$pieces = array(
		_content_tag_re_char_str_dq(),
		_content_tag_re_char_ref(),
		_content_tag_re_entity_ref()
	);
	return '"(?:' . implode('|',$pieces) . ')*"';
}
/**
 * Regex fragment matching an attribute: a name, optionally followed by
 * "=" and a literal value, with optional whitespace around the "=".
 *
 * @param mixed $named when truthy, wrap the fragment in a capture group named "attr"
 * @return string regex fragment (no delimiters)
 */
function _content_tag_re_attr($named=false) {
	$name  = _content_tag_re_name();
	$literal = _content_tag_re_literal();
	$eq = _content_tag_re_eq();
	$ws = _content_tag_re_ws();
	$assignment = $ws . '*' . $eq . $ws . '*' . $literal;
	$pattern = $name . '(?:' . $assignment . ')?';
	if ($named) {
		return "(?P<attr>{$pattern})";
	}
	return $pattern;
}
/**
 * Regex fragment matching zero or more attributes, each preceded by at
 * least one whitespace character.
 *
 * @return string regex fragment (no delimiters)
 */
function _content_tag_re_attr_list() {
	$ws = _content_tag_re_ws();
	$attr = _content_tag_re_attr();
	return '(?:' . $ws . '+' . $attr . ')*';
}
/**
 * Regex fragment matching an escaped left square bracket, written as two
 * consecutive left square brackets.
 *
 * @param mixed $named when truthy, wrap the fragment in a capture group named "esc_lsqbr"
 * @return string regex fragment (no delimiters)
 */
function _content_tag_re_esc_lsqbr($named=false) {
	$bracket = _content_tag_re_lsqbr();
	$pattern = $bracket . $bracket;
	if ($named) {
		return "(?P<esc_lsqbr>{$pattern})";
	}
	return $pattern;
}
/**
 * Regex fragment matching an escaped right square bracket, written as two
 * consecutive right square brackets.
 *
 * @param mixed $named when truthy, wrap the fragment in a capture group named "esc_rsqbr"
 * @return string regex fragment (no delimiters)
 */
function _content_tag_re_esc_rsqbr($named=false) {
	$bracket = _content_tag_re_rsqbr();
	$pattern = $bracket . $bracket;
	if ($named) {
		return "(?P<esc_rsqbr>{$pattern})";
	}
	return $pattern;
}
/**
 * Regex fragment matching an inline (self-closing) tag:
 * "[", a name, an optional attribute list, "/" and "]", with optional
 * whitespace after "[", before "/" and before "]".
 *
 * @param mixed $named when truthy, wrap the fragment in a capture group named "tag_inline"
 * @return string regex fragment (no delimiters)
 */
function _content_tag_re_tag_inline($named=false) {
	$lsqbr = _content_tag_re_lsqbr();
	$rsqbr = _content_tag_re_rsqbr();
	$fslash = _content_tag_re_fslash();
	$ws = _content_tag_re_ws();
	$name = _content_tag_re_name();
	// The single-attribute fragment is not needed here; the attribute list
	// fragment already embeds it.
	$attr_list = _content_tag_re_attr_list();
	$expr = "{$lsqbr}{$ws}*{$name}{$attr_list}{$ws}*{$fslash}{$ws}*{$rsqbr}";
	return $named?"(?P<tag_inline>{$expr})":$expr;
}
/**
 * Regex fragment matching an opening tag:
 * "[", a name, an optional attribute list and "]", with optional
 * whitespace after "[" and before "]".
 *
 * @param mixed $named when truthy, wrap the fragment in a capture group named "tag_open"
 * @return string regex fragment (no delimiters)
 */
function _content_tag_re_tag_open($named=false) {
	$lsqbr = _content_tag_re_lsqbr();
	$rsqbr = _content_tag_re_rsqbr();
	$ws = _content_tag_re_ws();
	$name = _content_tag_re_name();
	// The single-attribute fragment is not needed here; the attribute list
	// fragment already embeds it.
	$attr_list = _content_tag_re_attr_list();
	$expr = "{$lsqbr}{$ws}*{$name}{$attr_list}{$ws}*{$rsqbr}";
	return $named?"(?P<tag_open>{$expr})":$expr;
}
/**
 * Regex fragment matching a closing tag: "[", "/", a name and "]", with
 * optional whitespace between each of them.
 *
 * @param mixed $named when truthy, wrap the fragment in a capture group named "tag_close"
 * @return string regex fragment (no delimiters)
 */
function _content_tag_re_tag_close($named=false) {
	$opt_ws = _content_tag_re_ws() . '*';
	$pieces = array(
		_content_tag_re_lsqbr(),
		$opt_ws,
		_content_tag_re_fslash(),
		$opt_ws,
		_content_tag_re_name(),
		$opt_ws,
		_content_tag_re_rsqbr()
	);
	$pattern = implode('',$pieces);
	if ($named) {
		return "(?P<tag_close>{$pattern})";
	}
	return $pattern;
}
/**
 * Regex fragment matching a chunk of plain text: one character of any kind
 * (including a newline) followed by any number of characters that are not
 * square brackets.
 *
 * A chunk stops at the next bracket, so it is not necessarily the longest
 * possible stretch of text.
 *
 * @param mixed $named when truthy, wrap the fragment in a capture group named "text"
 * @return string regex fragment (no delimiters)
 */
function _content_tag_re_text($named=false) {
	$brackets = _content_tag_re_lsqbr() . _content_tag_re_rsqbr();
	$pattern = '(?s:.[^' . $brackets . ']*)';
	if ($named) {
		return "(?P<text>{$pattern})";
	}
	return $pattern;
}

/**
 * cleans tags to prepare for faster parsing (specifically for re_tag_name)
*/
/**
 * Strips the leading "[" and any whitespace after it from an inline tag
 * expression, so that the tag name starts the string.
 *
 * @param string $expr inline tag expression
 * @return mixed result of preg_replace() on $expr
 */
function _content_tag_clean_tag_inline($expr) {
	$lsqbr = _content_tag_re_lsqbr();
	$ws = _content_tag_re_ws();
	return preg_replace("/^{$lsqbr}{$ws}*/","",$expr);
}
/**
 * Strips the leading "[" and any whitespace after it from an opening tag
 * expression, so that the tag name starts the string.
 *
 * @param string $expr opening tag expression
 * @return mixed result of preg_replace() on $expr
 */
function _content_tag_clean_tag_open($expr) {
	$lsqbr = _content_tag_re_lsqbr();
	$ws = _content_tag_re_ws();
	return preg_replace("/^{$lsqbr}{$ws}*/","",$expr);
}
/**
 * Strips the leading "[", the "/" and any whitespace around them from a
 * closing tag expression, so that the tag name starts the string.
 *
 * @param string $expr closing tag expression
 * @return mixed result of preg_replace() on $expr
 */
function _content_tag_clean_tag_close($expr) {
	$lsqbr = _content_tag_re_lsqbr();
	$fslash = _content_tag_re_fslash();
	$ws = _content_tag_re_ws();
	return preg_replace("/^{$lsqbr}{$ws}*{$fslash}{$ws}*/","",$expr);
}
/**
 * Removes the surrounding quotes from a quoted literal.
 *
 * When the expression starts with a single or double quote, its first and
 * last characters are dropped; anything else is returned unchanged.
 *
 * @param string $expr literal expression, quoted or not
 * @return string the literal without its quotes
 */
function _content_tag_clean_literal($expr) {
	// Nothing to inspect: reading offset 0 of an empty string would raise a
	// warning.
	if (!is_string($expr) || $expr === '') {
		return $expr;
	}
	$first = $expr[0];
	if ($first === "'" || $first === '"') {
		// substr() may return false for a one-character input on older PHP
		// versions; normalise that to an empty string.
		$expr = (string) substr($expr,1,-1);
	}
	return $expr;
}

/**
 * Returns a cleaned version of an expression of the given type.
 *
 * Uses _content_tag_clean_{type}() when such a function is callable;
 * otherwise the expression is returned untouched.
 *
 * @param string $type expression type
 * @param string $expr expression to clean
 * @return mixed cleaned expression
 */
function _content_tag_clean($type,$expr) {
	$cleaner = "_content_tag_clean_{$type}";
	if (!is_callable($cleaner)) {
		return $expr;
	}
	return call_user_func($cleaner,$expr);
}

/**
 * Combines the regexes of the given expression types into one delimited
 * regex.
 *
 * Takes any number of type names as arguments. For each one the named
 * variant of _content_tag_re_{type}() is requested; the fragments are
 * joined as alternatives and wrapped in "/" delimiters.
 *
 * @return string delimited regular expression
 */
function _content_tag_re_combine() {
	$requested = func_get_args();
	$alternatives = array();
	foreach ($requested as $type) {
		$alternatives[] = call_user_func("_content_tag_re_{$type}",1);
	}
	return '/' . implode('|',$alternatives) . '/';
}

/**
 * Returns the regular expression matching the subtypes of an expression
 * type (ex. attr => name or literal).
 *
 * @param string $type expression type
 * @return mixed delimited regex, or null when the type has no subtypes
 */
function _content_tag_re_subtypes($type) {
	$subtypes = _content_tag_sub($type);
	if (!count($subtypes)) {
		return null;
	}
	return call_user_func_array('_content_tag_re_combine',$subtypes);
}

/**
 * Creates a tree node for an expression.
 *
 * Besides the given type and expression, the node starts with no name,
 * no attributes, empty content and no children.
 *
 * @param mixed $type expression type
 * @param mixed $expr expression text
 * @return array node structure
 */
function _content_tag_init_node($type,$expr) {
	$node = array();
	$node['type'] = $type;
	$node['expression'] = $expr;
	$node['name'] = null;
	$node['attrs'] = array();
	$node['content'] = '';
	$node['children'] = array();
	return $node;
}

/**
 * Builds an expression tree from a content_tag expression.
 *
 * A string is wrapped in a 'root' node; an existing node is expanded in
 * place. The node's (cleaned) expression is matched against the combined
 * regex of its subtypes, every non-empty named capture that belongs to one
 * of those subtypes becomes a child node, and children are expanded
 * recursively.
 *
 * @param mixed $expr expression string, or a node created by _content_tag_init_node()
 * @return array the node with its 'children' filled in
 */
function _content_tag_build_tree($expr) {
	$node = is_string($expr)?_content_tag_init_node('root',$expr):$expr;
	$parse_expr = _content_tag_clean($node['type'],$node['expression']);
	$subtypes = _content_tag_sub($node['type']);
	if (is_array($subtypes) && count($subtypes)) {
		$re_subtypes = _content_tag_re_subtypes($node['type']);
		$matches = array();
		$found = preg_match_all($re_subtypes,$parse_expr,$matches,PREG_SET_ORDER);
		if ($found === false) {
			// The regex engine gave up (e.g. backtrack limit). Keep the root
			// expression as one text chunk so that the content is passed
			// through unchanged instead of being lost.
			$matches = array();
			if ($node['type'] === 'root' && is_string($parse_expr) && $parse_expr !== '') {
				$matches[] = array('text' => $parse_expr);
			}
		}
		foreach ($matches as $match) {
			foreach ($match as $type => $sub_expr) {
				// Each match holds numeric keys next to the named groups, and
				// empty strings for groups that did not take part; skip both.
				// Compare against '' explicitly so that a captured "0" is kept.
				if (!is_string($type) || !is_string($sub_expr) || $sub_expr === '') {
					continue;
				}
				$aliases = _content_tag_type_aliases($type);
				if (count(array_intersect($aliases,$subtypes))) {
					array_push($node['children'],_content_tag_init_node($type,$sub_expr));
				}
			}
		}
	}
	$node['children'] = array_map('_content_tag_build_tree',$node['children']);
	return $node;
}

/**
 * Pushes a tag onto the stack so that following content is added to it,
 * and counts it in the per-name reference table.
 *
 * @param array $tag   tag node; its 'name' entry is used as the counter key
 * @param array $stack stack of nodes, modified in place
 * @param array $refs  number of stacked tags per name, modified in place
 */
function _content_tag_stack_push($tag,&$stack,&$refs) {
	$stack[] = $tag;
	$name = $tag['name'];
	$refs[$name] = isset($refs[$name]) ? $refs[$name] + 1 : 1;
}

/**
 * Folds the stack down to the named tag.
 *
 * Tags are popped from the top of the stack until the tag called $name has
 * been processed or only the bottom entry is left. The tag called $name is
 * evaluated with the content collected in it; every other popped tag is
 * evaluated as though it were inline (empty content) and its collected
 * content is appended after its output. Each result is appended to the
 * content of the new top of the stack.
 *
 * Nothing happens when $name is not null and no tag of that name is
 * counted in $refs. With $name null every stacked tag is folded.
 *
 * @param mixed $name  tag name to fold down to, or null for everything
 * @param array $stack stack of nodes, modified in place
 * @param array $refs  number of stacked tags per name, modified in place
 */
function _content_tag_stack_fold($name,&$stack,&$refs) {
	if ($name !== null && (!isset($refs[$name]) || !($refs[$name] > 0))) {
		return;
	}
	while (count($stack) > 1) {
		$tag = array_pop($stack);
		if ($tag['name'] === $name) {
			// matched tag: hand its collected content to the handlers
			$output = apply_content_tag_handlers($tag['name'],$tag['content'],$tag['attrs']);
		} else {
			// unmatched tag: treat as inline and keep its content after it
			$output = apply_content_tag_handlers($tag['name'],'',$tag['attrs']).$tag['content'];
		}
		_content_tag_eval_text(array('expression'=>$output),$stack);
		$refs[$tag['name']] -= 1;
		if (!($tag['name'] != $name)) {
			break;
		}
	}
}

/**
 * evaluation of different content_tag expressions
*/
/**
 * Evaluates a name node to its lower-cased expression.
 *
 * @param array $name node with an 'expression' entry
 * @return string lower-cased name
 */
function _content_tag_eval_name($name) {
	$raw = $name['expression'];
	return strtolower($raw);
}
/**
 * Evaluates a character-string node: its expression is returned as is.
 *
 * @param array $char_str node with an 'expression' entry
 * @return mixed the node's expression
 */
function _content_tag_eval_char_str($char_str) {
	$value = $char_str['expression'];
	return $value;
}
/**
 * Evaluates a literal node to its plain value.
 *
 * Surrounding quotes are removed and HTML character/entity references are
 * decoded. This could alternatively be broken down into entity references
 * (with a name subexpression), character references and characters.
 *
 * @param array $literal node with an 'expression' entry
 * @return string decoded value
 */
function _content_tag_eval_literal($literal) {
	$expr = $literal['expression'];
	$expr = _content_tag_clean_literal($expr);
	// Pass ENT_QUOTES explicitly: before PHP 8.1 the default flags leave
	// single-quote references (&#39;, &#039;) undecoded, although a reference
	// is the only way to put an apostrophe inside a single-quoted literal.
	$expr = html_entity_decode($expr,ENT_QUOTES);
	return $expr;
}
/**
 * Evaluates an attribute node to a key/value pair.
 *
 * The first child supplies the key. The second child, when present,
 * supplies the value; an attribute without a value uses its key as value.
 *
 * @param array $attr attribute node with evaluated 'children'
 * @return array array with 'key' and 'value' entries
 */
function _content_tag_eval_attr($attr) {
	$parts = $attr['children'];
	$key = _content_tag_eval_name($parts[0]);
	$value = isset($parts[1]) ? _content_tag_eval_literal($parts[1]) : $key;
	return array('key' => $key, 'value' => $value);
}
/**
 * Evaluates an escaped left square bracket: appends a single "[" to the
 * content of the node on top of the stack.
 *
 * @param array $esc_lsqbr the matched node (not inspected)
 * @param array $stack     stack of nodes, modified in place
 */
function _content_tag_eval_esc_lsqbr($esc_lsqbr,&$stack) {
	// _content_tag_eval_text() reads $text['expression'], so the character
	// must be wrapped in a node-like array rather than passed as a string.
	_content_tag_eval_text(array('expression' => '['),$stack);
}
/**
 * Evaluates an escaped right square bracket: appends a single "]" to the
 * content of the node on top of the stack.
 *
 * @param array $esc_rsqbr the matched node (not inspected)
 * @param array $stack     stack of nodes, modified in place
 */
function _content_tag_eval_esc_rsqbr($esc_rsqbr,&$stack) {
	// _content_tag_eval_text() reads $text['expression'], so the character
	// must be wrapped in a node-like array rather than passed as a string.
	_content_tag_eval_text(array('expression' => ']'),$stack);
}
/**
 * Fills in the name and attributes of a tag node from its children.
 *
 * The first child supplies the tag name; every child of type 'attr' adds
 * one entry to 'attrs' (a repeated key overwrites the earlier value).
 *
 * @param array $tag tag node with 'children'
 * @return array the tag node with 'name' and 'attrs' set
 */
function _content_tag_eval_tag($tag) {
	$children = $tag['children'];
	$tag['name'] = _content_tag_eval_name($children[0]);
	foreach ($children as $child) {
		if ($child['type'] != 'attr') {
			continue;
		}
		$pair = _content_tag_eval_attr($child);
		$tag['attrs'][$pair['key']] = $pair['value'];
	}
	return $tag;
}
/**
 * Evaluates an inline tag and appends the result to the top of the stack.
 *
 * A tag with a registered handler is replaced by the handlers' output;
 * a tag without one is kept as its original expression.
 *
 * @param array $tag   inline tag node
 * @param array $stack stack of nodes, modified in place
 */
function _content_tag_eval_tag_inline($tag,&$stack) {
	$tag = _content_tag_eval_tag($tag);
	$output = has_content_tag_handler($tag['name'])
		? apply_content_tag_handlers($tag['name'],$tag['content'],$tag['attrs'])
		: $tag['expression'];
	_content_tag_eval_text(array('expression' => $output),$stack);
}
/**
 * Evaluates an opening tag.
 *
 * A tag with a registered handler is pushed onto the stack so that the
 * following content is collected in it; a tag without one is appended to
 * the current content as its original expression.
 *
 * @param array $tag   opening tag node
 * @param array $stack stack of nodes, modified in place
 * @param array $refs  number of stacked tags per name, modified in place
 */
function _content_tag_eval_tag_open($tag,&$stack,&$refs) {
	$tag = _content_tag_eval_tag($tag);
	if (!has_content_tag_handler($tag['name'])) {
		_content_tag_eval_text($tag,$stack);
		return;
	}
	_content_tag_stack_push($tag,$stack,$refs);
}
/**
 * Evaluates a closing tag.
 *
 * A tag with a registered handler folds the stack down to the matching
 * opening tag; a tag without one is appended to the current content as its
 * original expression.
 *
 * @param array $tag   closing tag node
 * @param array $stack stack of nodes, modified in place
 * @param array $refs  number of stacked tags per name, modified in place
 */
function _content_tag_eval_tag_close($tag,&$stack,&$refs) {
	$tag['name'] = _content_tag_eval_name($tag['children'][0]);
	if (!has_content_tag_handler($tag['name'])) {
		_content_tag_eval_text($tag,$stack);
		return;
	}
	_content_tag_stack_fold($tag['name'],$stack,$refs);
}
/**
 * Appends a node's expression to the content of the node on top of the
 * stack.
 *
 * @param array $text  node-like array with an 'expression' entry
 * @param array $stack stack of nodes, modified in place
 */
function _content_tag_eval_text($text,&$stack) {
	$top = array_pop($stack);
	$top['content'] .= $text['expression'];
	$stack[] = $top;
}
/**
 * Evaluates all content tags in an expression and returns the result.
 *
 * The expression is parsed into a tree whose root is placed at the bottom
 * of a stack. Each top-level child is then evaluated in order: text and
 * escaped brackets are appended to the node on top of the stack, inline
 * tags are evaluated immediately, opening tags may be pushed onto the stack
 * and closing tags fold it. Tags still open at the end are folded, and the
 * content collected in the root node is returned.
 *
 * @param mixed $expr expression to evaluate
 * @return string evaluated content
 */
function _content_tag_eval($expr) {
	$stack = array();
	$refs  = array();
	$root = _content_tag_build_tree($expr);
	array_push($stack,$root);
	foreach ($root['children'] as $child) {
		// These are exactly the top-level types returned by
		// _content_tag_sub_root(); no other type can appear here.
		switch ($child['type']) {
			case 'tag_inline':
				_content_tag_eval_tag_inline($child,$stack);
				break;
			case 'tag_open':
				_content_tag_eval_tag_open($child,$stack,$refs);
				break;
			case 'tag_close':
				_content_tag_eval_tag_close($child,$stack,$refs);
				break;
			case 'text':
				_content_tag_eval_text($child,$stack);
				break;
			case 'esc_lsqbr':
				_content_tag_eval_esc_lsqbr($child,$stack);
				break;
			case 'esc_rsqbr':
				_content_tag_eval_esc_rsqbr($child,$stack);
				break;
		}
	}
	// close whatever is still open
	_content_tag_stack_fold(null,$stack,$refs);
	return $stack[0]['content'];
}

// Register eval_content_tags() on the 'the_content' filter with priority 11.
// NOTE(review): presumably 11 is chosen so that this runs after callbacks
// registered at priority 10 — confirm.
add_filter('the_content','eval_content_tags',11);
