<?php
/*
Plugin Name: Tagcode Interpreter
Description: Can be used to replace buggy shortcodes
Version: 0.9
Author: Jacob Beauregard
*/

/**
 * global registry of tagcode names (name => true while a handler is registered)
*/
// Registry of tagcode names: name => true once registered, false after its
// handlers have all been removed.
// Declared global explicitly so the registry is created in the global scope even
// if this file is included from inside a function (NOTE(review): WordPress is
// understood to do this while activating a plugin - confirm).
global $_tagcode_register;
$_tagcode_register = array();

/**
 * tagcode public interface
*/
/**
 * Registers $handler as a handler for the tagcode $name.
 *
 * Flags $name in the global registry and hooks $handler onto the tagcode's
 * filter alias, asking for three arguments to be passed to it.
 *
 * @return mixed whatever add_filter() returns
 */
function add_tagcode($name,$handler,$priority=10) {
	global $_tagcode_register;
	$filter_tag = _tagcode_ref_name($name);
	$_tagcode_register[$name] = true;
	$added = add_filter($filter_tag,$handler,$priority,3);
	return $added;
}
/**
 * Runs the handlers hooked on the tagcode $name.
 *
 * The filter is applied with the argument list ($name, $content, $attrs);
 * the result of apply_filters_ref_array() is returned as is.
 */
function apply_tagcodes($name,$content,$attrs) {
	return apply_filters_ref_array(
		_tagcode_ref_name($name),
		array($name,$content,$attrs)
	);
}
/**
 * Public entry point: evaluates the tagcodes found in $content and returns
 * the resulting string (delegates to _tagcode_eval()).
 */
function eval_tagcodes($content) {
	$evaluated = _tagcode_eval($content);
	return $evaluated;
}
/**
 * Reports whether the tagcode $name has handlers (or the specific $handler),
 * by asking has_filter() about the tagcode's filter alias.
 */
function has_tagcode($name,$handler=false) {
	$filter_tag = _tagcode_ref_name($name);
	return has_filter($filter_tag,$handler);
}
/**
 * Removes every handler of the tagcode $name (optionally only at $priority).
 *
 * When no handler is left afterwards the name is flagged as unregistered in
 * the global registry. Returns the result of remove_all_filters().
 */
function remove_all_tagcodes($name,$priority=false) {
	global $_tagcode_register;
	$result = remove_all_filters(_tagcode_ref_name($name),$priority);
	if (has_tagcode($name)) {
		return $result;
	}
	$_tagcode_register[$name] = false;
	return $result;
}
/**
 * Removes a single $handler (registered at $priority) from the tagcode $name.
 *
 * When no handler is left afterwards the name is flagged as unregistered in
 * the global registry. Returns the result of remove_filter().
 */
function remove_tagcode($name,$handler,$priority=10) {
	global $_tagcode_register;
	$filter_tag = _tagcode_ref_name($name);
	$result = remove_filter($filter_tag,$handler,$priority,3);
	if (has_tagcode($name)) {
		return $result;
	}
	$_tagcode_register[$name] = false;
	return $result;
}

/**
 * alias to use in filter for given tag name
*/
/**
 * Builds the filter name used for the tagcode $name: "_tagcode_" followed by
 * the lower-cased name.
 *
 * @param string $name tagcode name
 * @return string filter alias
 */
function _tagcode_ref_name($name) {
	// Must use $name: reading an undefined $args here made every tagcode share
	// the single filter "_tagcode_".
	return "_tagcode_".strtolower($name);
}

/**
 * types of tagcode expressions
*/
/**
 * Lists the names of every tagcode expression type, in a fixed order.
 */
function _tagcode_types() {
	$types = array();
	// escaped brackets
	$types[] = 'esc_lsqbr';
	$types[] = 'esc_rsqbr';
	// tags
	$types[] = 'tag_inline';
	$types[] = 'tag_open';
	$types[] = 'tag_close';
	// attributes and their values
	$types[] = 'attr';
	$types[] = 'literal';
	$types[] = 'entity_ref';
	$types[] = 'char_ref';
	$types[] = 'char_str';
	// names and plain text
	$types[] = 'name';
	$types[] = 'text';
	return $types;
}

/*
 * tagcode expressions that are subtypes of other tagcode expressions
*/
/**
 * Expression types that may appear at the top level of a tagcode document.
 */
function _tagcode_sub_root() {
	$top_level = array(
		'esc_rsqbr',
		'esc_lsqbr',
		'tag_inline',
		'tag_open',
		'tag_close',
		'text'
	);
	return $top_level;
}
/**
 * Subtypes of an inline tag: attributes and the tag name.
 */
function _tagcode_sub_tag_inline() {
	$subtypes = array('attr','name');
	return $subtypes;
}
/**
 * Subtypes of an opening tag: attributes and the tag name.
 */
function _tagcode_sub_tag_open() {
	$subtypes = array('attr','name');
	return $subtypes;
}
/**
 * Subtypes of a closing tag: only the tag name.
 */
function _tagcode_sub_tag_close() {
	$subtypes = array('name');
	return $subtypes;
}
// text has no subtypes (returns null)
function _tagcode_sub_text() {
	return null;
}
// name has no subtypes (returns null)
function _tagcode_sub_name() {
	return null;
}
/**
 * Subtypes of an attribute: its name and its literal value.
 */
function _tagcode_sub_attr() {
	$subtypes = array('name','literal');
	return $subtypes;
}
// literal currently reports no subtypes (returns null); the commented-out
// line below lists the subtypes it would report if that were re-enabled
function _tagcode_sub_literal() {
	return null;
	//return array('char_ref','entity_ref','char_str');
}
// char_str has no subtypes (returns null)
function _tagcode_sub_char_str() {
	return null;
}
// char_ref has no subtypes (returns null)
function _tagcode_sub_char_ref() {
	return null;
}
// entity_ref has no subtypes (returns null)
function _tagcode_sub_entity_ref() {
	return null;
}
// esc_lsqbr (escaped left square bracket) has no subtypes (returns null)
function _tagcode_sub_esc_lsqbr() {
	return null;
}
// esc_rsqbr (escaped right square bracket) has no subtypes (returns null)
function _tagcode_sub_esc_rsqbr() {
	return null;
}

/**
 * returns subtypes of expression type
*/
/**
 * Returns the subtypes of the expression type $type.
 *
 * @param string $type an entry of _tagcode_types(), or 'root'
 * @return array|null list of subtype names; null when the type has no
 *                    subtypes or is not a known type
 */
function _tagcode_sub($type='root') {
	// Initialised up front so an unknown type yields null instead of reading
	// an undefined variable.
	$subtypes = null;
	// Strict comparisons keep non-string input (e.g. 0) from passing as a type.
	if ($type === 'root' || in_array($type,_tagcode_types(),true)) {
		$subtypes = call_user_func("_tagcode_sub_{$type}");
	}
	return $subtypes;
}

/**
 * regular expressions to match valid expressions
*/
/**
 * Regex fragment for a name: a letter followed by letters, digits, '-', '_',
 * ':' or '.'. With $named it is wrapped in the named group "name".
 */
function _tagcode_re_name($named=false) {
	$pattern = '[A-Za-z][-A-Za-z0-9_:.]*';
	if ($named) {
		return "(?P<name>{$pattern})";
	}
	return $pattern;
}
/**
 * Regex fragment matching exactly one of the currently registered tagcode names.
 *
 * Only registry entries whose flag is still true are used, so names whose
 * handlers were all removed no longer match. Each name is quoted for use
 * inside a '/'-delimited pattern.
 *
 * @param bool $named wrap the fragment in the named group "name"
 * @return string regex fragment (never matches when nothing is registered)
 */
function _tagcode_re_registered_name($named=false) {
	global $_tagcode_register;
	$tags = array();
	if (is_array($_tagcode_register)) {
		foreach ($_tagcode_register as $tag => $active) {
			// array keys may have been cast to int; compare/quote as strings
			$tag = (string)$tag;
			if ($active && $tag !== '') {
				$tags[] = preg_quote($tag,'/');
			}
		}
	}
	if (count($tags) === 0) {
		//empty negative lookahead: guaranteed failure when there are no registered tagcodes
		$expr = '(?!)';
	} else {
		// The lookahead rejects a registered name that is merely a prefix of a
		// longer name; the character class mirrors the name-continuation
		// characters of _tagcode_re_name(). Matching the alternation directly
		// (instead of a trailing lookbehind) also stops names that merely END
		// with a registered name, e.g. "xfoo" for "foo", from matching.
		$expr = '(?:' . join('|',$tags) . ')(?![-A-Za-z0-9_:.])';
	}
	return $named?"(?P<name>{$expr})":$expr;
}
/**
 * Regex fragment for a run of characters containing neither '%' nor '&'.
 * With $named it is wrapped in the named group "char_str".
 */
function _tagcode_re_char_str($named=false) {
	$pattern = '[^%&]+';
	if ($named) {
		return "(?P<char_str>{$pattern})";
	}
	return $pattern;
}
/**
 * Single-quote variant of _tagcode_re_char_str(): the run may not contain a
 * single quote either. With $named it is wrapped in the group "char_str".
 */
function _tagcode_re_char_str_sq($named=false) {
	$pattern = '[^%&\']+';
	if ($named) {
		return "(?P<char_str>{$pattern})";
	}
	return $pattern;
}
/**
 * Double-quote variant of _tagcode_re_char_str(): the run may not contain a
 * double quote either. With $named it is wrapped in the group "char_str".
 */
function _tagcode_re_char_str_dq($named=false) {
	$pattern = "[^%&\"]+";
	if ($named) {
		return "(?P<char_str>{$pattern})";
	}
	return $pattern;
}
/**
 * Regex fragment for a numeric character reference: "&#" followed by decimal
 * digits, or by "x" and hex digits, then ";". With $named it is wrapped in
 * the named group "char_ref".
 */
function _tagcode_re_char_ref($named=false) {
	$pattern = '&#(?:[0-9]+|x[0-9a-fA-F]+);';
	if ($named) {
		return "(?P<char_ref>{$pattern})";
	}
	return $pattern;
}
/**
 * Regex fragment for a named entity reference: "&", a name, ";".
 * With $named it is wrapped in the named group "entity_ref".
 */
function _tagcode_re_entity_ref($named=false) {
	$pattern = '&' . _tagcode_re_name() . ';';
	if ($named) {
		return "(?P<entity_ref>{$pattern})";
	}
	return $pattern;
}
/**
 * Regex fragment for a literal: unquoted, single-quoted or double-quoted,
 * tried in that order. With $named it is wrapped in the named group "literal".
 */
function _tagcode_re_literal($named=false) {
	$variants = array(
		_tagcode_re_literal_nq(),
		_tagcode_re_literal_sq(),
		_tagcode_re_literal_dq()
	);
	$pattern = '(?:' . implode('|',$variants) . ')';
	if ($named) {
		return "(?P<literal>{$pattern})";
	}
	return $pattern;
}
/**
 * Regex fragment for an unquoted literal: one or more letters, digits,
 * '-', '_', ':' or '.'.
 */
function _tagcode_re_literal_nq() {
	return '[-a-zA-Z0-9_:.]+';
}
/**
 * Regex fragment for a single-quoted literal: a quote, any mix of character
 * runs, character references and entity references, and a closing quote.
 *
 * @return string regex fragment
 */
function _tagcode_re_literal_sq() {
	//literal with single quotes
	$char_str = _tagcode_re_char_str_sq();
	$char_ref = _tagcode_re_char_ref();
	$entity_ref = _tagcode_re_entity_ref();
	// The group is possessive (*+): none of the alternatives can consume the
	// closing quote, so giving characters back can never produce a match, and
	// without it an unterminated quote makes "(x+)*" backtrack exponentially
	// until PCRE gives up on the whole content.
	$expr = "'(?:{$char_str}|{$char_ref}|{$entity_ref})*+'";
	return $expr;
}
/**
 * Regex fragment for a double-quoted literal: a quote, any mix of character
 * runs, character references and entity references, and a closing quote.
 *
 * @return string regex fragment
 */
function _tagcode_re_literal_dq() {
	//literal with double quotes
	$char_str = _tagcode_re_char_str_dq();
	$char_ref = _tagcode_re_char_ref();
	$entity_ref = _tagcode_re_entity_ref();
	// The group is possessive (*+): none of the alternatives can consume the
	// closing quote, so giving characters back can never produce a match, and
	// without it an unterminated quote makes "(x+)*" backtrack exponentially
	// until PCRE gives up on the whole content.
	$expr =  "\"(?:{$char_str}|{$char_ref}|{$entity_ref})*+\"";
	return $expr;
}
/**
 * Regex fragment for an attribute: a name, optionally followed by "=" and a
 * literal, that does not start at the very beginning of the subject.
 * With $named it is wrapped in the named group "attr".
 */
function _tagcode_re_attr($named=false) {
	$pattern = '(?<!^)' . _tagcode_re_name() . '(?:=' . _tagcode_re_literal() . ')?';
	if ($named) {
		return "(?P<attr>{$pattern})";
	}
	return $pattern;
}
/**
 * Regex fragment for an escaped left square bracket, written "[[".
 * With $named it is wrapped in the named group "esc_lsqbr".
 */
function _tagcode_re_esc_lsqbr($named=false) {
	$pattern = '\[\[';
	if ($named) {
		return "(?P<esc_lsqbr>{$pattern})";
	}
	return $pattern;
}
/**
 * Regex fragment for an escaped right square bracket: "]]" that is followed
 * by zero or more further "]]" pairs and then no lone "]".
 * With $named it is wrapped in the named group "esc_rsqbr".
 */
function _tagcode_re_esc_rsqbr($named=false) {
	$pattern = '\]\](?=(?:\]\])*(?!\]))';
	if ($named) {
		return "(?P<esc_rsqbr>{$pattern})";
	}
	return $pattern;
}
/**
 * Regex fragment for an inline tag: "[", a registered name, whitespace
 * separated attributes, optional whitespace and "/]".
 * With $named the part between the brackets is captured as "tag_inline".
 */
function _tagcode_re_tag_inline($named=false) {
	$inner = _tagcode_re_registered_name() . '(?:\s+' . _tagcode_re_attr() . ')*';
	if ($named) {
		return '\[(?P<tag_inline>' . $inner . ')\s*\/\]';
	}
	return '\[' . $inner . '\s*\/\]';
}
/**
 * Regex fragment for an opening tag: "[", a registered name, whitespace
 * separated attributes, optional whitespace and "]".
 * With $named the part between the brackets is captured as "tag_open".
 */
function _tagcode_re_tag_open($named=false) {
	$inner = _tagcode_re_registered_name() . '(?:\s+' . _tagcode_re_attr() . ')*';
	if ($named) {
		return '\[(?P<tag_open>' . $inner . ')\s*\]';
	}
	return '\[' . $inner . '\s*\]';
}
/**
 * Regex fragment for a closing tag: "[/", a registered name, optional
 * whitespace and "]". With $named the name is captured as "tag_close".
 */
function _tagcode_re_tag_close($named=false) {
	$inner = _tagcode_re_registered_name();
	if ($named) {
		return '\[\/(?P<tag_close>' . $inner . ')\s*\]';
	}
	return '\[\/' . $inner . '\s*\]';
}
/**
 * Regex fragment for plain text: any single character (newlines included)
 * followed by a run of characters that are not square brackets.
 * With $named it is wrapped in the named group "text".
 */
function _tagcode_re_text($named=false) {
	$pattern = '(?s:.[^\[\]]*)';
	if ($named) {
		return "(?P<text>{$pattern})";
	}
	return $pattern;
}

/**
 * combines the tagcode regexes of the types given as parameters into one delimited regex
*/
/**
 * Takes any number of expression type names and returns a '/'-delimited
 * regex that is the alternation of their named-group patterns, in the order
 * the types were given.
 */
function _tagcode_re_combine() {
	$alternatives = array();
	foreach (func_get_args() as $type) {
		// 1 asks the pattern builder for its named-group form
		$alternatives[] = call_user_func("_tagcode_re_{$type}",1);
	}
	return '/' . implode('|',$alternatives) . '/';
}

/**
 * returns regular expression to match subtypes of an expression (ex. attr => {$name}={$literal})
*/
/**
 * Builds the regex that matches the subtypes of the expression type $type
 * (ex. attr => {$name}={$literal}).
 *
 * @param string $type expression type name
 * @return string|null delimited regex, or null when the type has no subtypes
 */
function _tagcode_re_subtypes($type) {
	$subtypes = _tagcode_sub($type);
	// _tagcode_sub() yields null for leaf types; count(null) is a TypeError on
	// PHP 8, so test the type before counting.
	if (!is_array($subtypes) || count($subtypes) === 0) {
		return null;
	}
	// array_values() guarantees positional arguments
	return call_user_func_array('_tagcode_re_combine',array_values($subtypes));
}

/**
 * evaluation of different tagcode expressions
*/
/**
 * Evaluates a name node to its lower-cased expression.
 */
function _tagcode_eval_name($name) {
	$raw = $name['expression'];
	return strtolower($raw);
}
/**
 * Evaluates a char_str node: its expression is returned unchanged.
 */
function _tagcode_eval_char_str($char_str) {
	$text = $char_str['expression'];
	return $text;
}
/**
 * Evaluates a numeric character reference node to the character it denotes.
 *
 * The expression may be a complete reference ("&#65;", "&#x41;") as matched
 * by _tagcode_re_char_ref(), or just its bare code ("65", "x41"). Decoding is
 * delegated to html_entity_decode() with UTF-8 output; a reference it cannot
 * decode is returned in its "&#...;" form.
 *
 * @param array $char_ref node with an 'expression' entry
 * @return string decoded character
 */
function _tagcode_eval_char_ref($char_ref) {
	$expr = trim($char_ref['expression']);
	// Normalise to the full "&#...;" form. Previously the leading "&#" of a
	// matched reference defeated the 'x' check and chr() was handed a
	// non-numeric string.
	if (strncmp($expr,'&#',2) !== 0) {
		$expr = '&#' . $expr;
	}
	if (substr($expr,-1) !== ';') {
		$expr .= ';';
	}
	return html_entity_decode($expr,ENT_QUOTES,'UTF-8');
}
/**
 * Evaluates a named entity reference node (e.g. "&amp;") to its character,
 * using the reverse of PHP's HTML_ENTITIES translation table.
 *
 * The exact spelling is looked up first because entity names are case
 * sensitive ("&Eacute;" and "&eacute;" are different characters); the
 * lower-cased spelling is only a fallback. An unknown entity is returned
 * unchanged.
 *
 * @param array $entity_ref node with an 'expression' entry
 * @return string decoded character, or the original expression
 */
function _tagcode_eval_entity_ref($entity_ref) {
	$expr = $entity_ref['expression'];
	$lookup = array_flip(get_html_translation_table(HTML_ENTITIES));
	if (isset($lookup[$expr])) {
		return $lookup[$expr];
	}
	$lower = strtolower($expr);
	if (isset($lookup[$lower])) {
		return $lookup[$lower];
	}
	return $expr;
}
/**
 * Evaluates a literal node: HTML entities in the expression are decoded and,
 * when the decoded text starts with a single or double quote, its first and
 * last characters are dropped.
 */
function _tagcode_eval_literal($literal) {
	$decoded = html_entity_decode($literal['expression']);
	$is_quoted = ($decoded[0] == "'" || $decoded[0] == '"');
	if (!$is_quoted) {
		return $decoded;
	}
	return substr($decoded,1,strlen($decoded)-2);
}
/**
 * Evaluates an attr node into array('key' => ..., 'value' => ...).
 *
 * The first child is the attribute name; the second child, when present, is
 * its literal value. An attribute without a value uses its key as the value.
 */
function _tagcode_eval_attr($attr) {
	$parts = $attr['children'];
	$key = _tagcode_eval_name($parts[0]);
	$value = isset($parts[1]) ? _tagcode_eval_literal($parts[1]) : $key;
	return array('key' => $key, 'value' => $value);
}
/**
 * Evaluates an escaped left bracket: appends "[" to the content of the node
 * on top of $stack and returns that updated node.
 */
function _tagcode_eval_esc_lsqbr($esc_lsqbr,&$stack) {
	$top = array_pop($stack);
	$top['content'] .= '[';
	$stack[] = $top;
	return $top;
}
/**
 * Evaluates an escaped right bracket: appends "]" to the content of the node
 * on top of $stack and returns that updated node.
 */
function _tagcode_eval_esc_rsqbr($esc_rsqbr,&$stack) {
	$top = array_pop($stack);
	$top['content'] .= ']';
	$stack[] = $top;
	return $top;
}
/**
 * Evaluates an inline tag: runs the tag's handlers with empty content and the
 * tag's attributes, then appends the output to the content of the node on top
 * of $stack. Returns that updated node.
 */
function _tagcode_eval_tag_inline($tag_inline,&$stack) {
	$parts = $tag_inline['children'];
	$tag_name = _tagcode_eval_name($parts[0]);

	// collect key => value pairs from the attr children
	$attrs = array();
	foreach ($parts as $part) {
		if ($part['type'] != 'attr') {
			continue;
		}
		$pair = _tagcode_eval_attr($part);
		$attrs[$pair['key']] = $pair['value'];
	}

	$output = apply_tagcodes($tag_name,'',$attrs);

	// the output becomes part of whatever node is currently being built
	$top = array_pop($stack);
	$top['content'] .= $output;
	array_push($stack,$top);
	return $top;
}
/**
 * Evaluates an opening tag: pushes a new content node for the tag (name and
 * attributes, empty content) onto $stack and counts the open tag in $refs.
 *
 * @param array $tag_open parsed tag_open node with 'children'
 * @param array $stack    stack of content nodes, modified in place
 * @param array $refs     tag name => number of currently open tags, modified in place
 * @return array the node that was pushed
 */
function _tagcode_eval_tag_open($tag_open,&$stack,&$refs) {
	$children = $tag_open['children'];
	$name = _tagcode_eval_name($children[0]);
	$attrs = array();
	foreach ($children as $child) {
		if ($child['type'] == 'attr') {
			$attr = _tagcode_eval_attr($child);
			$attrs[$attr['key']] = $attr['value'];
		}
	}
	$data = array('name' => $name, 'attrs' => $attrs);
	$node = array('node' => $data, 'content' => '');
	//throws itself on the stack with attributes, etc.
	array_push($stack,$node);
	// first occurrence of a name: start its counter at 0 rather than
	// incrementing an undefined key
	if (!isset($refs[$name])) {
		$refs[$name] = 0;
	}
	$refs[$name] += 1;
	return $node;
}
/**
 * Evaluates a closing tag.
 *
 * If a tag with the same name is open, the stack is unwound up to and
 * including that tag: each popped node is run through the handlers of ITS OWN
 * tag name with its accumulated content and attributes, and the output is
 * appended to the node beneath it. A closing tag with no matching open tag
 * is ignored.
 *
 * @param array $tag_close parsed tag_close node with 'children'
 * @param array $stack     stack of content nodes, modified in place
 * @param array $refs      tag name => number of currently open tags, modified in place
 * @return array array('name' => closing tag name)
 */
function _tagcode_eval_tag_close($tag_close,&$stack,&$refs) {
	$name = _tagcode_eval_name($tag_close['children'][0]);
	if (isset($refs[$name]) && $refs[$name] > 0) {
		// count($stack) > 1 ensures the bottom (root) node is never popped,
		// so the loop cannot run away on an inconsistent counter
		while (count($stack) > 1) {
			$child = array_pop($stack);
			$child_name  = $child['node']['name'];
			$child_attrs = $child['node']['attrs'];
			$parent = array_pop($stack);
			// use the popped node's own name: an unclosed tag nested inside
			// must go through its own handlers and its own counter, not
			// those of the tag being closed
			$parent['content'] .= apply_tagcodes($child_name,$child['content'],$child_attrs);
			if (isset($refs[$child_name])) {
				$refs[$child_name] -= 1;
			}
			array_push($stack,$parent);
			if ($child_name == $name) {
				break;
			}
		}
	}
	return array('name' => $name);
}
/**
 * Evaluates a text node: appends its expression to the content of the node
 * on top of $stack and returns the appended text.
 */
function _tagcode_eval_text($text,&$stack) {
	$fragment = $text['expression'];
	$top = array_pop($stack);
	$top['content'] .= $fragment;
	$stack[] = $top;
	return $fragment;
}
/**
 * Evaluates a tagcode document and returns the resulting content string.
 *
 * The document is parsed into a tree, its top-level children are evaluated in
 * order against a stack of content nodes (the bottom one being the root), and
 * finally any tags still open are folded into their parents.
 */
function _tagcode_eval($expr) {
	$tree = _tagcode_build_tree($expr);

	$refs  = array();
	// the root content node sits at the bottom of the stack
	$stack = array(array('node' => 'root', 'content' => ''));

	$top_level = _tagcode_sub();
	foreach ($tree['children'] as $item) {
		$kind = $item['type'];
		if (!in_array($kind,$top_level)) {
			continue;
		}
		if ($kind == 'tag_inline') {
			_tagcode_eval_tag_inline($item,$stack,$refs);
		} elseif ($kind == 'tag_open') {
			_tagcode_eval_tag_open($item,$stack,$refs);
		} elseif ($kind == 'tag_close') {
			_tagcode_eval_tag_close($item,$stack,$refs);
		} elseif ($kind == 'text') {
			_tagcode_eval_text($item,$stack);
		} elseif ($kind == 'esc_lsqbr') {
			_tagcode_eval_esc_lsqbr($item,$stack);
		} elseif ($kind == 'esc_rsqbr') {
			_tagcode_eval_esc_rsqbr($item,$stack);
		} else {
			call_user_func("_tagcode_eval_{$kind}",$item);
		}
	}

	// Tags that were never closed: run the tag's handlers with empty content,
	// then append the text collected after the tag, working down to the root.
	$node = array_pop($stack);
	while ($node['node'] != 'root') {
		$below = array_pop($stack);
		$below['content'] .= apply_tagcodes($node['node']['name'],'',$node['node']['attrs']);
		$below['content'] .= $node['content'];
		$node = $below;
	}
	return $node['content'];
}

/**
* builds an expression tree from a tagcode expression
*/
/**
 * Builds an expression tree from a tagcode expression.
 *
 * @param string|array $parent a raw expression string (treated as the root
 *                             node) or a node array('type' => ..., 'expression' => ...)
 * @return array the node with a 'children' entry holding its parsed sub-nodes
 */
function _tagcode_build_tree($parent) {
	if (is_string($parent)) {
		$parent = array('type' => 'root', 'expression' => $parent);
	}
	$parent_type = $parent['type'];
	$parent_expression = $parent['expression'];
	$re_subtypes = _tagcode_re_subtypes($parent_type);
	$nodes = array();
	if ($re_subtypes) {
		$match_set = array();
		$found = preg_match_all($re_subtypes,$parent_expression,$match_set,PREG_SET_ORDER);
		if ($found === false) {
			// PCRE gave up (e.g. backtrack limit). Keep the root expression as
			// one plain text node so the content is passed through instead of
			// being dropped.
			if ($parent_type === 'root' && strlen($parent_expression) > 0) {
				$nodes[0] = array('type' => 'text', 'expression' => $parent_expression);
			}
		} else {
			$types = _tagcode_types();
			foreach ($match_set as $index => $match) {
				foreach ($types as $type) {
					// groups after the one that matched are absent from
					// $match, hence the isset()
					if (isset($match[$type]) && strlen($match[$type]) > 0) {
						$nodes[$index] = array('type' => $type, 'expression' => $match[$type]);
						// only one alternative of the combined regex can match
						break;
					}
				}
			}
		}
	}
	$parent['children'] = array_map('_tagcode_build_tree',$nodes);
	return $parent;
}

// Hook eval_tagcodes onto the 'the_content' filter at priority 11.
add_filter('the_content','eval_tagcodes',11);
