<?php
/**
 * WP_Formatting_AutoP
 */

/**
 * Converts loosely formatted text/HTML into block-structured HTML.
 *
 * The input is parsed into a small tree of nodes (text, space, phrasing,
 * container, grouping, flow, comment) and then serialised back to a string:
 * runs of text and phrasing elements become paragraphs, blank lines separate
 * paragraphs and nested containers are indented by two spaces per level.
 */
class WP_Formatting_AutoP
{
  /**
   * Most elements that are used in the body of documents
   * and applications are categorized as flow content.
   *
   * @see http://www.w3.org/TR/html5/dom.html#flow-content
   */
  protected static $flowContent = array(
    'a', 'abbr', 'address', 'area', 'article', 'aside', 'audio', 'b', 'bdi',
    'bdo', 'blockquote', 'br', 'button', 'canvas', 'cite', 'code', 'data',
    'datalist', 'del', 'dfn', 'div', 'dl', 'em', 'embed', 'fieldset', 'figure',
    'footer', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hr', 'i',
    'iframe', 'img', 'input', 'ins', 'kbd', 'keygen', 'label', 'main', 'map',
    'mark', 'math', 'meter', 'nav', 'noscript', 'object', 'ol', 'output', 'p',
    'pre', 'progress', 'q', 'ruby', 's', 'samp', 'script', 'section', 'select',
    'small', 'span', 'strong', 'sub', 'sup', 'svg', 'table', 'template',
    'textarea', 'time', 'u', 'ul', 'var', 'video', 'wbr',
  );

  /**
   * Phrasing content is the text of the document,
   * as well as elements that mark up that text at the intra-paragraph level.
   *
   * @see http://www.w3.org/TR/html5/dom.html#phrasing-content
   */
  protected static $phrasingContent = array(
    'a', 'abbr', 'area', 'audio', 'b', 'bdi', 'bdo', 'br', 'button', 'canvas',
    'cite', 'code', 'data', 'datalist', 'del', 'dfn', 'em', 'embed', 'i',
    'iframe', 'img', 'input', 'ins', 'kbd', 'keygen', 'label', 'map', 'mark',
    'math', 'meter', 'noscript', 'object', 'output', 'progress', 'q', 'ruby',
    's', 'samp', 'script', 'select', 'small', 'span', 'strong', 'sub', 'sup',
    'svg', 'template', 'textarea', 'time', 'u', 'var', 'video', 'wbr',
  );

  /**
   * Grouping of elements where flow content is expected.
   *
   * Text found directly inside these elements is never wrapped in <p>.
   *
   * @see http://www.w3.org/TR/html5/grouping-content.html
   * @see http://www.w3.org/TR/html5/tabular-data.html
   */
  protected static $groupingContent = array(
    // Grouping content
    'ol', 'ul', 'dl',
    // Tabular data
    'table', 'tbody', 'thead', 'tfoot', 'tr',
  );

  /**
   * Where flow content is expected.
   *
   * The inner markup of these elements is parsed recursively.
   *
   * @see http://www.w3.org/TR/html5/grouping-content.html#the-p-element
   */
  protected static $flowContainer = array(
    // Flow content
    /*'a',*/ 'address', 'article', 'aside', 'audio', 'blockquote', 'canvas', 'del',
    'div', 'fieldset', 'figure', 'footer', 'form', 'header', 'iframe', 'ins',
    'main', 'map', 'nav', 'noscript', 'object', 'section', 'video',
    // Grouping content
    'li', 'dt', 'dd', 'figcaption',
    // Tabular data
    'td', 'th', 'caption'
  );

  /**
   * Containers whose content is wrapped in <p> even when it is a single node.
   */
  protected static $forceParagraph = array(
    'blockquote',
  );

  /**
   * @param array $el Node or regex match; only its 'tag' key is inspected.
   * @return bool True when the tag is listed in $flowContent.
   */
  protected function isFlowContent($el)
  {
    return isset($el['tag']) && in_array($el['tag'], self::$flowContent, true);
  }

  /**
   * @param array $el Node or regex match; only its 'tag' key is inspected.
   * @return bool True when the tag is listed in $phrasingContent.
   */
  protected function isPhrasingContent($el)
  {
    return isset($el['tag']) && in_array($el['tag'], self::$phrasingContent, true);
  }

  /**
   * @param array $el Node or regex match; only its 'tag' key is inspected.
   * @return bool True when the tag is listed in $groupingContent.
   */
  protected function isGroupingContent($el)
  {
    return isset($el['tag']) && in_array($el['tag'], self::$groupingContent, true);
  }

  /**
   * @param array $el Node or regex match; only its 'tag' key is inspected.
   * @return bool True when the tag is listed in $flowContainer.
   */
  protected function isFlowContainer($el)
  {
    return isset($el['tag']) && in_array($el['tag'], self::$flowContainer, true);
  }

  /**
   * @param array $el Node; only its 'tag' key is inspected (the root has none).
   * @return bool True when the tag is listed in $forceParagraph.
   */
  protected function isForceParagraph($el)
  {
    return isset($el['tag']) && in_array($el['tag'], self::$forceParagraph, true);
  }

  /**
   * Converts single newlines to "<br>\n" when line-break conversion is on.
   *
   * @param string $text
   * @param bool   $br   When false the text is returned unchanged.
   * @return string
   */
  protected static function applyLineBreaks($text, $br)
  {
    return $br ? str_replace("\n", "<br>\n", $text) : $text;
  }

  /**
   * Parses $text and serialises the resulting tree as paragraph-wrapped HTML.
   *
   * @param string $text Raw text / HTML.
   * @param bool   $br   Whether single newlines inside a paragraph become
   *                     "<br>\n" (default) or are left as plain newlines.
   * @return string
   */
  public function autop($text, $br = true)
  {
    return $this->treeToString($this->parse($text), $br);
  }

  /**
   * Parses $text into a node tree.
   *
   * Node shapes produced here:
   *  - root:                 type, children
   *  - container / grouping: type, tag, attrs, children
   *  - text:                 type, text
   *  - space:                type, space (whitespace next to a text run)
   *  - phrasing / flow / comment: type, raw
   *
   * Nested markup is handled with an explicit stack instead of recursion;
   * each stack frame holds the markup to parse and a reference to the node
   * that receives the parsed children.
   *
   * @param string $text
   * @return array Root node.
   */
  public function parse($text)
  {
    $root = array(
      'type' => 'root', 'children' => array(),
    );

    // Normalise every newline sequence to "\n". preg_replace() returns null
    // when the subject is not valid UTF-8; fall back to a byte-wise
    // replacement so the input is not silently discarded.
    $normalized = preg_replace('%\R%u', "\n", (string) $text);
    if ($normalized === null) {
      $normalized = str_replace(array("\r\n", "\r"), "\n", (string) $text);
    }

    // Two or more line breaks (optionally surrounded by whitespace)
    // separate paragraphs.
    $spacePattern = '%([[:space:]]*(\R)[[:space:]]*){2,}%u';

    $stack = array(
      array($normalized, &$root),
    );

    /** -----------------------------
     * While stack
     */

    while (!empty($stack)) {
      $_s    = array_pop($stack);
      $chunk = $_s[0];
      $el    = &$_s[1];
      $c     = -1;

      $_m = null;
      if (strpos($chunk, '<') !== false) {
        if (preg_match_all(self::getRegex(), $chunk, $_m, PREG_SET_ORDER) === false) {
          $_m = null; // Regex error: treat the chunk as plain text below.
        }
      }
      if ($_m === null) {
        $_m = array(array('text' => $chunk));
      }

      /** -----------------------------
       * For each element (text, tags, comments)
       */

      for ($i = 0, $ilen = count($_m); $i < $ilen; $i++) {
        $m       = $_m[$i];
        $isFirst = ($i === 0);
        $isLast  = ($i === $ilen - 1);

        /** ---------------------------
         * Text
         */

        $value = isset($m['text']) ? $m['text'] : null;

        if ($value !== null && $value !== '') {

          // Split off the whitespace surrounding the text run.
          $lead  = '';
          $trail = '';
          if (preg_match('%^(?<a>[[:space:]]*).*?(?<b>[[:space:]]*)$%su', $value, $s)) {
            $lead  = $s['a'];
            $trail = $s['b'];
          }

          $value = trim($value);

          // Whitespace at the outer edges of the chunk is insignificant, but
          // the whitespace in front of a non-empty final text run still joins
          // it to the preceding element and must be kept.
          if (!$isFirst && (!$isLast || $value !== '') && !preg_match($spacePattern, $lead)) {
            $el['children'][++$c] = array(
              'type'  => 'space',
              'space' => $lead,
            );
          }

          if ($value !== '') {
            $paragraphs = preg_split($spacePattern, $value);
            if ($paragraphs === false) {
              $paragraphs = array($value);
            }

            foreach ($paragraphs as $p) {
              $el['children'][++$c] = array(
                'type' => 'text',
                'text' => $p,
              );
            }

            if (!$isLast && !preg_match($spacePattern, $trail)) {
              $el['children'][++$c] = array(
                'type'  => 'space',
                'space' => $trail,
              );
            }
          }
          continue;
        } // Text

        /** ---------------------------
         * Tag
         */

        // With PREG_SET_ORDER trailing unmatched groups are absent from the
        // match, hence the isset() guards.
        $raw   = isset($m['raw'])   ? $m['raw']   : null;
        $tag   = isset($m['tag'])   ? $m['tag']   : null;
        $attrs = isset($m['attrs']) ? $m['attrs'] : null;
        $inner = isset($m['inner']) ? $m['inner'] : null;

        if ($tag !== null && $tag !== '') {

          $tag   = strtolower($tag);
          $probe = array('tag' => $tag);

          $nested = null;
          if ($this->isFlowContainer($probe)) {
            $nested = 'container';
          } else if ($this->isGroupingContent($probe)) {
            $nested = 'grouping';
          }

          if ($nested !== null) {

            $el['children'][++$c] = array(
              'type'     => $nested,
              'tag'      => $tag,
              'attrs'    => $attrs,
              'children' => array(),
            );

            // Parse the inner markup later, straight into the new node.
            if ($inner !== null && $inner !== '') {
              $stack[] = array($inner, &$el['children'][$c]);
            }

          } else {

            $el['children'][++$c] = array(
              'type' => $this->isPhrasingContent($probe) ? 'phrasing' : 'flow',
              'raw'  => $raw,
            );
          }
          continue;
        } // Tag

        /** ---------------------------
         * Comment
         */

        // Neither text nor tag matched, so this is the comment alternative.
        // The comment body may be empty (<!---->); it is still emitted.
        if (isset($m['comment']) && $raw !== null) {
          $el['children'][++$c] = array(
            'type' => 'comment',
            'raw'  => $raw,
          );
          continue;
        } // Comment

      } // For each element
    } // While stack

    return $root;
  }

  /**
   * Serialises a tree produced by parse().
   *
   * Containers are rendered in two passes over an explicit stack: on the
   * first visit the parent's position and the container itself are pushed,
   * on the second visit ('revisit') the container's accumulated 'out' buffer
   * is wrapped in its tags and appended to the parent's buffer.
   *
   * @param array $root Root node.
   * @param bool  $br   Whether single newlines become "<br>\n".
   * @return string
   */
  protected function treeToString($root, $br = true)
  {
    $stack = array(array(&$root, 0));
    $root['out'] = '';

    /** -----------------------------
     * While stack
     */

    while (!empty($stack)) {
      $_s     = array_pop($stack);
      $parent = &$_s[0];
      $c      =  $_s[1];
      $out    = &$parent['out'];

      /** -----------------------------
       * For each element (text, tags, comments)
       */

      for ($i = $c, $len = count($parent['children']); $i < $len; $i++) {
        $el = &$parent['children'][$i];

        /** ---------------------------
         * Text, Phrasing
         */

        if ($el['type'] === 'text' || $el['type'] === 'phrasing') {
          $type    = $el['type'];
          $content = ($type === 'text')
            ? self::applyLineBreaks($el['text'], $br)
            : $el['raw'];

          // Absorb following "space + node" pairs into the same paragraph.
          while (isset($parent['children'][$i + 2])) {
            $gap  = $parent['children'][$i + 1];
            $next = $parent['children'][$i + 2];

            if ($gap['type'] !== 'space') {
              break;
            }

            if ($next['type'] === 'text') {
              if ($type === 'text') {
                break; // Two consecutive text runs are separate paragraphs.
              }
              $content .= self::applyLineBreaks($gap['space'] . $next['text'], $br);
            } else if ($next['type'] === 'phrasing') {
              $content .= self::applyLineBreaks($gap['space'] . $next['raw'], $br);
            } else if ($next['type'] === 'comment') {
              $content .= $gap['space'] . $next['raw'];
            } else {
              break;
            }

            $type = $next['type'];
            $i   += 2;
          }

          // Indent every line of the paragraph body by two spaces.
          $content = str_replace("\n", "\n  ", "\n" . $content);

          $bare = $parent['type'] === 'grouping'
            || ($len === 1 && !$this->isForceParagraph($parent));

          if ($bare) {
            $out .= "\n" . $content . "\n";
          } else {
            $out .= "\n<p>" . $content . "\n</p>\n";
          }
          continue;
        } // Text, Phrasing

        /** ---------------------------
         * Container, Grouping
         */

        if ($el['type'] === 'container' || $el['type'] === 'grouping') {
          if (isset($el['revisit']) && $el['revisit']) {
            // 'attrs' is null when the tag had no attribute section.
            $attrs = isset($el['attrs']) ? trim($el['attrs']) : '';
            $attrs = $attrs !== '' ? ' ' . $attrs : '';
            $out .= "\n" . '<' . $el['tag'] . $attrs . '>';
            $out .= str_replace("\n", "\n  ", "\n" . trim($el['out']));
            $out .= "\n" . '</' . $el['tag'] . '>' . "\n";
          } else {
            // Resume the parent at this index once the child is rendered.
            $stack[] = array(&$parent, $i);
            $stack[] = array(&$el, 0);
            $el['revisit'] = true;
            $el['out'] = '';
            break;
          }
          continue;
        } // Container, Grouping

        /** ---------------------------
         * Flow, Comment
         */

        if ($el['type'] === 'flow' || $el['type'] === 'comment') {
          $out .= "\n" . $el['raw'] . "\n";
          continue;
        } // Flow, Comment

        // 'space' nodes that were not absorbed above produce no output.

      } // For each element
    } // While stack

    return trim($root['out']);
  }

  /**
   * Returns the recursive pattern used to tokenise markup.
   *
   * Each match is one of: a text run ('text'), a comment ('comment') or a
   * tag ('tag', optional 'attrs', and 'inner' when a matching closing tag
   * was found). 'raw' always holds the complete match.
   *
   * @return string
   */
  protected static function getRegex()
  {
    return '%'
      . '(?<raw>'
      .   '(?<text>'
      .     '[^<]+'
      .   ')'
      . '|'
      .   '<!--(?<comment>.*?)-->'
      . '|'
      .   '<(?<tag>'
      .     '[^[:space:]>]+'
      .   ')'
      .   '(?:'
      .     '[[:space:]]+'
      .     '(?<attrs>'
      .       '(?:'
      .         '"(?:\\\"|[^"])*"'
      .       '|'
      .         "'(?:\\\'|[^'])*'"
      .       '|'
      .         '(?:[^/>"\']|/(?!>))'
      .       ')*'
      .     ')'
      .   ')?'
      .   '[[:space:]]*'
      .   '(?:'
      .     '/>'
      .   '|'
      .     '>'
      .     '(?:'
      .       '(?<inner>'
      .         '(?R)*?'
      .       ')'
      .       '</\k<tag>>'
      .     ')?'
      .   ')'
      . ')'
      . '%isuS';
  }
}

/**
 * Replaces double line-breaks with paragraph elements.
 *
 * Delegates to a single WP_Formatting_AutoP instance that is created on the
 * first call and reused by every later call.
 *
 * @param string $text Text to format.
 * @return string Result of WP_Formatting_AutoP::autop() for $text.
 */
function wpautop_new($text) {
  static $formatter = null;

  if ($formatter === null) {
    $formatter = new WP_Formatting_AutoP();
  }

  return $formatter->autop($text);
}
