BB Parser — FSX Friendly!

<?php
/*!
 * @file bbcode.php
 *
 * A BBCode tag is defined in terms of two parameters, a parse type,
 * and an argument count. The parse type, "type", is given by one of
 * the TT_ constants.
 *
 * The argument count, "args", is in the range [0,4]. A zero-argument tag
 * does not have an end tag. A one-argument tag has its argument between
 * the start and end tag. A two-argument tag is identical to a one-argument
 * tag with the addition of an =arg1 in the opening tag, e.g.,
 *   [url=arg1]arg0[/url]
 * A three-argument tag takes an additional =arg2 parameter in its closing
 * tag, e.g.,
 *   [grad=arg1]arg0[/grad=arg2]
 * A four-argument tag actually accepts an unlimited number of named
 * attributes and is written like an HTML tag. The attributes are passed in
 * an associative array to a callback, which is the only valid type for
 * this count.
 *
 * From an implementation perspective, the argument count controls how
 * the tag is decomposed into memory, ie, parsed. The type determines
 * how the tag is evaluated after parse finishes. The argument count is
 * used in evaluation to pass the correct number of parameters, but the
 * type is not considered during parse, at all.
 *
 * Other notes: Tag names may not contain spaces or "=".
*/

const TT_TRIVIAL = 0;    ///< This tag is a direct HTML wrapper tag
const TT_INTERLEAVE = 1; ///< This tag is its "html" pieces interleaved with its arguments
const TT_CALLBACK = 2;   ///< This tag gets evaluated by a callback function

/*!
 * Table of supported tags, keyed by lowercase tag name. Each entry holds
 *   'type'  one of the TT_ constants,
 *   'args'  the argument count described in the file header,
 *   'html'  (TT_INTERLEAVE only) the HTML pieces placed around the arguments,
 *   'func'  (TT_CALLBACK only) the callback producing the replacement HTML.
 *
 * Callbacks receive the already-parsed tag contents first; that value may
 * contain HTML generated by nested tags, so it is inserted as-is. Every
 * other argument comes straight from the tag text and is escaped with
 * htmlspecialchars() before being placed inside an attribute, so that a
 * quote in the argument cannot terminate the attribute.
 *
 * NOTE(review): escaping does not restrict URL schemes (e.g. "javascript:")
 * or CSS syntax inside style values; add validation if this parser is fed
 * untrusted input.
 */
$bbtags = array(
  'b'   => array('type' => TT_TRIVIAL, 'args' => 1),
  'i'   => array('type' => TT_TRIVIAL, 'args' => 1),
  'u'   => array('type' => TT_TRIVIAL, 'args' => 1),
  'hr'  => array('type' => TT_TRIVIAL, 'args' => 0),
  'img' => array(
    'type' => TT_INTERLEAVE,
    'args' => 1,
    'html' => array('<img src="', '" alt="User image" />')
  ),
  'url' => array(
    'type' => TT_CALLBACK,
    'args' => 2,
    // [url=target]text[/url]; $url is NULL when no =target was given
    'func' => function($txt, $url) {
       return '<a href="' . htmlspecialchars((string)$url, ENT_QUOTES, 'UTF-8')
            . '">' . $txt . '</a>';
    }
  ),
  'bubble' => array(
    'type' => TT_CALLBACK,
    'args' => 4,
    // [bubble radius=N color=RGB]text[/bubble]; both attributes are optional
    'func' => function($txt, $args) {
       $br = isset($args['radius'])? $args['radius'] : 6;
       $col = isset($args['color'])? $args['color']  : "000";
       return '<span class="bubble" style="'
         . "border-radius:" . htmlspecialchars((string)$br, ENT_QUOTES, 'UTF-8') . "px;"
         . "color:#" . htmlspecialchars((string)$col, ENT_QUOTES, 'UTF-8') . ";"
         . '">' . $txt . '</span>';
    }
  ),
  'grad' => array(
    'type' => TT_CALLBACK,
    'args' => 3,
    // [grad=from]text[/grad=to]; missing colours fall back to pink/green
    'func' => function($txt, $arg0, $arg1) {
       if (empty($arg0)) $arg0="pink";
       if (empty($arg1)) $arg1="green";
       // A bare "linear-gradient(...)" is not a CSS declaration; it has to
       // be the value of a property to have any effect.
       return '<span style="background:linear-gradient(to right,'
              . htmlspecialchars((string)$arg0, ENT_QUOTES, 'UTF-8') . ','
              . htmlspecialchars((string)$arg1, ENT_QUOTES, 'UTF-8') . ')">'
              . $txt . '</span>';
    }
  )
);

/*!
 * Writes a diagnostic message to the output as an HTML comment.
 *
 * Messages can contain text taken from the parsed input (tag names, tag
 * bodies), so angle brackets are replaced with entities first. Without
 * that, a "-->" in the message would end the comment early and whatever
 * followed would be emitted as live markup.
 * @param $s  The message text.
*/
function notice($s) {
  $safe = strtr((string)$s, array('<' => '&lt;', '>' => '&gt;'));
  echo "<!-- Notice: $safe -->\n";
}

/*!
 * Recursive worker behind parse_bbcode(). Scans \p $str from offset \p $i,
 * replacing every complete, known tag with its HTML, and stops either at
 * the end of the input or at the first closing tag it does not own.
 *
 * For a tag that takes arguments the function calls itself to parse the
 * tag's contents; the nested call returns at the next closing tag, which
 * is then checked against the open tag's name. A closing tag that does not
 * match is kept as literal text and the search continues.
 *
 * Unknown tags, malformed tags and tags still open at the end of the input
 * are copied to the output unchanged.
 *
 * @param  $str  The string to parse.
 * @param  $i    [in/out] The position from which to start parsing. On
 *               return it is the position of the '[' of the closing tag
 *               that stopped the parse, or FALSE if all characters have
 *               been consumed.
 * @return Returns the HTML for the portion of the input that was consumed.
*/
function parse_sub($str, &$i)
{
  global $bbtags;
  $outstr = "";
  $bookmark = $i; // Start of the text not yet copied into $outstr
  for ($i = strpos($str, '[', $i); $i !== FALSE; $i = strpos($str, '[', $i))
  {
    $close = strpos($str, ']', $i + 1);
    if ($close === FALSE) {
      // No ']' follows, so nothing from here on can be a tag; flush the
      // rest of the input as literal text.
      $i = FALSE;
      return $outstr . substr($str, $bookmark);
    }

    // Look inside our tag, now
    $stag = substr($str, $i+1, $close - $i - 1);
    $lead = isset($stag[0]) ? $stag[0] : ''; // '' for an empty "[]"

    // If it's a closing tag, return and let the parent handle that
    if ($lead === '/')
      return $outstr . substr($str, $bookmark, $i - $bookmark);

    // "[[": the first bracket is literal text; retry from the second one
    if ($lead === '[') {
      ++$i;
      continue;
    }

    // Make sure we're safe if args=1; we don't know, yet
    $tagc = preg_split('/\s*[\s=]\s*/', $stag, 2);
    if (count($tagc) == 2)
      $arg1 = $tagc[1]; // This technically allows [tag x]y[/tag]
    else
      $arg1 = NULL; // Don't reuse old arg values
    $tname = strtolower($tagc[0]); // Deliberately not trimmed

    // Look up tag
    $tstart = $i;
    $i = $close + 1;
    if (!array_key_exists($tname, $bbtags)) {
      notice("No bbtag called [$tname] ($stag)");
      continue;
    }

    // Tag found
    $bbtag = $bbtags[$tname];
    if ($bbtag['args'] > 0)
    {
      // Length of "[name"+1, and likewise of the "[/name" closing prefix
      $tlen = strlen($tname) + 2;

      // Handle associative tags
      if ($bbtag['args'] > 3)
      {
        // Attribute values may contain ']', so the real end of the opening
        // tag is found by reading the attribute list, not by $close.
        $i = $tstart + $tlen;
        if (ctype_space($str[$i-1])) {
          $args = read_attr_list($str, $i);
          if ($args === NULL) { // Bail on failure
            $i = FALSE;
            return $outstr . substr($str, $bookmark);
          }
          ++$i; // Step over the ']' that ended the attribute list
        }
        else if ($str[$i-1] == ']')
          $args = array();
        else continue; // e.g. [name=x]: not valid for this kind of tag
      }
      else {
        $args = NULL;
        // A one-argument tag must be exactly "[name]"
        if ($bbtag['args'] == 1 && $str[$tstart + $tlen - 1] != ']') {
          notice("$str [$tstart+$tlen-1] != ']'");
          continue;
        }
      }

      $arg0 = '';
      for (;;) {
        // Parse the tag's contents; nested tags are handled by recursion
        $arg0 .= parse_sub($str, $i);

        // Make sure we arrived at our own closing tag
        if ($i === FALSE)
          return $outstr . substr($str, $bookmark);

        $close = strpos($str, ']', $i);
        $ctag = substr($str, $i, $close - $i);
        // The character after "[/name" must end the name, otherwise a
        // longer name such as [/bubble] would be taken as closing [b].
        $after = isset($ctag[$tlen]) ? $ctag[$tlen] : '';
        if (strncasecmp($ctag, '[/'.$tname, $tlen) != 0
            || ($after !== '' && $after !== '=' && !ctype_space($after))) {
          notice("strncasecmp('$ctag', '[/'.'$tname', '$tlen') != 0");
          $arg0 .= $str[$i++];
          continue; // If not, just keep looking
        }

        break;
      }

      // Now we have a little more parsing to do for ternary tags
      if ($bbtag['args'] == 3) {
        $arg2 = trim(substr($str, $i + $tlen, $close - $i - $tlen));
        if (strlen($arg2) > 0 && $arg2[0] == '=')
          $arg2 = trim(substr($arg2, 1));
      }
      else
        $arg2 = NULL;

      $i = $close + 1;
    }
    else
      $arg0 = $arg1 = $arg2 = $args = NULL;

    // Copy the literal text preceding the tag, then the tag's replacement
    $outstr .= substr($str, $bookmark, $tstart - $bookmark);
    $outstr .= evaluate_tag($tname, $bbtag, $arg0, $arg1, $arg2, $args);
    $bookmark = $i;
  }
  $outstr .= substr($str, $bookmark);
  return $outstr;
}


/*! Builds the replacement HTML for one fully parsed tag.
 * The tag's 'type' selects how the output is produced and its 'args' count
 * selects which of the parsed arguments take part.
 * @note Unused tag arguments should be NULL; \p $arg1 and \p $arg2 should
 *       never be non-null at the same time as \p $args.
 * @param $tname  The name of the tag.
 * @param $bbtag  The tag's entry from the \c bbtags array.
 * @param $arg0   The text between the opening and closing tags, if applicable.
 * @param $arg1   The = value from the opening tag, if applicable.
 * @param $arg2   The = value from the closing tag, if applicable.
 * @param $args   An associative array of arguments, if applicable.
 * @return Returns the string with which to replace the tag, or NULL when
 *         the tag's type is none of the TT_ constants.
*/
function evaluate_tag($tname, $bbtag, $arg0, $arg1, $arg2, $args) {
  $kind = $bbtag['type'];

  // Plain wrapper: the tag name doubles as the HTML element name
  if ($kind == TT_TRIVIAL) {
    $argc = $bbtag['args'];
    if ($argc == 0)
      return '<' . $tname . ' />';
    if ($argc == 1)
      return '<' . $tname . '>' . $arg0 . '</' . $tname . '>';
    if ($argc == 2 || $argc == 3)
      return "<!-- This tag is invalid -->";
    return "<!-- This tag would be a security risk -->";
  }

  // Fixed HTML pieces with the arguments slotted in between them
  if ($kind == TT_INTERLEAVE) {
    $pieces = $bbtag['html'];
    $argc = $bbtag['args'];
    if ($argc == 0)
      return $pieces[0];
    if ($argc == 1)
      return $pieces[0] . $arg0 . $pieces[1];
    if ($argc == 2)
      return $pieces[0] . $arg1 . $pieces[1] . $arg0 . $pieces[2];
    if ($argc == 3)
      return $pieces[0] . $arg1 . $pieces[1] . $arg0 . $pieces[2]
           . $arg2 . $pieces[3];
    return "<!-- This tag cannot be automated -->";
  }

  // User callback, called with as many arguments as the tag declares
  if ($kind == TT_CALLBACK) {
    $callback = $bbtag['func'];
    $argc = $bbtag['args'];
    if ($argc == 0)
      return $callback();
    if ($argc == 1)
      return $callback($arg0);
    if ($argc == 2)
      return $callback($arg0, $arg1);
    if ($argc == 3)
      return $callback($arg0, $arg1, $arg2);
    return $callback($arg0, $args);
  }

  return NULL;
}


/*! Reads the attribute list of an associative tag, e.g. the
 * "radius='5' color=255" part of "[bubble radius='5' color=255]".
 *
 * Attributes are separated by whitespace. Each one is a name optionally
 * followed by "=" and a value; whitespace around the "=" is allowed. A
 * value is either quoted with ' or " (and may then contain spaces, "]" and
 * the escapes \\ \' \" \r \n \t) or unquoted, in which case it runs up to
 * the next whitespace or "]".
 *
 * @param $str The string from which to read attributes.
 * @param $i   [in/out] The position from which to start reading. On
 *             success it is left on the "]" that ends the list.
 * @return  Returns the associative array read in from the string; an
 *          attribute given without "=" has the value NULL. Returns NULL,
 *          after emitting a notice, if the string ends before the closing
 *          "]" is found.
*/
function read_attr_list($str, &$i) {
  // Escape sequences recognised inside quoted values. They are applied in
  // a single pass by strtr(), so an escaped backslash is never re-read as
  // the start of another escape.
  static $escapes = array(
    '\\\\' => '\\',
    '\\\'' => '\'',
    '\\"'  => '"',
    '\\r'  => "\r",
    '\\n'  => "\n",
    '\\t'  => "\t"
  );

  $len = strlen($str);
  $attrs = array();
  for (;;)
  {
    // Skip the whitespace separating attributes
    while ($i < $len && ctype_space($str[$i]))
      ++$i;
    if ($i >= $len) // Bail if out of bounds
      { notice("OOB1"); return NULL; }
    if ($str[$i] === ']')
      return $attrs;

    // Read an attribute name; it ends at "=", "]" or whitespace
    $attr_start = $i;
    while ($i < $len && $str[$i] !== '=' && $str[$i] !== ']'
       && !ctype_space($str[$i]))
      ++$i;
    $attr_name = substr($str, $attr_start, $i - $attr_start);

    // Read past the whitespace after the attribute name
    while ($i < $len && ctype_space($str[$i]))
      ++$i;
    if ($i >= $len) // Bail if out of bounds
      { notice("OOB1"); return NULL; }

    if ($str[$i] === '=')
    {
      // Step over "=" and any whitespace before the value
      ++$i;
      while ($i < $len && ctype_space($str[$i]))
        ++$i;
      if ($i >= $len)
        { notice("OOB3"); return NULL; }

      if ($str[$i] === '"' || $str[$i] === '\'')
      {
        $ochar = $str[$i];
        $val_start = ++$i;
        while ($i < $len && $str[$i] !== $ochar) {
          if ($str[$i] === '\\') ++$i; // Skip the escaped character as well
          ++$i;
        }
        if ($i >= $len)
          { notice("OOB2"); return NULL; }
        $val = strtr(substr($str, $val_start, $i - $val_start), $escapes);
        ++$i; // Step past the closing quote
      }
      else {
        $val_start = $i;
        while ($i < $len && $str[$i] !== ']' && !ctype_space($str[$i]))
          ++$i;
        if ($i >= $len)
          { notice("OOB3"); return NULL; }
        $val = substr($str, $val_start, $i - $val_start);
      }
    }
    else $val = NULL; // Attribute given as a bare name
    $attrs[$attr_name] = $val;
  }
}

/*! Entry point: converts a whole BBCode string to HTML.
 * parse_sub() stops early whenever it meets a closing tag that nothing
 * opened. When that happens the tag's '[' is copied to the output as
 * literal text and parsing resumes right after it, until parse_sub()
 * reports that the input is exhausted.
 * @param  $str  The BBCode string to parse.
 * @return Returns the HTML parsed version of the input.
*/
function parse_bbcode($str) {
  $html = "";
  $pos = 0;
  for (;;) {
    $html .= parse_sub($str, $pos);
    if ($pos === false)
      break; // Everything has been consumed

    // Stray closing tag: keep its '[' verbatim and carry on behind it
    $html .= $str[$pos];
    ++$pos;
  }
  return $html;
}

// Demo driver: runs the parser over a sample that mixes well-formed tags
// with unmatched and malformed ones, and prints the resulting HTML.
print "Result:<br/>\n";
print parse_bbcode(
"This is some [b]cool shit[/b], I'm sure you'll agree.
Italics: [i]check[/i].
Bold italics: [b][i]check[/i] and [i]double check[/i][/b]
Now, let's try [url=asses]some urls[/url].
And now, let's try [url = asses in thongs]urls with gaps[/url].
For good measure, [grad=red]graded spans[/grad=blue].
To fuck shit up, [grad=red]graded spans[/grad].
To really fuck shit up, [grad]graded spans[/grad].
Okay, [bubble]complicated tag time[/bubble]!
Now, [bubble radius='5' color=255]more complicated tag time[/bubble]!
Finally, [bubble radius='5[b]ha[/b][/bubble]' color=255]extremely complicated tag time[/bubble]!

This is an unmatched closing italic tag: [/i]
Here's one in a bold tag: [b]wat[/[/i][/[[/b]
[b]This is an unmatched bold tag.

This concludes the BBCode portion of your exam.
");

?>