Advertisement
JoshDreamland

BB Parser

Jan 16th, 2014
44
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 9.78 KB | None | 0 0
  1. <?php
  2. // A BBCode tag is defined in terms of two parameters, a parse type,
  3. // and an argument count. The parse type, "type", is given by one of
  4. // the following constants:
  5.  
  // Parse types: the values a tag definition may carry under its "type" key.
  const TT_TRIVIAL    = 0,  // Direct HTML wrapper tag
        TT_INTERLEAVE = 1,  // Tag's "html" fragments are interleaved with its arguments
        TT_CALLBACK   = 2;  // Tag is evaluated by a callback function
  9.  
  10. // The argument count, "args", is in the range [0,4]. A zero-argument tag
  11. // does not have an end tag. A one-argument tag has its argument between
  12. // the start and end tag. A two-argument tag is identical to a one-argument
  13. // tag with the addition of an =arg1 in the opening tag, e.g.,
  14. //   [url=arg1]arg0[/url]
  15. // A three-argument tag is given an additional parameter at tag close.
  16. // A four-argument tag actually has unlimited arguments and operates as
  17. // an HTML tag. The parameters are passed in an associative array to
  18. // a callback, which is the only valid type for this count.
  19.  
  20. // From an implementation perspective, the argument count controls how
  21. // the tag is decomposed into memory, ie, parsed. The type determines
  22. // how the tag is evaluated after parse finishes. The argument count is
  23. // used in evaluation to pass the correct number of parameters, but the
  24. // type is not considered during parse, at all.
  25.  
  26. // Other notes: Tag names may not contain spaces or "=".
  27.  
  // Tag table consulted by parse_sub(). Each entry gives the tag's parse
  // type ('type'), its argument count ('args') and, depending on the type,
  // the 'html' fragments or the 'func' callback used to render it.
  //
  // The text argument handed to a callback has already been through the
  // parser and may contain generated markup, so it is emitted as-is. Every
  // other argument is raw user input that ends up inside an HTML attribute,
  // so it is escaped or validated here before being written out.
  $bbtags = array(
    'b'   => array('type' => TT_TRIVIAL, 'args' => 1),
    'i'   => array('type' => TT_TRIVIAL, 'args' => 1),
    'u'   => array('type' => TT_TRIVIAL, 'args' => 1),
    'hr'  => array('type' => TT_TRIVIAL, 'args' => 0),
    'img' => array(
      // A callback rather than an interleave tag: the tag body becomes the
      // src attribute, so it must be attribute-escaped.
      'type' => TT_CALLBACK,
      'args' => 1,
      'func' => function($src) {
         return '<img src="'
           . htmlspecialchars((string)$src, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8')
           . '" alt="User image" />';
      }
    ),
    'url' => array(
      'type' => TT_CALLBACK,
      'args' => 2,
      'func' => function($txt, $url) {
         $url = trim((string)$url);
         // Browsers ignore whitespace and control characters inside a URL
         // scheme, so remove them before deciding what the scheme is.
         $probe = preg_replace('/[\x00-\x20]+/', '', $url);
         // Relative URLs (no scheme) are fine; an explicit scheme must be
         // one that cannot execute script.
         if (preg_match('/^([a-z][a-z0-9+.\-]*):/i', (string)$probe, $m)
             && !in_array(strtolower($m[1]), array('http', 'https', 'ftp', 'mailto'), true))
           $url = '#';
         return '<a href="'
           . htmlspecialchars($url, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8')
           . '">' . $txt . '</a>';
      }
    ),
    'bubble' => array(
      'type' => TT_CALLBACK,
      'args' => 4,
      'func' => function($txt, $args) {
         $br  = isset($args['radius']) ? $args['radius'] : 6;
         $col = isset($args['color'])  ? $args['color']  : "000";
         // Anything that is not a plain number / hex colour falls back to
         // the default instead of being written into the style attribute.
         if (!preg_match('/^\d+(?:\.\d+)?\z/', (string)$br))
           $br = 6;
         if (!preg_match('/^[0-9a-f]{3,8}\z/i', (string)$col))
           $col = "000";
         return '<span class="bubble" style="'
           . "border-radius:".$br."px;"
           . "color:#".$col.";"
           . '">' . $txt . '</span>';
      }
    ),
    'grad' => array(
      'type' => TT_CALLBACK,
      'args' => 3,
      'func' => function($txt, $arg0, $arg1) {
         // Characters allowed in a colour value: enough for names, #hex and
         // rgb()/hsl() notation, but no quotes, ';' or ':' that could end
         // the declaration or the attribute.
         $color = '/^[#a-z0-9(),.% \-]+\z/i';
         if (empty($arg0) || !preg_match($color, (string)$arg0)) $arg0 = "pink";
         if (empty($arg1) || !preg_match($color, (string)$arg1)) $arg1 = "green";
         // linear-gradient() is an image value; it needs a property to
         // belong to or the whole declaration is ignored.
         return "<span style=\"background-image:linear-gradient(to right,$arg0,$arg1)\">"
                . $txt . '</span>';
      }
    )
  );
  69.  
  // Emits a diagnostic message as an HTML comment on its own line.
  //
  // $s is frequently built from raw input text, so angle brackets are
  // neutralised first: otherwise a message containing "-->" would close the
  // comment early and let the rest of the message through as live markup.
  function notice($s) {
    $safe = strtr((string)$s, array('<' => '&lt;', '>' => '&gt;'));
    echo "<!-- Notice: $safe -->\n";
  }
  71.  
  // Recursive worker of the parser. Starting at offset $i of $str, it
  // converts every complete tag it meets (recursing for tag bodies) and
  // returns the resulting text.
  //
  // On return $i tells the caller why parsing stopped:
  //   - an integer: the offset of the '[' of a closing tag that this level
  //     did not open; the caller decides whether it is its own.
  //   - FALSE: the rest of the input has been consumed (end of string, an
  //     unterminated '[', a malformed attribute list, or an opening tag
  //     whose closing tag never appears).
  function parse_sub($str, &$i)
  {
    global $bbtags;
    $outstr = "";
    $bookmark = $i; // Start of the text not yet copied to $outstr
    for ($i = strpos($str, '[', $i); $i !== FALSE; $i = strpos($str, '[', $i))
    {
      $close = strpos($str, ']', $i + 1);
      if ($close === FALSE) {
        // A '[' with no ']' after it: nothing further can be a tag, so the
        // remainder is plain text. Copy it before flagging "consumed".
        $i = FALSE;
        return $outstr . substr($str, $bookmark);
      }

      // Look inside our tag, now
      $stag = substr($str, $i+1, $close - $i - 1);
      $first = ($stag === '') ? '' : $stag[0]; // "[]" has no first character

      // If it's a closing tag, return and let the parent handle that
      if ($first == '/')
        return $outstr . substr($str, $bookmark, $i - $bookmark);

      // Doppelgaenger: "[[" - the first bracket is literal text, retry
      // from the second one
      if ($first == '[') {
        ++$i;
        continue;
      }

      // Split off a possible "=arg1"; we don't know the tag's arity yet
      $tagc = preg_split('/\s*[\s=]\s*/', $stag, 2);
      if (count($tagc) == 2)
        $arg1 = $tagc[1]; // This technically allows [tag x]y[/tag]
      else
        $arg1 = NULL; // Don't reuse old arg values
      $tname = strtolower($tagc[0]); // Deliberately not trimmed

      // Look up tag
      $tstart = $i;
      $i = $close + 1;
      if (!array_key_exists($tname, $bbtags)) {
        notice("No bbtag called [$tname] ($stag)");
        continue; // Unknown tags stay in the output as literal text
      }

      // Tag found
      $bbtag = $bbtags[$tname];
      if ($bbtag['args'] > 0)
      {
        $tlen = strlen($tname) + 2; // Length of "[name" plus one delimiter; also of "[/name"

        // Handle associative tags
        if ($bbtag['args'] > 3)
        {
          $i = $tstart + $tlen;
          if (ctype_space($str[$i-1])) {
            $args = read_attr_list($str, $i);
            if ($args === NULL) { // Bail on failure
              $i = FALSE;
              return $outstr . substr($str, $bookmark);
            }
            ++$i; // Step over the ']' that ended the attribute list
          }
          else if ($str[$i-1] == ']')
            $args = array();
          else continue;
        }
        else {
          $args = NULL;
          // A one-argument tag must be exactly "[name]"
          if ($bbtag['args'] == 1 && $str[$tstart + $tlen - 1] != ']') {
            notice("$str [$tstart+$tlen-1] != ']'");
            continue;
          }
        }

        // Collect the tag body, which may itself contain tags
        $arg0 = '';
        for (;;) {
          $arg0 .= parse_sub($str, $i);

          // Input ran out before our closing tag: give up on this tag and
          // emit everything from the bookmark on as literal text
          if ($i === FALSE)
            return $outstr . substr($str, $bookmark);

          // The recursion stopped at some closing tag; is it ours? The name
          // must be followed by the end of the tag, '=' or whitespace so
          // that e.g. "[/bold]" does not close "[b]".
          $close = strpos($str, ']', $i);
          $ctag = substr($str, $i, $close - $i);
          $after = (strlen($ctag) > $tlen) ? $ctag[$tlen] : '';
          if (strncasecmp($ctag, '[/'.$tname, $tlen) != 0
              || !($after === '' || $after === '=' || ctype_space($after))) {
            notice("strncasecmp('$ctag', '[/'.'$tname', '$tlen') != 0");
            $arg0 .= $str[$i++]; // Keep the stray '[' as text
            continue; // If not, just keep looking
          }

          break;
        }

        // Ternary tags carry one more argument in the closing tag: [/name=arg2]
        if ($bbtag['args'] == 3) {
          $arg2 = trim(substr($str, $i + $tlen, $close - $i - $tlen));
          if (strlen($arg2) > 0 && $arg2[0] == '=')
            $arg2 = trim(substr($arg2, 1));
        }
        else
          $arg2 = NULL;

        $i = $close + 1;
      }
      else
        $arg0 = $arg1 = $arg2 = $args = NULL;

      // Copy the literal text before the tag, then the rendered tag
      $outstr .= substr($str, $bookmark, $tstart - $bookmark);
      $outstr .= evaluate_tag($tname, $bbtag, $arg0, $arg1, $arg2, $args);
      $bookmark = $i;
    }
    $outstr .= substr($str, $bookmark);
    return $outstr;
  }
  188.  
  189.  
  // Renders one fully parsed tag to HTML. Split out of parse_sub() purely
  // to keep that function readable.
  //
  //   $tname  lower-cased tag name
  //   $bbtag  the tag's definition ('type', 'args' and 'html' or 'func')
  //   $arg0   tag body; $arg1 / $arg2 the opening / closing "=value"
  //   $args   attribute array, passed to callbacks whose 'args' is above 3
  //
  // Returns the HTML string, or NULL when the definition's 'type' is not
  // one of the TT_* constants.
  function evaluate_tag($tname, $bbtag, $arg0, $arg1, $arg2, $args) {
    $kind = $bbtag['type'];

    if ($kind == TT_TRIVIAL) {
      $n = $bbtag['args'];
      if ($n == 0) return "<$tname />";
      if ($n == 1) return "<$tname>$arg0</$tname>";
      if ($n == 2 || $n == 3) return "<!-- This tag is invalid -->";
      return "<!-- This tag would be a security risk -->";
    }

    if ($kind == TT_INTERLEAVE) {
      $h = $bbtag['html'];
      $n = $bbtag['args'];
      if ($n == 0) return $h[0];
      if ($n == 1) return $h[0] . $arg0 . $h[1];
      // With an "=arg1" present it is emitted before the body
      if ($n == 2) return $h[0] . $arg1 . $h[1] . $arg0 . $h[2];
      if ($n == 3) return $h[0] . $arg1 . $h[1] . $arg0 . $h[2] . $arg2 . $h[3];
      return "<!-- This tag cannot be automated -->";
    }

    if ($kind == TT_CALLBACK) {
      $f = $bbtag['func'];
      $n = $bbtag['args'];
      if ($n == 0) return $f();
      if ($n == 1) return $f($arg0);
      if ($n == 2) return $f($arg0, $arg1);
      if ($n == 3) return $f($arg0, $arg1, $arg2);
      return $f($arg0, $args);
    }
  }
  218.  
  219.  
  // Parses the attribute list of an associative tag, HTML style:
  //   name=value  name='quoted value'  name="quoted value"  name
  //
  // $i must point just past the tag name. On success the attributes are
  // returned as name => value (NULL for an attribute without a value) and
  // $i is left on the ']' that ends the tag. If the input ends before that
  // ']' is reached, a notice is emitted and NULL is returned.
  //
  // Inside quoted values a backslash escapes the next character; the
  // sequences \\ \' \" \r \n \t are translated.
  function read_attr_list($str, &$i) {
    $len = strlen($str);
    $attrs = array();

    // Applied with strtr() so the value is translated in a single pass and
    // the output of one replacement is never re-read as another escape.
    $unescape = array(
      "\\\\" => "\\", "\\'" => "'",  "\\\"" => "\"",
      "\\r"  => "\r", "\\n" => "\n", "\\t"  => "\t"
    );

    for (;;)
    {
      // Skip whitespace between attributes
      while ($i < $len && ctype_space($str[$i]))
        ++$i;
      if ($i >= $len) // Bail if out of bounds
        { notice("OOB1"); return NULL; }
      if ($str[$i] == ']')
        return $attrs;

      // Read an attribute name: everything up to '=', ']' or whitespace
      $attr_start = $i;
      while ($i < $len && $str[$i] != '=' && $str[$i] != ']'
         && !ctype_space($str[$i]))
        ++$i;
      $attr_name = substr($str, $attr_start, $i - $attr_start);

      // Read past whitespace following the attribute name
      while ($i < $len && ctype_space($str[$i]))
        ++$i;
      if ($i >= $len)
        { notice("OOB1"); return NULL; }

      if ($str[$i] == '=')
      {
        // Step over the '=' and any whitespace after it
        while (++$i < $len && ctype_space($str[$i]));
        if ($i >= $len)
          { notice("OOB2"); return NULL; }

        if ($str[$i] == '"' || $str[$i] == '\'')
        {
          $val_start = $i + 1;
          $ochar = $str[$i];
          while (++$i < $len && $str[$i] != $ochar)
            if ($str[$i] == '\\') ++$i; // An escaped character cannot end the value
          if ($i >= $len)
            { notice("OOB2"); return NULL; }
          $val = strtr(substr($str, $val_start, $i - $val_start), $unescape);
          ++$i; // Step over the closing quote
        }
        else {
          // Unquoted value: runs up to whitespace or the end of the tag
          $val_start = $i;
          while ($i < $len && $str[$i] != ']' && !ctype_space($str[$i]))
            ++$i;
          if ($i >= $len)
            { notice("OOB3"); return NULL; }
          $val = substr($str, $val_start, $i - $val_start);
        }
      }
      else $val = NULL;

      // A stray '=' with nothing before it yields an empty name; drop it
      if ($attr_name !== '')
        $attrs[$attr_name] = $val;
    }
  }
  271.  
  // Entry point: converts a whole BBCode string to HTML.
  //
  // parse_sub() hands control back whenever it meets a closing tag nobody
  // opened. That tag's '[' is copied to the output as literal text and
  // parsing resumes right behind it, until parse_sub() reports (by setting
  // the position to false) that the input is used up.
  function parse_bbcode($str) {
    $pos = 0;
    $html = "";
    for (;;) {
      $html .= parse_sub($str, $pos);
      if ($pos === false)
        break;
      $html .= $str[$pos];
      ++$pos;
    }
    return $html;
  }
  282.  
  // Demo: run the parser over a sample document covering every tag kind
  // plus a few malformed cases, and print the outcome.
  echo "Result:<br/>\n", parse_bbcode(
"This is some [b]cool shit[/b], I'm sure you'll agree.
Italics: [i]check[/i].
Bold italics: [b][i]check[/i] and [i]double check[/i][/b]
Now, let's try [url=asses]some urls[/url].
And now, let's try [url = asses in thongs]urls with gaps[/url].
For good measure, [grad=red]graded spans[/grad=blue].
To fuck shit up, [grad=red]graded spans[/grad].
To really fuck shit up, [grad]graded spans[/grad].
Okay, [bubble]complicated tag time[/bubble]!
Now, [bubble radius='5' color=255]more complicated tag time[/bubble]!
Finally, [bubble radius='5[b]ha[/b][/bubble]' color=255]extremely complicated tag time[/bubble]!
 
This is an unmatched closing italic tag: [/i]
Here's one in a bold tag: [b]wat[/[/i][/[[/b]
[b]This is an unmatched bold tag.
 
This concludes the BBCode portion of your exam.
");
  303.  
  304. ?>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement