Advertisement
JoshDreamland

BB Parser with Content Type

Jan 23rd, 2014
80
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 11.07 KB | None | 0 0
  1. <?php
  2. /*!
  3.  * @file bbcode.php
  4.  *
  5.  * A BBCode tag is defined in terms of two parameters, a parse type,
  6.  * and an argument count. The parse type, "type", is given by one of
  7.  * the tag type ("TT_") constants.
  8.  *
  9.  * The argument count, "args", is in the range [0,4]. A zero-argument tag
  10.  * does not have an end tag. A one-argument tag has its argument between
  11.  * the start and end tag. A two-argument tag is identical to a one-argument
  12.  * tag with the addition of an =arg1 in the opening tag, e.g.,
  13.  *   [url=arg1]arg0[/url]
  14.  * A three-argument tag is given an additional parameter at tag close.
  15.  * A four-argument tag actually takes an unlimited number of arguments and operates like
  16.  * an HTML tag. The parameters are passed in an associative array to
  17.  * a callback, which is the only valid type for this count.
  18.  *
  19.  * Tags also contain an "enclosed" flag which denotes what is wrapped
  20.  * inside the tags; this is one of the content type ("CT_") constants.
  21.  *
  22.  * From an implementation perspective, the argument count controls how
  23.  * the tag is decomposed into memory, ie, parsed. The type determines
  24.  * how the tag is evaluated after parse finishes. The argument count is
  25.  * used in evaluation to pass the correct number of parameters, but the
  26.  * type is not considered during parse, at all.
  27.  *
  28.  * Other notes: Tag names may not contain spaces or "=".
  29. */
  30.  
  31. const TT_TRIVIAL = 0;    ///< The tag maps directly to an HTML element of the same name (see evaluate_tag)
  32. const TT_INTERLEAVE = 1; ///< The tag's "html" pieces are interleaved with its arguments; the order depends on the argument count (see evaluate_tag)
  33. const TT_CALLBACK = 2;   ///< The tag is evaluated by calling its "func" callback with the parsed arguments
  34.  
  35. const CT_MARKDOWN = 0; ///< Text inside the tag gets non-BB formatting (URL auto-linking in parse_nonbb). This is the default.
  36. const CT_RAW = 1; ///< Text inside the tag is raw data; parse_nonbb returns it unchanged.
  37.  
  38. include('tags.php');
  39.  
  40. function notice($s) { echo "<!-- Notice: $s -->\n"; }
  41.  
  42. /*!
  43.  * This is a recursive function; call it with i = 0, and it will
  44.  * call itself recursively until all tags are parsed. It returns the
  45.  * parsed string, with $i set to the position of the first-read closing tag.
  46.  * @param  $str  The string to parse.
  47.  * @param  $i    [in/out] The position from which to start parsing.
  48.  *               Set at the end of the function to denote the first
  49.  *               unparsed character, or FALSE if all characters have
  50.  *               been consumed.
  51.  * @return Returns the HTML parsed substring of the given input.
  52. */
  53. function parse_sub($str, &$i, $contenttype)
  54. {
  55.   global $bbtags;
  56.   $outstr = "";
  57.   $bookmark = $i;
  58.   for ($i = strpos($str, '[', $i); $i !== FALSE; $i = strpos($str, '[', $i))
  59.   {
  60.     $close = strpos($str, ']', $i + 1);
  61.     if ($close == FALSE) {
  62.       $i = FALSE;
  63.       return $outstr . parse_nonbb(substr($str, $bookmark, $i - $bookmark), $contenttype);
  64.     }
  65.    
  66.     // Look inside our tag, now
  67.     $stag = substr($str, $i+1, $close - $i - 1);
  68.    
  69.     // If it's a closing tag, return and let the parent handle that
  70.     if ($stag[0] == '/')
  71.       return $outstr . parse_nonbb(substr($str, $bookmark, $i - $bookmark), $contenttype);
  72.    
  73.     // Doplegänger
  74.     if ($stag[0] == '[') {
  75.       ++$i;
  76.       continue;
  77.     }
  78.    
  79.     // Make sure we're safe if args=1; we don't know, yet
  80.     $tagc = preg_split('/\s*[\s=]\s*/', $stag, 2);
  81.     if (count($tagc) == 2)
  82.       $arg1 = $tagc[1]; // This technically allows [tag x]y[/tag]
  83.     else
  84.       $arg1 = NULL; // Don't reuse old arg values
  85.     $tname = strtolower($tagc[0]); // Deliberately not trimmed
  86.    
  87.     // Look up tag
  88.     $tstart = $i;
  89.     $i = $close + 1;
  90.     if (!array_key_exists($tname, $bbtags)) {
  91.       notice("No bbtag called [$tname] ($stag)");
  92.       continue;
  93.     }
  94.    
  95.     // Tag found
  96.     $bbtag = $bbtags[$tname];
  97.     if ($bbtag['args'] > 0)
  98.     {
  99.       $tlen = strlen($tname) + 2;
  100.      
  101.       // Handle associative tags
  102.       if ($bbtag['args'] > 3)
  103.       {
  104.         $i = $tstart + $tlen;
  105.         if (ctype_space($str[$i-1])) {
  106.           $args = read_attr_list($str, $i);
  107.           if ($args === NULL) { // Bail on failure
  108.             $i = FALSE;
  109.             return $outstr . parse_nonbb(substr($str, $bookmark), $contenttype);
  110.           }
  111.           ++$i;
  112.         }
  113.         else if ($str[$i-1] == ']')
  114.           $args = array();
  115.         else continue;
  116.       }
  117.       else {
  118.         $args = NULL;
  119.         if ($bbtag['args'] == 1 && $str[$tstart + $tlen - 1] != ']') {
  120.           notice("$str [$tstart+$tlen-1] != ']'");
  121.           continue;
  122.         }
  123.       }
  124.      
  125.       $arg0 = '';
  126.       for (;;) {
  127.         // This is where shit gets interesting
  128.         if (isset($bbtag['content'])) notice("Tag $tname has content type $bbtag[content]");
  129.         $arg0 .= parse_sub($str, $i, isset($bbtag['content'])? $bbtag['content'] : CT_MARKDOWN);
  130.        
  131.         // Make sure we arrived at our own closing tag
  132.         if ($i == FALSE)
  133.           return $outstr . parse_nonbb(substr($str, $bookmark), $contenttype);
  134.        
  135.         $close = strpos($str, ']', $i);
  136.         $ctag = substr($str, $i, $close - $i);
  137.         if (strncasecmp($ctag, '[/'.$tname, $tlen) != 0) {
  138.           notice("strncasecmp('$ctag', '[/'.'$tname', '$tlen') != 0");
  139.           $arg0 .= $str[$i++];
  140.           continue; // If not, just keep looking
  141.         }
  142.        
  143.         break;
  144.       }
  145.      
  146.       // Now we have a little more parsing to do for ternary tags
  147.       if ($bbtag['args'] == 3) {
  148.         $arg2 = trim(substr($str, $i + $tlen, $close - $i - $tlen));
  149.         if (strlen($arg2) > 0 && $arg2[0] == '=')
  150.           $arg2 = trim(substr($arg2, 1));
  151.       }
  152.       else
  153.         $arg2 = NULL;
  154.      
  155.       $i = $close + 1;
  156.     }
  157.     else
  158.       $arg0 = $arg1 = $arg2 = $args = NULL;
  159.    
  160.     $outstr .= parse_nonbb(substr($str, $bookmark, $tstart - $bookmark), $contenttype);
  161.     $outstr .= evaluate_tag($tname, $bbtag, $arg0, $arg1, $arg2, $args);
  162.     $bookmark = $i;
  163.   }
  164.   $outstr .= parse_nonbb(substr($str, $bookmark), $contenttype);
  165.   return $outstr;
  166. }
  167.  
  168.  
  169. /*! This function might as well be in that last block of code,
  170.  * but it was getting frighteningly big, so I moved it here.
  171.  * @note Unused tag arguments should be NULL; \p $arg1 and \p $arg2 should
  172.  *       never be non-null at the same time as \p $args.
  173.  * @param $tname  The name of the tag.
  174.  * @param $bbtag  The tag array from the \c bbtags array.
  175.  * @param $arg0   The first argument, the text between the tags, if applicable.
  176.  * @param $arg1   The second argument, the = value in the opening tag, if applicable.
  177.  * @param $arg2   The third argument, the = value in the closing tag, if applicable.
  178.  * @param $args   An associative array of arguments, if applicable.
  179.  * @return Returns the result of evaluating the tag;
  180.  *         a string with which to replace the tag.
  181. */
  182. function evaluate_tag($tname, $bbtag, $arg0, $arg1, $arg2, $args) {
  183.   switch ($bbtag['type'])
  184.   {
  185.     case TT_TRIVIAL: switch ($bbtag['args']) {
  186.       case 0:  return "<$tname />";
  187.       case 1:  return "<$tname>$arg0</$tname>";
  188.       case 2:  return "<!-- This tag is invalid -->";
  189.       case 3:  return "<!-- This tag is invalid -->";
  190.       default: return "<!-- This tag would be a security risk -->";
  191.     }
  192.     case TT_INTERLEAVE: $h = $bbtag['html']; switch ($bbtag['args']) {
  193.       case 0:  return $h[0];
  194.       case 1:  return $h[0] . $arg0 . $h[1];
  195.       case 2:  return $h[0] . $arg1 . $h[1] . $arg0 . $h[2];
  196.       case 3:  return $h[0] . $arg1 . $h[1] . $arg0 . $h[2] . $arg2 . $h[3];
  197.       default: return "<!-- This tag cannot be automated -->";
  198.     }
  199.     case TT_CALLBACK: $f = $bbtag['func']; switch ($bbtag['args']) {
  200.       case 0:  return $f();
  201.       case 1:  return $f($arg0);
  202.       case 2:  return $f($arg0, $arg1);
  203.       case 3:  return $f($arg0, $arg1, $arg2);
  204.       default: return $f($arg0, $args);
  205.     }
  206.   }
  207. }
  208.  
  209.  
  210. /*! This nonsense is to parse associative tags.
  211.  * @param $str The string from which to read attributes.
  212.  * @param $i   [in/out] The position from which to start reading.
  213.  *             Set at end of function call to denote the end of the list.
  214.  * @return  Returns the associative array read in from the string.
  215. */
  216. function read_attr_list($str, &$i) {
  217.   $len = strlen($str);
  218.   $attrs = array();
  219.   while ($i < $len && $str[$i] != ']')
  220.   {
  221.     if (ctype_space($str[$i]))
  222.       continue;
  223.    
  224.     // Read an attribute name
  225.     $attr_start = $i;
  226.     while (++$i < $len && $str[$i] != '='
  227.        && !ctype_space($str[$i-1]));
  228.     $attr_name = substr($str, $attr_start, $i-$attr_start);
  229.    
  230.     // Read past the attribute name
  231.     while ($i < $len && ctype_space($str[$i]))
  232.       ++$i;
  233.     if ($i >= $len) // Bail if out of bounds
  234.       { notice("OOB1"); return NULL; }
  235.    
  236.     if ($str[$i] == '=')
  237.     {
  238.       while (++$i < $len && ctype_space($str[$i]));
  239.       if ($str[$i] == '"' || $str[$i] == '\'')
  240.       {
  241.         $val_start = $i + 1;
  242.         $ochar = $str[$i];
  243.         while (++$i < $len && $str[$i] != $ochar)
  244.           if ($str[$i] == '\\') ++$i;
  245.         if ($i >= $len)
  246.           { notice("OOB2"); return NULL; }
  247.         $val = str_replace(
  248.           array("\\\\", "\\\'", "\\\"", "\\r", "\\n", "\\t"),
  249.           array("\\",     "\'",   "\"",   "\r",  "\n", "\t"),
  250.           substr($str, $val_start, $i - $val_start)
  251.         );
  252.       }
  253.       else {
  254.         $val_start = $i;
  255.         while (++$i < $len && $str[$i] != ']' && !ctype_space($str[$i]));
  256.         if ($i >= $len)
  257.           { notice("OOB3"); return NULL; }
  258.         $val = substr($str, $val_start, $i - $val_start);
  259.       }
  260.     }
  261.     else $val = NULL;
  262.     $attrs[$attr_name] = $val;
  263.   }
  264.   return $attrs;
  265. }
  266.  
  267. /*!
  268.  * Parse for non-BBCode elements, such as URLs or (God forbid) Markdown.
  269.  * @param $str  HTML-escaped plain text input.
  270.  * @return  Returns HTML-formatted parsed text.
  271. */
  272. function parse_nonbb($str, $contenttype)
  273. {
  274.   if ($contenttype != CT_MARKDOWN)
  275.     return $str;
  276.  
  277.   $urlexp = "/(?i)\b("
  278.     . "(?:https?:\/\/|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}\/)"
  279.     . "(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+"
  280.     . "(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'\".,<>?«»“”‘’])"
  281.   .")/";
  282.   return preg_replace($urlexp, "<a href=\"$1\">$1</a>", $str);
  283.   return $str;
  284. }
  285.  
  286. /*! Main BBCode parser call.
  287.  * @param  $str  The BBCode string to parse.
  288.  * @return Returns the HTML parsed version of the input.
  289. */
  290. function parse_bbcode($str) {
  291.   $i = 0;
  292.   $res = "";
  293.   while ($i !== false) {
  294.     $res .= parse_sub($str, $i, CT_MARKDOWN);
  295.     if ($i !== false)
  296.       $res .= $str[$i++];
  297.   }
  298.   return $res;
  299. }
  300.  
  // Sample input for a manual smoke test: a handful of BBCode tags plus two
  // bare URLs. Which tags are recognised depends on the $bbtags table that
  // tags.php provides.
  301. $test = "
  302. [b]Bold[/b]
  303. [s]Strike[/s]
  304. [center]Center[/center]
  305. [u2]Dotted Under[/u2]
  306. [url=google.com]Link[/url]
  307. [col=red]Red[/col]
  308. [size=5]Font size[/size]
  309. [font=Arial]Font face[/font]
  310. [tnail]http://img_url[/tnail]
  311. [user]Precedent[/user]
  312. [profile]Precedent[/profile]
  313. [ln]
  314.  
  315. [youtube]ynnngasdf[/youtube]
  316. [paypal]test[/paypal]
  317.  
  318. http://google.com/
  319. www.google.com
  320.  
  321. [hr]
  322. ";
  323.  
  // Print the raw input first, then the parser's output, each under a banner.
  // NOTE(review): this runs whenever the file is loaded, not only on demand.
  324. echo "=================================\nTest:\n=================================\n" . $test . "\n";
  325. echo "=================================\nResult:\n=================================\n";
  326. echo parse_bbcode($test);
  327.  
  328. ?>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement