Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- <?php
- /*!
- * @file bbcode.php
- *
- * A BBCode tag is defined in terms of two parameters, a parse type,
- * and an argument count. The parse type, "type", is given by one of
- * the tag type ("TT_") constants.
- *
- * The argument count, "args", is in the range [0,4]. A zero-argument tag
- * does not have an end tag. A one-argument tag has its argument between
- * the start and end tag. A two-argument tag is identical to a one-argument
- * tag with the addition of an =arg1 in the opening tag, e.g.,
- * [url=arg1]arg0[/url]
- * A three-argument tag is given an additional parameter at tag close.
- * A four-argument tag actually has unlimited arguments and operates as
- * an HTML tag. The parameters are passed in an associative array to
- * a callback, which is the only valid type for this count.
- *
- * Tags also contain an "enclosed" flag which denotes what is wrapped
- * inside the tags; this is one of the content type ("CT_") constants.
- *
- * From an implementation perspective, the argument count controls how
- * the tag is decomposed into memory, ie, parsed. The type determines
- * how the tag is evaluated after parse finishes. The argument count is
- * used in evaluation to pass the correct number of parameters, but the
- * type is not considered during parse, at all.
- *
- * Other notes: Tag names may not contain spaces or "=".
- */
// Tag type ("TT_") constants: how a tag is evaluated once it has been parsed.
const TT_TRIVIAL    = 0; ///< This tag is a direct HTML wrapper tag
const TT_INTERLEAVE = 1; ///< This tag becomes "html"[0] . arg0 . "html"[1] . arg1 ...
const TT_CALLBACK   = 2; ///< This tag gets evaluated by a callback function

// Content type ("CT_") constants: how the text enclosed by a tag is treated.
const CT_MARKDOWN = 0; ///< The inside of the tag is Markdown-format text as non-BB. This is the default.
const CT_RAW      = 1; ///< The inside of the tag is raw data and should not be formatted.

// Load the tag table from the directory of this file rather than from the
// include path / current working directory, and fail loudly if it is
// missing: a bare include() only warns, and the parser would then run
// against an undefined $bbtags. The _once form also avoids loading the
// tag definitions a second time if this file is included twice.
require_once __DIR__ . '/tags.php';
/*!
 * Emit a diagnostic message into the output as an HTML comment.
 *
 * Callers interpolate tag names and pieces of the parsed input into the
 * message, so any run of hyphens is broken up with spaces ("--" becomes
 * "- -"). Without that, a "-->" inside the message would terminate the
 * comment early and the rest of the message would be rendered as markup.
 *
 * @param $s The message text.
 */
function notice($s)
{
    // Put a space after every hyphen that is directly followed by another one.
    $safe = preg_replace('/-(?=-)/', '- ', (string) $s);
    if ($safe === NULL)
        $safe = str_replace('-', ' ', (string) $s); // PCRE failure: drop hyphens entirely
    echo "<!-- Notice: $safe -->\n";
}
/*!
 * Recursive worker of the BBCode parser.
 *
 * Scans \p $str from offset \p $i for "[tag]" constructs. Text between tags
 * is passed through parse_nonbb(); tags found in the global \c $bbtags table
 * are evaluated with evaluate_tag(). For tags that take arguments the
 * function calls itself to parse the enclosed text. Parsing stops at the
 * first closing tag ("[/...") encountered at this nesting level, which is
 * left for the caller to match against the tag it opened.
 *
 * @param $str         The string to parse.
 * @param $i           [in/out] The position from which to start parsing.
 *                     On return it is the offset of the '[' of the closing
 *                     tag that stopped the scan, or FALSE if all characters
 *                     have been consumed.
 * @param $contenttype One of the CT_ constants; handed to parse_nonbb() for
 *                     the plain text found at this nesting level.
 * @return Returns the HTML parsed substring of the given input.
 */
function parse_sub($str, &$i, $contenttype)
{
    global $bbtags;
    $outstr = "";
    $bookmark = $i; // Start of the text that has not been copied to $outstr yet

    for ($i = strpos($str, '[', $i); $i !== FALSE; $i = strpos($str, '[', $i))
    {
        $close = strpos($str, ']', $i + 1);
        if ($close === FALSE) {
            // No complete tag is left, so everything from the bookmark on is
            // plain text. The length must not be derived from $i here: it
            // has just become FALSE.
            $i = FALSE;
            return $outstr . parse_nonbb(substr($str, $bookmark), $contenttype);
        }

        // Look inside our tag, now
        $stag = substr($str, $i + 1, $close - $i - 1);
        $first = ($stag === '') ? '' : $stag[0]; // "[]" has no first character

        // If it's a closing tag, return and let the parent handle that
        if ($first === '/')
            return $outstr . parse_nonbb(substr($str, $bookmark, $i - $bookmark), $contenttype);

        // Doppelgaenger: "[[" -- retry from the second bracket
        if ($first === '[') {
            ++$i;
            continue;
        }

        // Make sure we're safe if args=1; we don't know, yet
        $tagc = preg_split('/\s*[\s=]\s*/', $stag, 2);
        if (count($tagc) == 2)
            $arg1 = $tagc[1]; // This technically allows [tag x]y[/tag]
        else
            $arg1 = NULL; // Don't reuse old arg values
        $tname = strtolower($tagc[0]); // Deliberately not trimmed

        // Look up tag
        $tstart = $i;
        $i = $close + 1;
        if (!array_key_exists($tname, $bbtags)) {
            notice("No bbtag called [$tname] ($stag)");
            continue; // Unknown tags stay in the text as written
        }

        // Tag found
        $bbtag = $bbtags[$tname];
        if ($bbtag['args'] > 0)
        {
            $tlen = strlen($tname) + 2; // Length of "[name" plus one delimiter, and of "[/name"

            // Handle associative tags
            if ($bbtag['args'] > 3)
            {
                $i = $tstart + $tlen; // First character after the delimiter following the name
                if (ctype_space($str[$i-1])) {
                    $args = read_attr_list($str, $i);
                    // Bail on failure, including a list that stopped
                    // without reaching the closing ']'
                    if ($args === NULL || $i >= strlen($str)) {
                        $i = FALSE;
                        return $outstr . parse_nonbb(substr($str, $bookmark), $contenttype);
                    }
                    ++$i; // Step over the ']' that ended the attribute list
                }
                else if ($str[$i-1] == ']')
                    $args = array();
                else continue; // e.g. "[tag=x]" is not valid for an associative tag
            }
            else {
                $args = NULL;
                // A one-argument tag must be exactly "[name]"
                if ($bbtag['args'] == 1 && $str[$tstart + $tlen - 1] != ']') {
                    notice("$str [$tstart+$tlen-1] != ']'");
                    continue;
                }
            }

            // Collect the enclosed text up to our own closing tag
            $arg0 = '';
            for (;;) {
                if (isset($bbtag['content'])) notice("Tag $tname has content type $bbtag[content]");
                $arg0 .= parse_sub($str, $i, isset($bbtag['content'])? $bbtag['content'] : CT_MARKDOWN);

                // Input ended before our closing tag: treat the whole
                // unclosed construct as plain text.
                if ($i === FALSE)
                    return $outstr . parse_nonbb(substr($str, $bookmark), $contenttype);

                // Make sure we arrived at our own closing tag
                $close = strpos($str, ']', $i);
                $ctag = substr($str, $i, $close - $i);
                if (strncasecmp($ctag, '[/'.$tname, $tlen) != 0) {
                    notice("strncasecmp('$ctag', '[/'.'$tname', '$tlen') != 0");
                    $arg0 .= $str[$i++]; // Keep the stray '[' as text
                    continue; // If not, just keep looking
                }
                break;
            }

            // Now we have a little more parsing to do for ternary tags:
            // whatever follows the name in the closing tag, minus an
            // optional leading '=', is the third argument.
            if ($bbtag['args'] == 3) {
                $arg2 = trim(substr($str, $i + $tlen, $close - $i - $tlen));
                if (strlen($arg2) > 0 && $arg2[0] == '=')
                    $arg2 = trim(substr($arg2, 1));
            }
            else
                $arg2 = NULL;
            $i = $close + 1;
        }
        else
            $arg0 = $arg1 = $arg2 = $args = NULL;

        // Flush the text that preceded the tag, then the tag itself
        $outstr .= parse_nonbb(substr($str, $bookmark, $tstart - $bookmark), $contenttype);
        $outstr .= evaluate_tag($tname, $bbtag, $arg0, $arg1, $arg2, $args);
        $bookmark = $i;
    }

    $outstr .= parse_nonbb(substr($str, $bookmark), $contenttype);
    return $outstr;
}
/*!
 * Turn one parsed tag into its replacement string.
 *
 * The tag's "type" selects how it is evaluated and its "args" count selects
 * which arguments take part. Every combination that cannot be rendered
 * yields an HTML comment rather than NULL or a fatal error, so the caller
 * can always concatenate the result.
 *
 * @note Unused tag arguments should be NULL; \p $arg1 and \p $arg2 should
 *       never be non-null at the same time as \p $args.
 * @param $tname The name of the tag.
 * @param $bbtag The tag array from the \c bbtags array.
 * @param $arg0  The first argument, the text between the tags, if applicable.
 * @param $arg1  The second argument, the = value in the opening tag, if applicable.
 * @param $arg2  The third argument, the = value in the closing tag, if applicable.
 * @param $args  An associative array of arguments, if applicable.
 * @return Returns the result of evaluating the tag;
 *         a string with which to replace the tag.
 */
function evaluate_tag($tname, $bbtag, $arg0, $arg1, $arg2, $args) {
    $argc = $bbtag['args'];

    switch ($bbtag['type'])
    {
        // The tag name doubles as the HTML element name
        case TT_TRIVIAL:
            switch ($argc) {
                case 0:  return "<$tname />";
                case 1:  return "<$tname>$arg0</$tname>";
                case 2:  return "<!-- This tag is invalid -->";
                case 3:  return "<!-- This tag is invalid -->";
                default: return "<!-- This tag would be a security risk -->";
            }

        // Arguments are spliced between the fragments of the "html" array
        case TT_INTERLEAVE:
            $h = $bbtag['html'];
            switch ($argc) {
                case 0:  return $h[0];
                case 1:  return $h[0] . $arg0 . $h[1];
                case 2:  return $h[0] . $arg1 . $h[1] . $arg0 . $h[2];
                case 3:  return $h[0] . $arg1 . $h[1] . $arg0 . $h[2] . $arg2 . $h[3];
                default: return "<!-- This tag cannot be automated -->";
            }

        // The "func" callable produces the replacement
        case TT_CALLBACK:
            $f = isset($bbtag['func']) ? $bbtag['func'] : NULL;
            if (!is_callable($f))
                return "<!-- This tag has no usable callback -->";
            switch ($argc) {
                case 0:  return $f();
                case 1:  return $f($arg0);
                case 2:  return $f($arg0, $arg1);
                case 3:  return $f($arg0, $arg1, $arg2);
                default: return $f($arg0, $args);
            }
    }

    // Unknown tag type: keep the documented "returns a string" contract
    return "<!-- This tag has an unknown type -->";
}
/*!
 * Parse the attribute list of an associative tag, e.g. the part after the
 * tag name in [tag a=1 b="two words" flag].
 *
 * Attributes are separated by whitespace. A value may be unquoted (it then
 * ends at whitespace or ']') or quoted with ' or ", in which case the
 * backslash escapes \\ \' \" \r \n \t are decoded. An attribute without
 * "=value" is stored with the value NULL.
 *
 * @param $str The string from which to read attributes.
 * @param $i   [in/out] The position from which to start reading.
 *             On success it is left on the ']' that ends the list.
 * @return Returns the associative array read in from the string, or NULL
 *         if the string ends before the list is terminated.
 */
function read_attr_list($str, &$i) {
    $len = strlen($str);
    $attrs = array();

    // Decoded in a single pass by strtr(), so the backslash produced by
    // "\\" is never reinterpreted as the start of another escape.
    $escapes = array(
        '\\\\' => '\\',
        "\\'"  => "'",
        '\\"'  => '"',
        '\\r'  => "\r",
        '\\n'  => "\n",
        '\\t'  => "\t",
    );

    while ($i < $len && $str[$i] != ']')
    {
        // Skip separators between attributes
        if (ctype_space($str[$i])) {
            ++$i;
            continue;
        }

        // Read an attribute name: it runs up to '=', ']' or whitespace
        $attr_start = $i;
        while ($i < $len && $str[$i] != '=' && $str[$i] != ']' && !ctype_space($str[$i]))
            ++$i;
        $attr_name = substr($str, $attr_start, $i - $attr_start);

        // Read past whitespace after the attribute name
        while ($i < $len && ctype_space($str[$i]))
            ++$i;
        if ($i >= $len) // Bail if out of bounds
            { notice("OOB1"); return NULL; }

        if ($str[$i] == '=')
        {
            // Step over '=' and any whitespace before the value
            do { ++$i; } while ($i < $len && ctype_space($str[$i]));
            if ($i >= $len)
                { notice("OOB2"); return NULL; }

            if ($str[$i] == '"' || $str[$i] == '\'')
            {
                $ochar = $str[$i];
                $val_start = ++$i;
                while ($i < $len && $str[$i] != $ochar) {
                    if ($str[$i] == '\\') ++$i; // An escaped character never closes the value
                    ++$i;
                }
                if ($i >= $len)
                    { notice("OOB2"); return NULL; }
                $val = strtr(substr($str, $val_start, $i - $val_start), $escapes);
                ++$i; // Step over the closing quote
            }
            else {
                $val_start = $i;
                while ($i < $len && $str[$i] != ']' && !ctype_space($str[$i]))
                    ++$i;
                if ($i >= $len)
                    { notice("OOB3"); return NULL; }
                $val = substr($str, $val_start, $i - $val_start);
            }
        }
        else $val = NULL;

        $attrs[$attr_name] = $val;
    }

    // The list must be terminated by ']'
    if ($i >= $len)
        { notice("OOB4"); return NULL; }
    return $attrs;
}
/*!
 * Parse for non-BBCode elements, such as URLs or (God forbid) Markdown.
 *
 * Currently the only transformation is turning bare URLs into links, and
 * only for CT_MARKDOWN content; any other content type is returned as-is.
 * Matches without a scheme (e.g. "www.example.com") get "http://" put in
 * front of the href so the link does not resolve relative to the page.
 *
 * @param $str         HTML-escaped plain text input.
 * @param $contenttype One of the CT_ constants.
 * @return Returns HTML-formatted parsed text.
 */
function parse_nonbb($str, $contenttype)
{
    if ($contenttype != CT_MARKDOWN)
        return $str;

    // Liberal URL pattern. The last character class lists punctuation that
    // may follow a URL but should not be part of it; its tail is
    // « » “ ” ‘ ’, written as code points so the pattern does not depend on
    // the encoding of this file.
    $urlexp = '/(?i)\b('
        . '(?:https?:\/\/|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}\/)'
        . '(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+'
        . '(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?\x{AB}\x{BB}\x{201C}\x{201D}\x{2018}\x{2019}])'
        . ')/u';

    $linked = preg_replace_callback(
        $urlexp,
        function ($m) {
            $url = $m[1];
            $href = preg_match('/^https?:\/\//i', $url) ? $url : 'http://' . $url;
            return '<a href="' . $href . '">' . $url . '</a>';
        },
        $str
    );

    // NULL signals a PCRE error (e.g. input that is not valid UTF-8);
    // return the text unlinked rather than losing it.
    return ($linked === NULL) ? $str : $linked;
}
/*!
 * Entry point of the BBCode parser.
 *
 * Drives parse_sub() over the whole input. parse_sub() stops early when it
 * meets a closing tag at the top level; since nothing is open there, the
 * '[' it stopped on is copied to the output verbatim and parsing resumes
 * right after it.
 *
 * @param $str The BBCode string to parse.
 * @return Returns the HTML parsed version of the input.
 */
function parse_bbcode($str) {
    $html = "";
    $pos = 0;

    for (;;) {
        $html .= parse_sub($str, $pos, CT_MARKDOWN);
        if ($pos === false)
            break; // Everything has been consumed

        // Stray closing tag: emit its '[' as text and carry on behind it
        $html .= $str[$pos];
        ++$pos;
    }

    return $html;
}
// Demo: run a sample of the supported tags, plus two bare URLs, through the
// parser and print the input followed by the parser's output.
$test = "
[b]Bold[/b]
[s]Strike[/s]
[center]Center[/center]
[u2]Dotted Under[/u2]
[url=google.com]Link[/url]
[col=red]Red[/col]
[size=5]Font size[/size]
[font=Arial]Font face[/font]
[tnail]http://img_url[/tnail]
[user]Precedent[/user]
[profile]Precedent[/profile]
[ln]
[youtube]ynnngasdf[/youtube]
[paypal]test[/paypal]
http://google.com/
www.google.com
[hr]
";

// Input section
echo "=================================\n", "Test:\n", "=================================\n", $test, "\n";

// Output section; notices raised while parsing appear before the result
print "=================================\nResult:\n=================================\n";
print parse_bbcode($test);
- ?>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement