View difference between Paste ID: KqcX24FC and aQEbdw4j
SHOW: | | - or go back to the newest paste.
1
<?php
2
/*!
3
 * @file bbcode.php
4
 * 
5
 * A BBCode tag is defined in terms of two parameters, a parse type,
6
 * and an argument count. The parse type, "type", is given by one of
7
 * the tag type ("TT_") constants.
8
 *
9
 * The argument count, "args", is in the range [0,4]. A zero-argument tag
10
 * does not have an end tag. A one-argument tag has its argument between
11
 * the start and end tag. A two-argument tag is identical to a one-argument
12
 * tag with the addition of an =arg1 in the opening tag, e.g.,
13
 *   [url=arg1]arg0[/url]
14
 * A three-argument tag is given an additional parameter at tag close.
15
 * A four-argument tag actually has unlimited arguments and operates as
16
 * an HTML tag. The parameters are passed in an associative array to
17
 * a callback, which is the only valid type for this count.
18
 * 
19
 * Tags also contain an "enclosed" flag which denotes what is wrapped
20
 * inside the tags; this is one of the content type ("CT_") constants.
21
 *
22
 * From an implementation perspective, the argument count controls how
23
 * the tag is decomposed into memory, ie, parsed. The type determines
24
 * how the tag is evaluated after parse finishes. The argument count is
25
 * used in evaluation to pass the correct number of parameters, but the
26
 * type is not considered during parse, at all.
27
 *
28
 * Other notes: Tag names may not contain spaces or "=".
29
 * 
30
 * @section License
31
 * 
32
 * Copyright (C) 2014 Josh Ventura <josh at dreamland im>
33
 * 
34
 * This file is a BBCode parser. Permission is hereby granted,
35
 * free of charge, to any person obtaining a copy of this software
36
 * and associated documentation files (the "Software"), to deal in
37
 * the Software without restriction, including without limitation
38
 * the rights to use, copy, modify, merge, publish, distribute,
39
 * sublicense, and/or sell copies of the Software, and to permit
40
 * persons to whom the Software is furnished to do so, subject to
41
 * the following conditions:
42
 * 
43
 * The above copyright notice and this permission notice shall be
44
 * included in all copies or substantial portions of the Software.
45
 * 
46
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
47
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
48
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
49
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
50
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
51
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
52
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
53
 * OR OTHER DEALINGS IN THE SOFTWARE.
54
 * 
55
 * Amen.
56
*/
57
58
// Tag parse types ("TT_"): select how evaluate_tag() turns a parsed tag into output.
const TT_TRIVIAL    = 0; ///< Emitted as an HTML tag of the same name
const TT_INTERLEAVE = 1; ///< Emitted as "html"[0] . arg . "html"[1] . arg ... (pieces interleaved with arguments)
const TT_CALLBACK   = 2; ///< Emitted as whatever the tag's callback function returns

// Content types ("CT_"): select how text enclosed by a tag is treated.
const CT_MARKDOWN = 0; ///< Enclosed text is formatted as non-BBCode text (the default)
const CT_RAW      = 1; ///< Enclosed text is raw data and is passed through unformatted
64
65
include('tags.php');
66
67
/*!
 * Emit a diagnostic message as an HTML comment on standard output.
 * The message frequently contains user-written tag text, so runs of
 * hyphens are broken up: a literal "--" (or "-->") inside the message
 * could otherwise terminate the comment early and leak markup.
 * @param $s  The message to report.
*/
function notice($s) {
  // Insert a space after every hyphen that is directly followed by another hyphen
  $safe = preg_replace('/-(?=-)/', '- ', (string) $s);
  echo "<!-- Notice: $safe -->\n";
}
68
69
/*!
 * Recursive worker of the BBCode parser. Call it with i = 0 and it
 * calls itself for the contents of every tag that takes arguments.
 * It returns as soon as it meets a closing tag, leaving $i on the '['
 * of that closing tag so the caller can decide whose tag it is.
 * @param  $str          The string to parse.
 * @param  $i            [in/out] The position from which to start parsing.
 *                       Set at the end of the function to denote the first
 *                       unparsed character, or FALSE if all characters have
 *                       been consumed.
 * @param  $contenttype  One of the CT_ constants; CT_RAW suppresses tag
 *                       evaluation and non-BBCode formatting.
 * @param  $opentags     Names of the tags currently open around this text,
 *                       outermost first.
 * @return Returns the HTML parsed substring of the given input.
*/
function parse_sub($str, &$i, $contenttype, $opentags)
{
  global $bbtags;

  // Tells whether $ctag ("[/name..." up to, but excluding, the "]") closes
  // the tag called $name. The name must be followed by the end of the tag,
  // whitespace or "=", so that "[/bar]" is not mistaken for "[/b]".
  $closes = function ($ctag, $name) {
    $n = strlen($name) + 2;
    if (strncasecmp($ctag, '[/' . $name, $n) != 0)
      return false;
    return strlen($ctag) == $n || $ctag[$n] == '=' || ctype_space($ctag[$n]);
  };

  $outstr = "";
  $bookmark = $i; // Start of the text not yet copied to $outstr
  for ($i = strpos($str, '[', $i); $i !== FALSE; $i = strpos($str, '[', $i))
  {
    $close = strpos($str, ']', $i + 1);
    if ($close === FALSE) {
      $i = FALSE;
      break;
    }

    // Look inside our tag, now
    $stag = (string) substr($str, $i + 1, $close - $i - 1);

    // If it's a closing tag, return and let the parent handle that.
    // An empty tag ("[]") has no first character to inspect.
    if ($stag !== '' && $stag[0] == '/')
      return $outstr . parse_nonbb(substr($str, $bookmark, $i - $bookmark), $contenttype);

    // Raw content: opening tags are plain text; only closing tags matter
    if ($contenttype == CT_RAW) {
      ++$i;
      continue;
    }

    // Doppelgänger: another '[' before the ']' means the first '[' was
    // plain text; resume scanning at the inner bracket.
    $doppl = strpos($stag, '[');
    if ($doppl !== FALSE) {
      $i += $doppl + 1;
      continue;
    }

    // Make sure we're safe if args=1; we don't know, yet
    $tagc = preg_split('/\s*[\s=]\s*/', $stag, 2);
    if (count($tagc) == 2)
      $arg1 = $tagc[1]; // This technically allows [tag x]y[/tag]
    else
      $arg1 = NULL; // Don't reuse old arg values
    $tname = strtolower($tagc[0]); // Deliberately not trimmed

    // Look up tag
    $tstart = $i;
    $i = $close + 1;
    if (!array_key_exists($tname, $bbtags)) {
      notice("No bbtag called [$tname] ($stag)");
      continue;
    }

    // Tag found
    $bbtag = $bbtags[$tname];
    if ($bbtag['args'] > 0)
    {
      $tlen = strlen($tname) + 2; // Length of "[name" plus one, and of "[/name"

      // Handle associative tags
      if ($bbtag['args'] > 3)
      {
        $i = $tstart + $tlen;
        if (ctype_space($str[$i-1])) {
          $args = read_attr_list($str, $i);
          // Bail on failure, and also when the list ran off the end of the
          // string without a ']': stepping past the end would hand strpos()
          // an offset outside the string.
          if ($args === NULL || $i >= strlen($str)) {
            $i = FALSE;
            return $outstr . parse_nonbb(substr($str, $bookmark), $contenttype);
          }
          ++$i; // Step over the ']'
        }
        else if ($str[$i-1] == ']')
          $args = array();
        else continue;
      }
      else {
        $args = NULL;
        // A one-argument tag must be exactly "[name]"
        if ($bbtag['args'] == 1 && $str[$tstart + $tlen - 1] != ']') {
          notice("$str [$tstart+$tlen-1] != ']'");
          continue;
        }
      }

      $arg0 = '';
      $stend = $i; // First character after the opening tag
      for (;;)
      {
        // Parse the tag's contents; the child returns at the first closing tag
        array_push($opentags, $tname);
        $arg0 .= parse_sub($str, $i, isset($bbtag['content'])? $bbtag['content'] : CT_MARKDOWN, $opentags);
        array_pop($opentags);

        // Make sure we arrived at a closing tag; if the input ran out, emit
        // the opening tag as plain text followed by what was parsed inside it
        if ($i === FALSE)
          return $outstr . parse_nonbb(substr($str, $bookmark, $stend - $bookmark), $contenttype) . $arg0;

        $close = strpos($str, ']', $i);
        $ctag = substr($str, $i, $close - $i);

        // Make sure this is *our* closing tag
        if (!$closes($ctag, $tname))
        {
          // If someone else's closing tag, bail; tags should be closed in order
          foreach ($opentags as $otname)
            if ($closes($ctag, $otname))
              return $outstr . parse_nonbb(substr($str, $bookmark, $stend - $bookmark), $contenttype) . $arg0;

          // If not anyone's tag, keep its '[' as text and just keep looking
          $arg0 .= $str[$i++];
          continue;
        }

        break;
      }

      // Now we have a little more parsing to do for ternary tags:
      // the closing tag carries the third argument, "[/name=arg2]"
      if ($bbtag['args'] == 3) {
        $arg2 = trim(substr($str, $i + $tlen, $close - $i - $tlen));
        if (strlen($arg2) > 0 && $arg2[0] == '=')
          $arg2 = trim(substr($arg2, 1));
      }
      else
        $arg2 = NULL;

      $i = $close + 1;
    }
    else
      $arg0 = $arg1 = $arg2 = $args = NULL;

    // Flush the text preceding the tag, then the evaluated tag itself
    $outstr .= parse_nonbb(substr($str, $bookmark, $tstart - $bookmark), $contenttype);
    $outstr .= evaluate_tag($tname, $bbtag, $arg0, $arg1, $arg2, $args);
    $bookmark = $i;
  }
  $outstr .= parse_nonbb(substr($str, $bookmark), $contenttype);
  return $outstr;
}
211
212
213
/*! Turn one parsed tag into its replacement text, according to the
 * tag's type (a TT_ constant) and its argument count.
 * Combinations that cannot be rendered yield an explanatory HTML comment
 * rather than nothing; this includes an unknown tag type and a callback
 * tag whose "func" entry is missing or not callable.
 * @note Unused tag arguments should be NULL; \p $arg1 and \p $arg2 should
 *       never be non-null at the same time as \p $args.
 * @param $tname  The name of the tag.
 * @param $bbtag  The tag array from the \c bbtags array.
 * @param $arg0   The first argument, the text between the tags, if applicable.
 * @param $arg1   The second argument, the = value in the opening tag, if applicable.
 * @param $arg2   The third argument, the = value in the closing tag, if applicable.
 * @param $args   An associative array of arguments, if applicable.
 * @return Returns the result of evaluating the tag;
 *         a string with which to replace the tag.
*/
function evaluate_tag($tname, $bbtag, $arg0, $arg1, $arg2, $args) {
  switch ($bbtag['type'])
  {
    case TT_TRIVIAL: switch ($bbtag['args']) {
      case 0:  return "<$tname />";
      case 1:  return "<$tname>$arg0</$tname>";
      case 2:  return "<!-- This tag is invalid -->";
      case 3:  return "<!-- This tag is invalid -->";
      default: return "<!-- This tag would be a security risk -->";
    }
    case TT_INTERLEAVE: $h = $bbtag['html']; switch ($bbtag['args']) {
      case 0:  return $h[0];
      case 1:  return $h[0] . $arg0 . $h[1];
      case 2:  return $h[0] . $arg1 . $h[1] . $arg0 . $h[2];
      case 3:  return $h[0] . $arg1 . $h[1] . $arg0 . $h[2] . $arg2 . $h[3];
      default: return "<!-- This tag cannot be automated -->";
    }
    case TT_CALLBACK:
      $f = isset($bbtag['func']) ? $bbtag['func'] : NULL;
      // Calling a missing or undefined function would be a fatal error
      if (!is_callable($f))
        return "<!-- This tag has no usable callback -->";
      switch ($bbtag['args']) {
        case 0:  return $f();
        case 1:  return $f($arg0);
        case 2:  return $f($arg0, $arg1);
        case 3:  return $f($arg0, $arg1, $arg2);
        default: return $f($arg0, $args);
      }
  }
  // No known type matched
  return "<!-- This tag has an unknown type -->";
}
252
253
254
/*! Read the attribute list of an associative tag, eg, the
 *   one="two" three=eight flag
 * part of [tag one="two" three=eight flag].
 * Values may be wrapped in single or double quotes, inside which the
 * backslash escapes \\ \' \" \r \n \t are understood, or left unquoted,
 * in which case they run until whitespace or "]". An attribute without
 * "=" gets the value NULL.
 * @param $str The string from which to read attributes.
 * @param $i   [in/out] The position from which to start reading.
 *             Set at end of function call to denote the end of the list:
 *             the position of the terminating "]", or the string length
 *             if the string ended right after a complete attribute.
 * @return  Returns the associative array read in from the string, or NULL
 *          if the string ended in the middle of an attribute.
*/
function read_attr_list($str, &$i) {
  // Escape table for strtr(), which substitutes in a single pass: text that
  // results from one replacement is never scanned again, so an escaped
  // backslash followed by "n" stays a backslash and an "n".
  static $unescape = array(
    '\\\\' => '\\',
    "\\'"  => "'",
    '\\"'  => '"',
    '\\r'  => "\r",
    '\\n'  => "\n",
    '\\t'  => "\t",
  );

  $len = strlen($str);
  $attrs = array();
  while ($i < $len && $str[$i] != ']')
  {
    if (ctype_space($str[$i])) {
      ++$i;
      continue;
    }

    // Read an attribute name; it ends at "=", whitespace or the closing "]"
    $attr_start = $i;
    while (++$i < $len && $str[$i] != '=' && $str[$i] != ']' && !ctype_space($str[$i]));
    $attr_name = substr($str, $attr_start, $i-$attr_start);

    if ($i >= $len) // Bail if out of bounds
      { notice("OOB0: '$attr_name'"); return NULL; }

    // Read past the whitespace after the attribute name
    while ($i < $len && ctype_space($str[$i]))
      ++$i;
    if ($i >= $len) // Bail if out of bounds
      { notice("OOB1: '$attr_name'"); return NULL; }

    if ($str[$i] != '=') {
      // Valueless attribute. Leave $i where it is: this character starts
      // the next attribute or is the closing "]".
      $attrs[$attr_name] = NULL;
      continue;
    }

    // Skip the "=" and any whitespace after it
    while (++$i < $len && ctype_space($str[$i]));
    if ($i >= $len) // Nothing follows the "="
      { notice("OOB3"); return NULL; }

    if ($str[$i] == '"' || $str[$i] == '\'')
    {
      // Quoted value: scan to the matching quote, stepping over escaped characters
      $val_start = $i + 1;
      $ochar = $str[$i];
      while (++$i < $len && $str[$i] != $ochar)
        if ($str[$i] == '\\') ++$i;
      if ($i >= $len)
        { notice("OOB2"); return NULL; }
      $val = strtr(substr($str, $val_start, $i - $val_start), $unescape);
      ++$i; // Step over the closing quote
    }
    else {
      // Unquoted value: runs until whitespace or "]", and may be empty
      $val_start = $i;
      while ($i < $len && $str[$i] != ']' && !ctype_space($str[$i]))
        ++$i;
      if ($i >= $len)
        { notice("OOB3"); return NULL; }
      $val = substr($str, $val_start, $i - $val_start);
    }
    $attrs[$attr_name] = $val;
  }
  return $attrs;
}
318
319
/*!
 * Parse for non-BBCode elements, such as URLs or (God forbid) Markdown.
 * Currently this turns URL-like text into links. Matches that carry no
 * http:// or https:// scheme (eg, www.example.com) get "http://"
 * prepended in the link target so the link is not a relative one.
 * @param $str          HTML-escaped plain text input.
 * @param $contenttype  One of the CT_ constants; anything other than
 *                      CT_MARKDOWN returns the input untouched.
 * @return  Returns HTML-formatted parsed text.
*/
function parse_nonbb($str, $contenttype)
{
  if ($contenttype != CT_MARKDOWN)
    return $str;

  $urlexp = "/(?i)\b("
    . "(?:https?:\/\/|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}\/)"
    . "(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+"
    . "(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'\".,<>?«»“”‘’])"
  .")/";

  $linked = preg_replace_callback($urlexp, function ($m) {
    $url = $m[1];
    $href = preg_match('/^https?:\/\//i', $url) ? $url : 'http://' . $url;
    return "<a href=\"$href\">$url</a>";
  }, $str);

  // NULL signals a PCRE failure (eg, backtrack limit); keep the text as it was
  return ($linked === NULL) ? $str : $linked;
}
337
338
/*! Main BBCode parser call.
 * Drives parse_sub() over the whole input. Whenever parse_sub() stops
 * before the end (it stops at closing tags nobody is waiting for), the
 * character it stopped on is copied to the output verbatim and parsing
 * resumes right after it.
 * @param  $str  The BBCode string to parse.
 * @return Returns the HTML parsed version of the input.
*/
function parse_bbcode($str) {
  $html = "";
  $pos = 0;
  do {
    $html .= parse_sub($str, $pos, CT_MARKDOWN, array());
    if ($pos === false)
      break; // Everything has been consumed
    // Stray closing tag: emit its first character as text and move on
    $html .= $str[$pos];
    ++$pos;
  } while (true);
  return $html;
}
352
353
// Ad-hoc self-test: print a sample BBCode document, then its parsed form.
$test = "
[b]Bold[/b]
[assoc one=\"two\" two=\"four\" three=eight four=sixteen]yes[/assoc]
[/code]
www.google.com

[b]this should be[i] bold[/b]
[b]this should not be, but this should be: [b] bold[/b]

[hr]

[ [b]bold[/b] ]
[ [b]bold[/b] ]

http://google.com/
www.google.com [
";

// Horizontal rule used above and below each section title
$rule = "=================================\n";

echo $rule . "Test:\n" . $rule . $test . "\n";
echo $rule . "Result:\n" . $rule;
echo parse_bbcode($test);
374
375
?>