SHOW:
|
|
- or go back to the newest paste.
1 | <?php | |
2 | /*! | |
3 | * @file bbcode.php | |
4 | * | |
5 | * A BBCode tag is defined in terms of two parameters, a parse type, | |
6 | * and an argument count. The parse type, "type", is given by one of | |
7 | * the tag type ("TT_") constants. | |
8 | * | |
9 | * The argument count, "args", is in the range [0,4]. A zero-argument tag | |
10 | * does not have an end tag. A one-argument tag has its argument between | |
11 | * the start and end tag. A two-argument tag is identical to a one-argument | |
12 | * tag with the addition of an =arg1 in the opening tag, e.g., | |
13 | * [url=arg1]arg0[/url] | |
14 | * A three-argument tag is given an additional parameter at tag close. | |
15 | * A four-argument tag actually has unlimited arguments and operates as | |
16 | * an HTML tag. The parameters are passed in an associative array to | |
17 | * a callback, which is the only valid type for this count. | |
18 | * | |
19 | * Tags also contain an "enclosed" flag which denotes what is wrapped | |
20 | * inside the tags; this is one of the content type ("CT_") constants. | |
21 | * | |
22 | * From an implementation perspective, the argument count controls how | |
23 | * the tag is decomposed into memory, ie, parsed. The type determines | |
24 | * how the tag is evaluated after parse finishes. The argument count is | |
25 | * used in evaluation to pass the correct number of parameters, but the | |
26 | * type is not considered during parse, at all. | |
27 | * | |
28 | * Other notes: Tag names may not contain spaces or "=". | |
29 | * | |
30 | * @section License | |
31 | * | |
32 | * Copyright (C) 2014 Josh Ventura <josh at dreamland im> | |
33 | * | |
34 | * This file is a BBCode parser. Permission is hereby granted, | |
35 | * free of charge, to any person obtaining a copy of this software | |
36 | * and associated documentation files (the "Software"), to deal in | |
37 | * the Software without restriction, including without limitation | |
38 | * the rights to use, copy, modify, merge, publish, distribute, | |
39 | * sublicense, and/or sell copies of the Software, and to permit | |
40 | * persons to whom the Software is furnished to do so, subject to | |
41 | * the following conditions: | |
42 | * | |
43 | * The above copyright notice and this permission notice shall be | |
44 | * included in all copies or substantial portions of the Software. | |
45 | * | |
46 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
47 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
48 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
49 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
50 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
51 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
52 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE | |
53 | * OR OTHER DEALINGS IN THE SOFTWARE. | |
54 | * | |
55 | * Amen. | |
56 | */ | |
57 | ||
/*! Tag type ("TT_") constants: how a tag is evaluated once it has been parsed. */
const
  TT_TRIVIAL    = 0,  ///< The tag maps directly onto the HTML tag of the same name.
  TT_INTERLEAVE = 1,  ///< The tag becomes "html"[0] . arg0 . "html"[1] . arg1 ...
  TT_CALLBACK   = 2;  ///< The tag is evaluated by a callback function.

/*! Content type ("CT_") constants: what is enclosed between the tags. */
const
  CT_MARKDOWN = 0,  ///< Non-BB text inside the tag is formatted (the default).
  CT_RAW      = 1;  ///< The inside of the tag is raw data and is not formatted.
64 | ||
65 | include('tags.php'); | |
66 | ||
/*!
 * Emit a diagnostic message as an HTML comment on the output stream.
 *
 * Callers pass text taken from the parsed input, so every run of hyphens
 * is broken up with spaces: the message can then never contain "--" and
 * therefore cannot terminate the comment early and inject markup.
 * @param $s  The message to report.
 */
function notice($s) {
  // "-" followed by another "-" gets a trailing space: "-->" becomes "- ->"
  $safe = preg_replace('/-(?=-)/', '- ', (string)$s);
  if ($safe === NULL)  // Regex engine failure; fall back to a fixed message
    $safe = '(unprintable notice)';
  echo "<!-- Notice: $safe -->\n";
}
68 | ||
/*!
 * Report whether a closing-tag candidate closes the tag named \p $name.
 * The name must be followed by the end of the tag, by "=", or by whitespace,
 * so that "[/bold" is not mistaken for the closing tag of "b".
 * @param $ctag  The candidate, from its '[' up to but excluding its ']'.
 * @param $name  The (lower-case) tag name to compare against.
 * @return TRUE if \p $ctag closes \p $name, FALSE otherwise.
 */
function bb_closes_tag($ctag, $name)
{
  $n = strlen($name) + 2; // Length of "[/" . $name
  if (strncasecmp($ctag, '[/' . $name, $n) != 0)
    return FALSE;
  if (strlen($ctag) == $n)
    return TRUE;
  return $ctag[$n] == '=' || ctype_space($ctag[$n]);
}

/*!
 * This is a recursive function; call it with i = 0, and it will
 * call itself recursively until all tags are parsed. It returns the
 * parsed string, with $i set to the position of the first-read closing tag.
 * @param $str          The string to parse.
 * @param $i            [in/out] The position from which to start parsing.
 *                      Set at the end of the function to the position of the
 *                      '[' of the closing tag that stopped the parse, or to
 *                      FALSE if all characters have been consumed.
 * @param $contenttype  One of the CT_ constants; with CT_RAW, opening tags
 *                      are skipped and only closing tags are acted upon.
 * @param $opentags     Names of the tags currently open around this text,
 *                      outermost first.
 * @return Returns the HTML parsed substring of the given input.
 */
function parse_sub($str, &$i, $contenttype, $opentags)
{
  global $bbtags;
  $outstr = "";
  $bookmark = $i; // Start of the text not yet copied into $outstr
  for ($i = strpos($str, '[', $i); $i !== FALSE; $i = strpos($str, '[', $i))
  {
    $close = strpos($str, ']', $i + 1);
    if ($close === FALSE) {
      $i = FALSE;
      break;
    }

    // Look inside our tag, now
    $stag = substr($str, $i+1, $close - $i - 1);

    // If it's a closing tag, return and let the parent handle that.
    // The length check keeps "[]" from indexing into an empty string.
    if (strlen($stag) > 0 && $stag[0] == '/')
      return $outstr . parse_nonbb(substr($str, $bookmark, $i - $bookmark), $contenttype);

    if ($contenttype == CT_RAW) {
      ++$i;
      continue;
    }

    // Doppelgaenger: another '[' before the ']' means the tag really starts later
    $doppl = strpos($stag, '[');
    if ($doppl !== FALSE) {
      $i += $doppl + 1;
      continue;
    }

    // Make sure we're safe if args=1; we don't know, yet
    $tagc = preg_split('/\s*[\s=]\s*/', $stag, 2);
    if (count($tagc) == 2)
      $arg1 = $tagc[1]; // This technically allows [tag x]y[/tag]
    else
      $arg1 = NULL; // Don't reuse old arg values
    $tname = strtolower($tagc[0]); // Deliberately not trimmed

    // Look up tag
    $tstart = $i;
    $i = $close + 1;
    if (!array_key_exists($tname, $bbtags)) {
      notice("No bbtag called [$tname] ($stag)");
      continue;
    }

    // Tag found
    $bbtag = $bbtags[$tname];
    if ($bbtag['args'] > 0)
    {
      $tlen = strlen($tname) + 2;

      // Handle associative tags
      if ($bbtag['args'] > 3)
      {
        $i = $tstart + $tlen;
        if (ctype_space($str[$i-1])) {
          $args = read_attr_list($str, $i);
          if ($args === NULL) { // Bail on failure
            $i = FALSE;
            return $outstr . parse_nonbb(substr($str, $bookmark), $contenttype);
          }
          ++$i; // Step over the ']' that ended the attribute list
        }
        else if ($str[$i-1] == ']')
          $args = array();
        else continue;
      }
      else {
        $args = NULL;
        if ($bbtag['args'] == 1 && $str[$tstart + $tlen - 1] != ']') {
          notice("$str [$tstart+$tlen-1] != ']'");
          continue;
        }
      }

      $arg0 = '';
      $stend = $i; // First character after the opening tag
      for (;;)
      {
        // Parse the tag's contents with this tag on the open-tag stack
        array_push($opentags, $tname);
        $arg0 .= parse_sub($str, $i, isset($bbtag['content'])? $bbtag['content'] : CT_MARKDOWN, $opentags);
        array_pop($opentags);

        // Make sure we arrived at a closing tag
        if ($i === FALSE)
          return $outstr . parse_nonbb(substr($str, $bookmark, $stend - $bookmark), $contenttype) . $arg0;

        $close = strpos($str, ']', $i);
        $ctag = substr($str, $i, $close - $i);

        // Make sure this is *our* closing tag
        if (!bb_closes_tag($ctag, $tname))
        {
          // If someone else's closing tag, bail; tags should be closed in order
          foreach ($opentags as $otname)
            if (bb_closes_tag($ctag, $otname))
              return $outstr . parse_nonbb(substr($str, $bookmark, $stend - $bookmark), $contenttype) . $arg0;

          // If not anyone's tag, keep its '[' as text and keep looking
          $arg0 .= $str[$i++];
          continue;
        }

        break;
      }

      // Now we have a little more parsing to do for ternary tags
      if ($bbtag['args'] == 3) {
        $arg2 = trim(substr($str, $i + $tlen, $close - $i - $tlen));
        if (strlen($arg2) > 0 && $arg2[0] == '=')
          $arg2 = trim(substr($arg2, 1));
      }
      else
        $arg2 = NULL;

      $i = $close + 1;
    }
    else
      $arg0 = $arg1 = $arg2 = $args = NULL;

    $outstr .= parse_nonbb(substr($str, $bookmark, $tstart - $bookmark), $contenttype);
    $outstr .= evaluate_tag($tname, $bbtag, $arg0, $arg1, $arg2, $args);
    $bookmark = $i;
  }
  $outstr .= parse_nonbb(substr($str, $bookmark), $contenttype);
  return $outstr;
}
211 | ||
212 | ||
/*! Turn one fully-parsed tag into its replacement text.
 *  Dispatches on the tag's "type" first and on its "args" count second.
 *  @note Unused tag arguments should be NULL; \p $arg1 and \p $arg2 should
 *        never be non-null at the same time as \p $args.
 *  @param $tname  The name of the tag.
 *  @param $bbtag  The tag array from the \c bbtags array.
 *  @param $arg0   The first argument, the text between the tags, if applicable.
 *  @param $arg1   The second argument, the = value in the opening tag, if applicable.
 *  @param $arg2   The third argument, the = value in the closing tag, if applicable.
 *  @param $args   An associative array of arguments, if applicable.
 *  @return Returns the string with which to replace the tag, or NULL when
 *          the tag's type is none of the TT_ constants.
 */
function evaluate_tag($tname, $bbtag, $arg0, $arg1, $arg2, $args) {
  $kind = $bbtag['type'];

  if ($kind == TT_TRIVIAL) {
    $argc = $bbtag['args'];
    if ($argc == 0) return "<$tname />";
    if ($argc == 1) return "<$tname>$arg0</$tname>";
    if ($argc == 2) return "<!-- This tag is invalid -->";
    if ($argc == 3) return "<!-- This tag is invalid -->";
    return "<!-- This tag would be a security risk -->";
  }

  if ($kind == TT_INTERLEAVE) {
    $pieces = $bbtag['html'];
    $argc = $bbtag['args'];
    if ($argc == 0) return $pieces[0];
    if ($argc == 1) return $pieces[0] . $arg0 . $pieces[1];
    if ($argc == 2) return $pieces[0] . $arg1 . $pieces[1] . $arg0 . $pieces[2];
    if ($argc == 3) return $pieces[0] . $arg1 . $pieces[1] . $arg0 . $pieces[2] . $arg2 . $pieces[3];
    return "<!-- This tag cannot be automated -->";
  }

  if ($kind == TT_CALLBACK) {
    $handler = $bbtag['func'];
    $argc = $bbtag['args'];
    if ($argc == 0) return $handler();
    if ($argc == 1) return $handler($arg0);
    if ($argc == 2) return $handler($arg0, $arg1);
    if ($argc == 3) return $handler($arg0, $arg1, $arg2);
    return $handler($arg0, $args);
  }

  // Unknown tag type: nothing to substitute
  return NULL;
}
252 | ||
253 | ||
/*! Parse the attribute list of an associative tag, e.g. the
 *  `one="two" three=eight flag` part of `[tag one="two" three=eight flag]`.
 *
 *  Each attribute is a name optionally followed by `=` and a value. A value
 *  is either quoted with ' or " (backslash escapes \\ \' \" \r \n \t are
 *  decoded) or a bare word ending at whitespace or `]`. An attribute with no
 *  `=` is stored with a NULL value.
 *  @param $str  The string from which to read attributes.
 *  @param $i    [in/out] The position from which to start reading. On success
 *               it is left on the `]` that ends the list.
 *  @return Returns the associative array read in from the string, or NULL
 *          if the string ends before the list is closed by `]`.
 */
function read_attr_list($str, &$i) {
  // strtr() with an array replaces in a single pass and never rescans its own
  // output, so "\\n" decodes to a backslash followed by "n", not to a newline.
  static $unescape = array(
    '\\\\' => '\\',
    '\\\'' => '\'',
    '\\"'  => '"',
    '\\r'  => "\r",
    '\\n'  => "\n",
    '\\t'  => "\t",
  );

  $len = strlen($str);
  $attrs = array();
  while ($i < $len && $str[$i] != ']')
  {
    if (ctype_space($str[$i])) {
      ++$i;
      continue;
    }

    // Read an attribute name; it always takes at least one character and
    // ends at '=', at whitespace, or at the ']' closing the tag.
    $attr_start = $i;
    while (++$i < $len && $str[$i] != '=' && $str[$i] != ']' && !ctype_space($str[$i]));
    $attr_name = substr($str, $attr_start, $i - $attr_start);

    if ($i >= $len) // Bail if out of bounds
      { notice("OOB0: '$attr_name'"); return NULL; }

    // Read past any whitespace after the attribute name
    while ($i < $len && ctype_space($str[$i]))
      ++$i;
    if ($i >= $len) // Bail if out of bounds
      { notice("OOB1: '$attr_name'"); return NULL; }

    // No '=': a valueless attribute. Leave $i where it is, since it already
    // points at the next attribute name or at the closing ']'.
    if ($str[$i] != '=') {
      $attrs[$attr_name] = NULL;
      continue;
    }

    // Step over the '=' and any whitespace following it
    while (++$i < $len && ctype_space($str[$i]));
    if ($i >= $len)
      { notice("OOB4: '$attr_name'"); return NULL; }

    if ($str[$i] == '"' || $str[$i] == '\'')
    {
      // Quoted value: scan to the matching quote, skipping escaped characters
      $ochar = $str[$i];
      $val_start = $i + 1;
      while (++$i < $len && $str[$i] != $ochar)
        if ($str[$i] == '\\') ++$i;
      if ($i >= $len)
        { notice("OOB2"); return NULL; }
      $val = strtr(substr($str, $val_start, $i - $val_start), $unescape);
      ++$i; // Step over the closing quote
    }
    else {
      // Bare value: may be empty when the ']' follows the '=' directly
      $val_start = $i;
      while ($i < $len && $str[$i] != ']' && !ctype_space($str[$i]))
        ++$i;
      if ($i >= $len)
        { notice("OOB3"); return NULL; }
      $val = substr($str, $val_start, $i - $val_start);
    }
    $attrs[$attr_name] = $val;
  }

  // The loop may only end on the closing ']'
  if ($i >= $len)
    { notice("OOB5"); return NULL; }
  return $attrs;
}
318 | ||
/*!
 * Parse for non-BBCode elements, such as URLs or (God forbid) Markdown.
 *
 * Currently this only turns URLs into links. The link text is the URL as
 * written; the link target gets "http://" prepended when the URL was written
 * without a scheme (e.g. "www.example.com"), since it would otherwise be
 * treated as a path relative to the current page.
 * @param $str          HTML-escaped plain text input.
 * @param $contenttype  One of the CT_ constants; anything other than
 *                      CT_MARKDOWN leaves the text untouched.
 * @return Returns HTML-formatted parsed text. If the regular expression
 *         engine fails on the input, the text is returned unchanged.
 */
function parse_nonbb($str, $contenttype)
{
  if ($contenttype != CT_MARKDOWN)
    return $str;

  $urlexp = "/(?i)\b("
          .   "(?:https?:\/\/|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}\/)"
          .   "(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+"
          .   "(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'\".,<>?«»“”‘’])"
          . ")/";

  $linked = preg_replace_callback($urlexp, function ($m) {
    $url = $m[1];
    $href = preg_match('/^https?:\/\//i', $url) ? $url : 'http://' . $url;
    return "<a href=\"$href\">$url</a>";
  }, $str);

  // NULL means the match itself failed (e.g. backtrack limit); keep the text
  return $linked === NULL ? $str : $linked;
}
337 | ||
/*! Entry point of the BBCode parser.
 *  Runs parse_sub() over the input repeatedly. Whenever it stops early on a
 *  closing tag that nothing opened, the tag's '[' is copied to the output as
 *  plain text and parsing resumes right after it.
 *  @param $str The BBCode string to parse.
 *  @return Returns the HTML parsed version of the input.
 */
function parse_bbcode($str) {
  $html = "";
  $pos = 0;
  for (;;) {
    $html .= parse_sub($str, $pos, CT_MARKDOWN, array());
    if ($pos === false)
      break;
    $html .= $str[$pos];
    ++$pos;
  }
  return $html;
}
352 | ||
// Demo run: print a sample BBCode document, then what the parser makes of it.
// The sample begins and ends with a newline (the blank first and last lines).
$test = <<<'BBCODE'

[b]Bold[/b]
[assoc one="two" two="four" three=eight four=sixteen]yes[/assoc]
[/code]
www.google.com

[b]this should be[i] bold[/b]
[b]this should not be, but this should be: [b] bold[/b]

[hr]

[ [b]bold[/b] ]
[ [b]bold[/b] ]

http://google.com/
www.google.com [

BBCODE;

$rule = "=================================";

echo $rule, "\nTest:\n", $rule, "\n", $test, "\n";
echo $rule, "\nResult:\n", $rule, "\n";
echo parse_bbcode($test);
374 | ||
375 | ?> |