Advertisement
krusader74

Markdown2BBCode.pl

Jan 26th, 2016
394
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Perl 60.23 KB | None | 0 0
  1. #!/usr/bin/perl
  2.  
  3. #
  4. # Markdown2BBCode.pl -- A text-to-BBCode conversion tool for ProBoards users
  5. #
  6. # Derived from Markdown.pl -- A text-to-HTML conversion tool
  7. # Copyright (c) 2004 John Gruber
  8. # <http://daringfireball.net/projects/markdown/>
  9. #
  10. # Additional customizations for BBCode Written by Krusader74
  11. # http://odd74.proboards.com/user/2141
  12. # http://pastebin.com/u/krusader74
  13. # 26 Jan 2016
  14. # @ Copyleft All Wrongs Reserved
  15.  
  16.  
  17. package Markdown;
  18. use strict;
  19. use warnings;
  20. use Digest::MD5 qw(md5_hex);
  21. use vars qw($VERSION);
  22. $VERSION = '3.0';
  23.  
  24. use Switch;
  25. use YAML::Tiny;
  26. use Games::Dice 'roll';
  27.  
  28. use utf8;
  29. use feature 'unicode_strings';
  30. use open qw(:std :utf8);
  31.  
  32. binmode(STDIN,  ":utf8");
  33. binmode(STDOUT, ":utf8");
  34. binmode(STDERR, ":utf8");
  35.  
  36. #
  37. # Global default settings:
  38. #
  39. my $g_empty_element_suffix = "⟧";
  40. my $g_tab_width = 4;
  41.  
  42. # Because Markdown and BBCode both use square brackets [, ] heavily
  43. # we are going to use a trick and encode all BBCode using double square brackets ⟦, ⟧
  44. # until we get to the very end when we'll turn them back into regular square brackets.
  45.  
  46. #
  47. # Globals:
  48. #
  49.  
  50. # Regexen to match balanced [brackets], (parens), {braces} and <chevrons>. See Friedl's
  51. # "Mastering Regular Expressions", 2nd Ed., pp. 328-331.
  52. my $g_nested_brackets;
  53. $g_nested_brackets = qr{
  54.     (?>                                 # Atomic matching
  55.        [^\[\]]+                         # Anything other than brackets
  56.      |
  57.        \[
  58.          (??{ $g_nested_brackets })     # Recursive set of nested brackets
  59.        \]
  60.     )*
  61. }x;
  62.  
  63. my $g_nested_parens;
  64. $g_nested_parens = qr{
  65.     (?>                                 # Atomic matching
  66.        [^\(\)]+                         # Anything other than parens
  67.      |
  68.        \(
  69.          (??{ $g_nested_parens })       # Recursive set of nested parens
  70.        \)
  71.     )*
  72. }x;
  73.  
  74. my $g_nested_braces;
  75. $g_nested_braces = qr{
  76.     (?>                                 # Atomic matching
  77.        [^\{\}]+                         # Anything other than braces
  78.      |
  79.        \{
  80.          (??{ $g_nested_braces })       # Recursive set of nested braces
  81.        \}
  82.     )*
  83. }x;
  84.  
  85. my $g_nested_chevrons;
  86. $g_nested_chevrons = qr{
  87.     (?>                                 # Atomic matching
  88.        [^\<\>]+                         # Anything other than chevrons
  89.      |
  90.        \<
  91.          (??{ $g_nested_chevrons })     # Recursive set of nested chevrons
  92.        \>
  93.     )*
  94. }x;
  95.  
  96. # Table of hash values for escaped characters:
  97. my %g_escape_table;
  98. foreach my $char (split //, '\\`*_{}[]()>#+-.!~') {
  99.     $g_escape_table{$char} = md5_hex($char);
  100. }
  101.  
  102.  
  103. # Global hashes, used by various utility routines
  104. my %g_urls;
  105. my %g_titles;
  106. my %g_html_blocks;
  107. my %g_footnotes;
  108. my $yaml;
  109. my @matharr;
  110. my $mathctr;
  111. my %mathsym;
  112. my %mathcal;
  113. my %mathbb;
  114. my %dice;
  115.  
  116. # Used to track when we're inside an ordered or unordered list
  117. # (see _ProcessListItems() for details):
  118. my $g_list_level = 0;
  119.  
  120.  
  121. #### Check for command-line switches: #################
  122. my %cli_opts;
  123. use Getopt::Long;
  124. Getopt::Long::Configure('pass_through');
  125. GetOptions(\%cli_opts,
  126.     'version',
  127.     'shortversion',
  128. );
  129. if ($cli_opts{'version'}) {     # Version info
  130.     print "\nThis is Markdown2BBcode, version $VERSION.\n";
  131.     print "Original Markdown script Copyright 2004 John Gruber\n";
  132.     print "http://daringfireball.net/projects/markdown/\n\n";
  133.     print "New support for BBCode @ CopyLeft 2016 Krusader74\n";
  134.     print "http://pastebin.com/u/krusader74\n\n";
  135.     exit 0;
  136. }
  137. if ($cli_opts{'shortversion'}) {        # Just the version number string.
  138.     print $VERSION;
  139.     exit 0;
  140. }
  141.  
  142.  
  143. #### Process incoming text: ###########################
  144. my $text;
  145. {
  146.     local $/;               # Slurp the whole file
  147.     $text = <>;
  148. }
  149. print Markdown($text);
  150.  
  151.  
  152. sub Markdown {
  153. #
  154. # Main function. The order in which other subs are called here is
  155. # essential. Link and image substitutions need to happen before
  156. # _EscapeSpecialChars(), so that any *'s or _'s in the <a>
  157. # and <img> tags get encoded.
  158. #
  159.     my $text = shift;
  160.  
  161.     # Clear the global hashes. If we don't clear these, you get conflicts
  162.     # from other articles when generating a page which contains more than
  163.     # one article (e.g. an index page that shows the N most recent
  164.     # articles):
  165.     %g_urls = ();
  166.     %g_titles = ();
  167.     %g_html_blocks = ();
  168.     %g_footnotes = ();
  169.  
  170.     _InitMath();
  171.  
  172.     $text = _PreProcess($text);
  173.  
  174.     # Standardize line endings:
  175.     $text =~ s{\r\n}{\n}g;  # DOS to Unix
  176.     $text =~ s{\r}{\n}g;    # Mac to Unix
  177.  
  178.     # Make sure $text ends with a couple of newlines:
  179.     $text .= "\n\n";
  180.  
  181.     # Convert all tabs to spaces.
  182.     $text = _Detab($text);
  183.  
  184.     # Strip any lines consisting only of spaces and tabs.
  185.     # This makes subsequent regexen easier to write, because we can
  186.     # match consecutive blank lines with /\n+/ instead of something
  187.     # contorted like /[ \t]*\n+/ .
  188.     $text =~ s/^[ \t]+$//mg;
  189.  
  190.     # Parse the YAML header first, if there is one. Then process metadata variables
  191.     $text = _InitYAML($text);
  192.  
  193.     # Turn LaTeX math spans into array entries
  194.     $text = _HashMathSpans($text);
  195.  
  196.     # Turn block-level HTML blocks into hash entries
  197.     $text = _HashBBCodeBlocks($text);
  198.  
  199.     # Strip link definitions, store in hashes.
  200.     $text = _StripLinkDefinitions($text);
  201.  
  202.     $text = _StripFootnotes($text);
  203.  
  204.     $text = _RunBlockGamut($text);
  205.  
  206.     $text = _UnescapeSpecialChars($text);
  207.    
  208.     $text = _PostProcess($text);
  209.  
  210.     return $text . "\n";
  211. }
  212.  
  213.  
  214. sub _StripFootnotes {
  215. #
  216. # Strips footnotes from text, stores the URLs and titles in
  217. # hash references.
  218. #
  219.     my $text = shift;
  220.     my $less_than_tab = $g_tab_width - 1;
  221.     my $footnotes;
  222.  
  223.     # Footnotes are in the form: [^id]: text of note
  224.     while ($text =~ s{
  225.                 (                       # whole footnote = $1
  226.                         ^
  227.                         [ ]{0,$less_than_tab}
  228.                         (?:
  229.                             \[ \^ ( .+? ) \] :  # id = $2
  230.                         )
  231.                         [ \t]*
  232.                         (               # note = $3
  233.                             (?: .+? )
  234.                             (?: \n\n? [ ]{4} .+? )*
  235.                        
  236.                         )
  237.                         (
  238.                             \z
  239.                             |
  240.                             \n{2,}
  241.                         )
  242.  
  243.                 )
  244.             }{}mx) {
  245.         $g_footnotes{lc $2} = $3;
  246.     }
  247.  
  248.     my $count = keys %g_footnotes;
  249.     if($count > 0){
  250.         $footnotes = "⟦hr⟧\n";
  251.         foreach my $key (sort(keys(%g_footnotes))) {
  252.             $footnotes .= "⟦" . $key . "⟧: " .
  253.                 _RunBlockGamut(_Outdent($g_footnotes{$key} . "\n\n" )) . "\n\n";
  254.         }
  255.     }
  256.     return $text . $footnotes;
  257. }
  258.  
  259.  
  260. sub _StripLinkDefinitions {
  261. #
  262. # Strips link definitions from text, stores the URLs and titles in
  263. # hash references.
  264. #
  265.     my $text = shift;
  266.     my $less_than_tab = $g_tab_width - 1;
  267.  
  268.     # Link defs are in the form: ^[id]: url "optional title"
  269.     while ($text =~ s{
  270.                         ^[ ]{0,$less_than_tab}\[(.+)\]: # id = $1
  271.                           [ \t]*
  272.                           \n?               # maybe *one* newline
  273.                           [ \t]*
  274.                         <?(\S+?)>?          # url = $2
  275.                           [ \t]*
  276.                           \n?               # maybe one newline
  277.                           [ \t]*
  278.                         (?:
  279.                             (?<=\s)         # lookbehind for whitespace
  280.                             ["(]
  281.                             (.+?)           # title = $3
  282.                             [")]
  283.                             [ \t]*
  284.                         )?  # title is optional
  285.                         (?:\n+|\Z)
  286.                     }
  287.                     {}mx) {
  288.         $g_urls{lc $1} = _EncodeAmpsAndAngles( $2 );    # Link IDs are case-insensitive
  289.         if ($3) {
  290.             $g_titles{lc $1} = $3;
  291.             $g_titles{lc $1} =~ s/"/&quot;/g;
  292.         }
  293.     }
  294.  
  295.     return $text;
  296. }
  297.  
  298.  
  299. sub _HashBBCodeBlocks {
  300.     my $text = shift;
  301.     my $less_than_tab = $g_tab_width - 1;
  302.  
  303.     # Hashify HTML blocks:
  304.     # We only want to do this for block-level HTML tags, such as headers,
  305.     # lists, and tables. That's because we still want to wrap <p>s around
  306.     # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
  307.     # phrase emphasis, and spans. The list of tags we're looking for is
  308.     # hard-coded:
  309.     my $block_tags_a = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del/;
  310.     my $block_tags_b = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math/;
  311.  
  312.     # First, look for nested blocks, e.g.:
  313.     #   <div>
  314.     #       <div>
  315.     #       tags for inner block must be indented.
  316.     #       </div>
  317.     #   </div>
  318.     #
  319.     # The outermost tags must start at the left margin for this to match, and
  320.     # the inner nested divs must be indented.
  321.     # We need to do this before the next, more liberal match, because the next
  322.     # match will start at the first `<div>` and stop at the first `</div>`.
  323.     $text =~ s{
  324.                 (                       # save in $1
  325.                     ^                   # start of line  (with /m)
  326.                     \[($block_tags_a)   # start tag = $2
  327.                     \b                  # word break
  328.                     (.*\n)*?            # any number of lines, minimally matching
  329.                     \[/\2\]             # the matching end tag
  330.                     [ \t]*              # trailing spaces/tabs
  331.                     (?=\n+|\Z)  # followed by a newline or end of document
  332.                 )
  333.             }{
  334.                 my $key = md5_hex($1);
  335.                 $g_html_blocks{$key} = $1;
  336.                 "\n\n" . $key . "\n\n";
  337.             }egmx;
  338.  
  339.  
  340.     #
  341.     # Now match more liberally, simply from `\n<tag>` to `</tag>\n`
  342.     #
  343.     $text =~ s{
  344.                 (                       # save in $1
  345.                     ^                   # start of line  (with /m)
  346.                     \[($block_tags_b)   # start tag = $2
  347.                     \b                  # word break
  348.                     (.*\n)*?            # any number of lines, minimally matching
  349.                     .*\[/\2\]               # the matching end tag
  350.                     [ \t]*              # trailing spaces/tabs
  351.                     (?=\n+|\Z)  # followed by a newline or end of document
  352.                 )
  353.             }{
  354.                 my $key = md5_hex($1);
  355.                 $g_html_blocks{$key} = $1;
  356.                 "\n\n" . $key . "\n\n";
  357.             }egmx;
  358.     # Special case just for <hr />. It was easier to make a special case than
  359.     # to make the other regex more complicated.
  360.     $text =~ s{
  361.                 (?:
  362.                     (?<=\n\n)       # Starting after a blank line
  363.                     |               # or
  364.                     \A\n?           # the beginning of the doc
  365.                 )
  366.                 (                       # save in $1
  367.                     [ ]{0,$less_than_tab}
  368.                     \[(hr)              # start tag = $2
  369.                     \b                  # word break
  370.                     ([^\[\]])*?         #
  371.                     /?\]                    # the matching end tag
  372.                     [ \t]*
  373.                     (?=\n{2,}|\Z)       # followed by a blank line or end of document
  374.                 )
  375.             }{
  376.                 my $key = md5_hex($1);
  377.                 $g_html_blocks{$key} = $1;
  378.                 "\n\n" . $key . "\n\n";
  379.             }egx;
  380.  
  381.     # Special case for standalone HTML comments:
  382.     $text =~ s{
  383.                 (?:
  384.                     (?<=\n\n)       # Starting after a blank line
  385.                     |               # or
  386.                     \A\n?           # the beginning of the doc
  387.                 )
  388.                 (                       # save in $1
  389.                     [ ]{0,$less_than_tab}
  390.                     (?s:
  391.                         <!
  392.                         (--.*?--\s*)+
  393.                         >
  394.                     )
  395.                     [ \t]*
  396.                     (?=\n{2,}|\Z)       # followed by a blank line or end of document
  397.                 )
  398.             }{}gx;
  399.  
  400.  
  401.     return $text;
  402. }
  403.  
  404.  
  405. sub _RunBlockGamut {
  406. #
  407. # These are all the transformations that form block-level
  408. # tags like paragraphs, headers, and list items.
  409. #
  410.     my $text = shift;
  411.  
  412.     $text = _DoMetadataVariables($text);
  413.  
  414.     $text = _DoTables($text);
  415.  
  416.     $text = _DoHeaders($text);
  417.  
  418.     $text = _DoFootnotes($text);
  419.  
  420.     $text = _DoLists($text);
  421.  
  422.     $text = _DoCodeBlocks($text);
  423.  
  424.     $text = _DoBlockQuotes($text);
  425.  
  426.   $text = _DoHorizontalRules($text);
  427.  
  428.     # We already ran _HashBBCodeBlocks() before, in Markdown(), but that
  429.     # was to escape raw HTML in the original Markdown source. This time,
  430.     # we're escaping the markup we've just created, so that we don't wrap
  431.     # <p> tags around block-level tags.
  432.     $text = _HashBBCodeBlocks($text);
  433.  
  434.     $text = _FormParagraphs($text);
  435.  
  436.     return $text;
  437. }
  438.  
  439.  
  440. sub _RunSpanGamut {
  441. #
  442. # These are all the transformations that occur *within* block-level
  443. # tags like paragraphs, headers, and list items.
  444. #
  445.     my $text = shift;
  446.  
  447.     $text = _DoCodeSpans($text);
  448.  
  449.     $text = _EscapeSpecialChars($text);
  450.  
  451.     # Process anchor and image tags. Images must come first,
  452.     # because ![foo][f] looks like an anchor.
  453.     $text = _DoAtCommands($text);
  454.     $text = _DoImages($text);
  455.     $text = _DoAnchors($text);
  456.  
  457.     # Make links out of things like `<http://example.com/>`
  458.     # Must come after _DoAnchors(), because you can use < and >
  459.     # delimiters in inline links like [this](<url>).
  460.     $text = _DoAutoLinks($text);
  461.  
  462.     $text = _EncodeAmpsAndAngles($text);
  463.  
  464.     $text = _DoItalicsAndBold($text);
  465.  
  466.     # WAS: Do hard breaks:
  467.     # $text =~ s/ {2,}\n/ <br$g_empty_element_suffix\n/g;
  468.     $text =~ s/ {2,}\n/ \n\n/g;
  469.  
  470.     return $text;
  471. }
  472.  
  473.  
  474. sub _EscapeSpecialChars {
  475.     my $text = shift;
  476.     my $tokens ||= _TokenizeHTML($text);
  477.  
  478.     $text = '';   # rebuild $text from the tokens
  479. #   my $in_pre = 0;  # Keep track of when we're inside <pre> or <code> tags.
  480. #   my $tags_to_skip = qr!<(/?)(?:pre|code|kbd|script|math)[\s>]!;
  481.  
  482.     foreach my $cur_token (@$tokens) {
  483.         if ($cur_token->[0] eq "tag") {
  484.             # Within tags, encode * and _ so they don't conflict
  485.             # with their use in Markdown for italics and strong.
  486.             # We're replacing each such character with its
  487.             # corresponding MD5 checksum value; this is likely
  488.             # overkill, but it should prevent us from colliding
  489.             # with the escape values by accident.
  490.             $cur_token->[1] =~  s! \* !$g_escape_table{'*'}!gx;
  491.             $cur_token->[1] =~  s! _  !$g_escape_table{'_'}!gx;
  492.             $cur_token->[1] =~  s! ~  !$g_escape_table{'~'}!gx;
  493.             $text .= $cur_token->[1];
  494.         } else {
  495.             my $t = $cur_token->[1];
  496.             $t = _EncodeBackslashEscapes($t);
  497.             $text .= $t;
  498.         }
  499.     }
  500.     return $text;
  501. }
  502.  
  503.  
  504. sub _DoHorizontalRules {
  505.     my $text = shift;
  506.     $text =~ s{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}{\n⟦hr⟧\n}gmx;
  507.     $text =~ s{^[ ]{0,2}([ ]? -[ ]?){3,}[ \t]*$}{\n⟦hr⟧\n}gmx;
  508.     $text =~ s{^[ ]{0,2}([ ]? _[ ]?){3,}[ \t]*$}{\n⟦hr⟧\n}gmx;
  509.     return $text;
  510. }
  511.  
  512.  
  513. sub _DoAnchors {
  514. #
  515. # Turn Markdown link shortcuts into [a] tags.
  516. #
  517.     my $text = shift;
  518.  
  519.     #
  520.     # First, handle reference-style links: [link text] [id]
  521.     #
  522.     $text =~ s{
  523.         (                   # wrap whole match in $1
  524.           \[
  525.             ($g_nested_brackets)    # link text = $2
  526.           \]
  527.  
  528.           [ ]?              # one optional space
  529.           (?:\n[ ]*)?       # one optional newline followed by spaces
  530.  
  531.           \[
  532.             (.*?)       # id = $3
  533.           \]
  534.         )
  535.     }{
  536.         my $result;
  537.         my $whole_match = $1;
  538.         my $link_text   = $2;
  539.         my $link_id     = lc $3;
  540.  
  541.         if ($link_id eq "") {
  542.             $link_id = lc $link_text;     # for shortcut links like [this][].
  543.         }
  544.  
  545.         if (defined $g_urls{$link_id}) {
  546.             my $url = $g_urls{$link_id};
  547.             $url =~ s! \* !$g_escape_table{'*'}!gx;     # We've got to encode these to avoid
  548.             $url =~ s!  _ !$g_escape_table{'_'}!gx;     # conflicting with italics/bold.
  549.             $url =~ s!  ~ !$g_escape_table{'~'}!gx;     # conflicting with italics/bold.
  550.             $result = "⟦a href=\"$url\"";
  551.             if ( defined $g_titles{$link_id} ) {
  552.                 my $title = $g_titles{$link_id};
  553.                 $title =~ s! \* !$g_escape_table{'*'}!gx;
  554.                 $title =~ s!  _ !$g_escape_table{'_'}!gx;
  555.                 $title =~ s!  ~ !$g_escape_table{'~'}!gx;
  556.                 $result .=  " title=\"$title\"";
  557.             }
  558.             $result .= "⟧$link_text⟦/a⟧";
  559.         }
  560.         else {
  561.             $result = $whole_match;
  562.         }
  563.         $result;
  564.     }xsge;
  565.  
  566.     #
  567.     # Next, inline-style links: [link text](url "optional title")
  568.     #
  569.     $text =~ s{
  570.         (               # wrap whole match in $1
  571.           \[
  572.             ($g_nested_brackets)    # link text = $2
  573.           \]
  574.           \(            # literal paren
  575.             [ \t]*
  576.             <?(.*?)>?   # href = $3
  577.             [ \t]*
  578.             (           # $4
  579.               (['"])    # quote char = $5
  580.               (.*?)     # Title = $6
  581.               \5        # matching quote
  582.             )?          # title is optional
  583.           \)
  584.         )
  585.     }{
  586.         my $result;
  587.         my $whole_match = $1;
  588.         my $link_text   = $2;
  589.         my $url         = $3;
  590.         my $title       = $6;
  591.  
  592.         $url =~ s! \* !$g_escape_table{'*'}!gx;     # We've got to encode these to avoid
  593.         $url =~ s!  _ !$g_escape_table{'_'}!gx;     # conflicting with italics/bold.
  594.         $url =~ s!  ~ !$g_escape_table{'~'}!gx;     # conflicting with italics/bold.
  595.         $result = "⟦a href=\"$url\"";
  596.  
  597.         if (defined $title) {
  598.             $title =~ s/"/&quot;/g;
  599.             $title =~ s! \* !$g_escape_table{'*'}!gx;
  600.             $title =~ s!  _ !$g_escape_table{'_'}!gx;
  601.             $title =~ s!  ~ !$g_escape_table{'~'}!gx;
  602.             $result .=  " title=\"$title\"";
  603.         }
  604.  
  605.         $result .= "⟧$link_text⟦/a⟧";
  606.  
  607.         $result;
  608.     }xsge;
  609.  
  610.     return $text;
  611. }
  612.  
  613.  
  614. sub _DoFootnotes {
  615.   my $text = shift;
  616.  
  617.   # [^id]
  618.  
  619.   $text =~ s{
  620.     (           # wrap whole match in $1
  621.       \[ \^
  622.         (.*?)       # id = $2
  623.       \]
  624.     )
  625.   }{
  626.     my $key = lc $2;
  627.     "⟦sup⟧⟦" . $key . "⟧⟦/sup⟧";
  628.   }xsge;
  629.  
  630.   return $text;
  631. }
  632.  
  633.  
  634. sub _DoAtCommands {
  635. #
  636. # Turn @-command shortcuts into BBCode tags.
  637. #
  638.   my $text = shift;
  639.  
  640.   # @[command](argument)
  641.  
  642.   $text =~ s{
  643.     (                   # wrap whole match in $1
  644.         @ \[
  645.             (.*?)           # command = $2
  646.         \]
  647.         \(              # literal paren
  648.             ($g_nested_parens)  # argument = $3
  649.         \)
  650.     )
  651.   }{
  652.     my $command = $2;
  653.     my $argument = $3;
  654.     # Check for nested at commands
  655.     if($command !~ /nomarkdown/i && $argument =~ /@\[/) { $argument = _DoAtCommands($argument) }
  656.     _ParseAtCommand($command, $argument);
  657.   }xsge;
  658.  
  659.   return $text;
  660. }
  661.  
  662.  
  663. sub _ParseAtCommand {
  664.   my $result;
  665.   my $command = shift;
  666.   my $argument = shift;
  667.     switch ($command) {
  668.         case /eval/i { return eval $argument }
  669.         case /roll/i { return roll($argument) }
  670.         case /noprocess/i { return "@⟦noprocess⟧(${argument})" }
  671.         case /process/i { return "@⟦process⟧(${argument})" }
  672.         case /nomarkdown/i { $matharr[$mathctr] = ${argument}; return "(※" . $mathctr++ . ")" }
  673.         case /comment/i { return "" }
  674.         case /youtube/i { return "⟦video⟧http://www.youtube.com/watch?v=${argument}⟦/video⟧" }
  675.         case /video/i { return "⟦video⟧http://www.youtube.com/watch?v=${argument}⟦/video⟧" }
  676.         case /imgur/i { return "⟦img⟧http://i.imgur.com/${argument}⟦/img⟧" }
  677.         case /twitter/i { return "⟦twitter id=\"${argument}\"⟧" }
  678.         case /tweet/i { return "⟦twitter id=\"${argument}\"⟧" }
  679.         case /marquee/i { return "⟦marquee scrollamount=\"2\"⟧${argument}⟦/marquee⟧" }
  680.         case /move/i { return "⟦marquee scrollamount=\"2\"⟧${argument}⟦/marquee⟧" }
  681.         case /noparse/i { return "⟦noparse⟧${argument}⟦/noparse⟧" }
  682.         case /blockquote/i { return "⟦blockquote⟧${argument}⟦/blockquote⟧" }
  683.         case /quote/i { return "⟦quote⟧${argument}⟦/quote⟧" }
  684.         case /biggest/i { return "⟦font size=\"7\"⟧${argument}⟦/font⟧" }
  685.         case /bigger/i { return "⟦font size=\"6\"⟧${argument}⟦/font⟧" }
  686.         case /big/i { return "⟦font size=\"5\"⟧${argument}⟦/font⟧" }
  687.         case /smallest/i { return "⟦font size=\"1\"⟧${argument}⟦/font⟧" }
  688.         case /smaller/i { return "⟦font size=\"2\"⟧${argument}⟦/font⟧" }
  689.         case /small/i { return "⟦font size=\"3\"⟧${argument}⟦/font⟧" }
  690.         case /spoilers?/i { return "⟦spoiler⟧${argument}⟦/spoiler⟧" }
  691.         case /(red|yellow|pink|green|purple|orange|blue)/i { my $color = lc $command; return "⟦font color=\"${color}\"⟧${argument}⟦/font⟧" }
  692.         case /hide/i { return "⟦font color=\"efefef\"⟧${argument}⟦/color⟧" }
  693.         case /courier/i { return "⟦font face=\"courier new\"⟧${argument}⟦/font⟧" }
  694.         case /rot13/i { $_ = $argument; tr/A-Za-z/N-ZA-Mn-za-m/; return $_ }
  695.         case /reverse/i { return reverse $argument }
  696.         case /left/i { return "⟦div align=\"left\"⟧${argument}⟦/div⟧" }
  697.         case /right/i { return "⟦div align=\"right\"⟧${argument}⟦/div⟧" }
  698.         case /center/i { return "⟦div align=\"center\"⟧${argument}⟦/div⟧" }
  699.         case /justify/i { return "⟦div align=\"justify\"⟧${argument}⟦/div⟧" }
  700.         case /code/i { return "⟦code⟧${argument}⟦/code⟧" }
  701.         case /pre/i { return "⟦pre⟧${argument}⟦/pre⟧" }
  702.         case /sub/i { return "⟦sub⟧${argument}⟦/sub⟧" }
  703.         case /sup/i { return "⟦sup⟧${argument}⟦/sup⟧" }
  704.         case /hi/i { return "⟦highlight⟧${argument}⟦/highlight⟧" }
  705.         case /tt/i { return "⟦tt⟧${argument}⟦/tt⟧" }
  706.         case /b/i { return "⟦b⟧${argument}⟦/b⟧" }
  707.         case /i/i { return "⟦i⟧${argument}⟦/i⟧" }
  708.         case /s/i { return "⟦s⟧${argument}⟦/s⟧" }
  709.         case /u/i { return "⟦u⟧${argument}⟦/u⟧" }
  710.     else { return "failed to parse at-command ${command} with argument ${argument})" }
  711.     }
  712. }
  713.  
  714.  
  715. sub _DoImages {
  716. #
  717. # Turn Markdown image shortcuts into <img> tags.
  718. #
  719.     my $text = shift;
  720.  
  721.     #
  722.     # First, handle reference-style labeled images: ![alt text][id]
  723.     #
  724.     $text =~ s{
  725.         (               # wrap whole match in $1
  726.           !\[
  727.             (.*?)       # alt text = $2
  728.           \]
  729.  
  730.           [ ]?              # one optional space
  731.           (?:\n[ ]*)?       # one optional newline followed by spaces
  732.  
  733.           \[
  734.             (.*?)       # id = $3
  735.           \]
  736.  
  737.         )
  738.     }{
  739.         my $result;
  740.         my $whole_match = $1;
  741.         my $alt_text    = $2;
  742.         my $link_id     = lc $3;
  743.  
  744.         if ($link_id eq "") {
  745.             $link_id = lc $alt_text;     # for shortcut links like ![this][].
  746.         }
  747.  
  748.         $alt_text =~ s/"/&quot;/g;
  749.         if (defined $g_urls{$link_id}) {
  750.             my $url = $g_urls{$link_id};
  751.             $url =~ s! \* !$g_escape_table{'*'}!gx;     # We've got to encode these to avoid
  752.             $url =~ s!  _ !$g_escape_table{'_'}!gx;     # conflicting with italics/bold.
  753.             $url =~ s!  ~ !$g_escape_table{'~'}!gx;     # conflicting with italics/bold.
  754.             $result = "⟦img src=\"$url\" alt=\"$alt_text\"";
  755.             if (defined $g_titles{$link_id}) {
  756.                 my $title = $g_titles{$link_id};
  757.                 $title =~ s! \* !$g_escape_table{'*'}!gx;
  758.                 $title =~ s!  _ !$g_escape_table{'_'}!gx;
  759.                 $title =~ s!  ~ !$g_escape_table{'~'}!gx;
  760.                 $result .=  " title=\"$title\"";
  761.             }
  762.             $result .= $g_empty_element_suffix;
  763.         }
  764.         else {
  765.             # If there's no such link ID, leave intact:
  766.             $result = $whole_match;
  767.         }
  768.  
  769.         $result;
  770.     }xsge;
  771.  
  772.     #
  773.     # Next, handle inline images:  ![alt text](url "optional title")
  774.     # Don't forget: encode * and _
  775.  
  776.     $text =~ s{
  777.         (               # wrap whole match in $1
  778.           !\[
  779.             (.*?)       # alt text = $2
  780.           \]
  781.           \(            # literal paren
  782.             [ \t]*
  783.             <?(\S+?)>?  # src url = $3
  784.             [ \t]*
  785.             (           # $4
  786.               (['"])    # quote char = $5
  787.               (.*?)     # title = $6
  788.               \5        # matching quote
  789.               [ \t]*
  790.             )?          # title is optional
  791.           \)
  792.         )
  793.     }{
  794.         my $result;
  795.         my $whole_match = $1;
  796.         my $alt_text    = $2;
  797.         my $url         = $3;
  798.         my $title       = '';
  799.         if (defined($6)) {
  800.             $title      = $6;
  801.         }
  802.  
  803.         $alt_text =~ s/"/&quot;/g;
  804.         $title    =~ s/"/&quot;/g;
  805.         $url =~ s! \* !$g_escape_table{'*'}!gx;     # We've got to encode these to avoid
  806.         $url =~ s!  _ !$g_escape_table{'_'}!gx;     # conflicting with italics/bold.
  807.         $url =~ s!  ~ !$g_escape_table{'~'}!gx;     # conflicting with italics/bold.
  808.         $result = "⟦img src=\"$url\" alt=\"$alt_text\"";
  809.         if (defined $title) {
  810.             $title =~ s! \* !$g_escape_table{'*'}!gx;
  811.             $title =~ s!  _ !$g_escape_table{'_'}!gx;
  812.             $title =~ s!  ~ !$g_escape_table{'~'}!gx;
  813.             $result .=  " title=\"$title\"";
  814.         }
  815.         $result .= $g_empty_element_suffix;
  816.  
  817.         $result;
  818.     }xsge;
  819.  
  820.     return $text;
  821. }
  822.  
  823.  
  824. sub _DoHeaders {
  825.     my $text = shift;
  826.  
  827.  
  828.     my @h_start = ("", "⟦font size=\"6\"⟧⟦b⟧", "⟦font size=\"5\"⟧⟦b⟧", "⟦font size=\"5\"⟧⟦b⟧⟦i⟧", "⟦font size=\"4\"⟧⟦b⟧", "⟦font size=\"4\"⟧⟦b⟧⟦i⟧", "⟦font size=\"4\"⟧⟦i⟧");
  829.     my @h_end = ("", "⟦/b⟧⟦/font⟧", "⟦/b⟧⟦/font⟧", "⟦/i⟧⟦/b⟧⟦/font⟧", "⟦/b⟧⟦/font⟧", "⟦/i⟧⟦/b⟧⟦/font⟧", "⟦/i⟧⟦/font⟧");
  830.  
  831.     # Setext-style headers:
  832.     #     Header 1
  833.     #     ========
  834.     #  
  835.     #     Header 2
  836.     #     --------
  837.     #
  838.     $text =~ s{ ^(.+)[ \t]*\n=+[ \t]*\n+ }{
  839.         $h_start[1]  .  _RunSpanGamut($1)  .  $h_end[1] . "\n\n";
  840.     }egmx;
  841.  
  842.     $text =~ s{ ^(.+)[ \t]*\n-+[ \t]*\n+ }{
  843.         $h_start[2]  .  _RunSpanGamut($1)  .  $h_end[2] . "\n\n";
  844.     }egmx;
  845.  
  846.  
  847.     # atx-style headers:
  848.     #   # Header 1
  849.     #   ## Header 2
  850.     #   ## Header 2 with closing hashes ##
  851.     #   ...
  852.     #   ###### Header 6
  853.     #
  854.     $text =~ s{
  855.             ^(\#{1,6})  # $1 = string of #'s
  856.             [ \t]*
  857.             (.+?)       # $2 = Header text
  858.             [ \t]*
  859.             \#*         # optional closing #'s (not counted)
  860.             \n+
  861.         }{
  862.             my $h_level = length($1);
  863.             $h_start[$h_level]  .  _RunSpanGamut($2)  .  $h_end[$h_level] . "\n\n";
  864.         }egmx;
  865.  
  866.     return $text;
  867. }
  868.  
  869.  
  870. sub _DoTables {
  871. #
  872. # Form BBCode tables
  873. #
  874.   my $text = shift;
  875.  
  876.   my $less_than_tab = $g_tab_width - 1;
  877.  
  878.   #
  879.   # Multiline table with headers
  880.   #
  881.  
  882.   my $re_table_demarcation = qr{ ^ [ \t]* -{3,} \n }mx;
  883.  
  884.   my $re_table_divider_column = qr{ -{3,} }mx;
  885.  
  886.   my $re_table_divider = qr{
  887.     ^
  888.     [ \t]*
  889.     ${re_table_divider_column}              # Divider Head
  890.     (?:[ \t]+ ${re_table_divider_column})+  # Divider Tail
  891.     \n
  892.   }mx;
  893.  
  894.   my $re_table_row = qr{
  895.     (?:[^\n]+\n)+    # One or more non-blank lines
  896.     (?!$re_table_demarcation)
  897.   }mx;
  898.  
  899.   my $re_table_rows = qr{
  900.     (?: ${re_table_row} \n)*
  901.     (?:[^\n]+\n)+    # One or more non-blank lines
  902.     (?=$re_table_demarcation)
  903.   }mx;
  904.  
  905.   my $re_header_row = qr{
  906.     $re_table_row
  907.     (?=$re_table_divider)
  908.   }mx;
  909.  
  910.   my $re_table = qr{
  911.     (                      # $1 = whole table
  912.       $re_table_demarcation    # row of dashes, before the header text
  913.       ($re_header_row)         # $2 = header row
  914.       ($re_table_divider)      # $3 = table divider
  915.       (${re_table_rows})       # $4 = rows in table body
  916.       $re_table_demarcation    # end with a row of dashes
  917.       (?:\z|\n)                # then a blank line
  918.     )
  919.   }mx;
  920.  
  921.   $text =~ s{$re_table}{
  922.     my $header = $2;
  923.     my $divider = $3;
  924.     my $rows = $4;
  925.     my $bbc = "";
  926.     my @rows = split(/\n\n/, $rows);
  927.         my @align = get_row_alignments($header, $divider);
  928.     for my $r (@rows) {
  929.       my $c = 0;
  930.       $bbc .= "⟦tr⟧" .
  931.         join('', map {'⟦td align="' . $align[$c++] . '"⟧' . _RunBlockGamut(_Outdent($_)) . '⟦/td⟧'} get_fixed_width_row($r, $divider)) .
  932.         "⟦/tr⟧\n";
  933.     }
  934.     $bbc = "⟦table⟧⟦thead⟧⟦tr⟧" . join('', map {'⟦th⟧' . $_ . '⟦/th⟧'} get_fixed_width_row($header, $divider)) . "⟦/tr⟧⟦/thead⟧\n" .
  935.       "⟦tbody⟧" . $bbc . "⟦/tbody⟧⟦/table⟧\n\n";
  936.     $bbc;
  937.   }egmx;
  938.  
  939.   #
  940.   # Multiline table without headers
  941.   #
  942.  
  943.   $re_table_row = qr{
  944.     (?:[^\n]+\n)+    # One or more non-blank lines
  945.     (?!$re_table_divider)
  946.   }mx;
  947.  
  948.   $re_table_rows = qr{
  949.     (?: ${re_table_row} \n)+
  950.     (?:[^\n]+\n)+    # One or more non-blank lines
  951.     (?=$re_table_divider)
  952.   }mx;
  953.  
  954.   my $re_table_wo_headers = qr{
  955.     (                      # $1 = whole table
  956.       ($re_table_divider)      # $2 = table divider
  957.       (${re_table_rows})       # $3 = rows in table body
  958.       $re_table_divider        # table divider
  959.       (?:\z|\n)                # then a blank line
  960.     )
  961.   }mx;
  962.  
  963.   $text =~ s{$re_table_wo_headers}{
  964.     my $divider = $2;
  965.     my $rows = $3;
  966.     my $bbc = "";
  967.     my @rows = split(/\n\n/, $rows);
  968.         my @align = get_row_alignments($rows[0], $divider);
  969.     for my $r (@rows) {
  970.       my $c = 0;
  971.       $bbc .= "⟦tr⟧" .
  972.         join('', map {'⟦td align="' . $align[$c++] . '"⟧' . _RunBlockGamut(_Outdent($_)) . '⟦/td⟧'} get_fixed_width_row($r, $divider)) .
  973.         "⟦/tr⟧\n";
  974.     }
  975.     $bbc = "⟦table⟧⟦tbody⟧" . $bbc . "⟦/tbody⟧⟦/table⟧\n\n";
  976.     $bbc;
  977.     }egmx;
  978.  
  979.   #
  980.   # Simple table with headers
  981.   #
  982.  
  983.   $re_header_row = qr{
  984.     [^\n]+ \n                  # One non-blank line
  985.     (?=$re_table_divider)
  986.   }mx;
  987.  
  988.   $re_table_row = qr{
  989.     [^\n]+ \n                  # One non-blank line
  990.     (?!$re_table_divider)
  991.   }mx;
  992.  
  993.   $re_table_rows = qr{
  994.     (?:${re_table_row})+
  995.     (?=\n|\z)
  996.   }mx;
  997.  
  998.   my $re_simple_table = qr{
  999.     (                      # $1 = whole table
  1000.       ($re_header_row)         # $2 = header row
  1001.       ($re_table_divider)      # $3 = table divider
  1002.       (${re_table_rows})       # $4 = rows in table body
  1003.       (?:\z|\n)                # then a blank line
  1004.     )
  1005.   }mx;
  1006.  
  1007.   $text =~ s{$re_simple_table}{
  1008.     my $header = $2;
  1009.     my $divider = $3;
  1010.     my $rows = $4;
  1011.     my $bbc = "";
  1012.     my @rows = split(/\n/, $rows);
  1013.         my @align = get_row_alignments($header, $divider);
  1014.     for my $r (@rows) {
  1015.       my $c = 0;
  1016.       $bbc .= "⟦tr⟧" .
  1017.         join('', map {'⟦td align="' . $align[$c++] . '"⟧' . $_ . '⟦/td⟧'} get_fixed_width_row($r, $divider)) .
  1018.         "⟦/tr⟧\n";
  1019.     }
  1020.     $bbc = "⟦table⟧⟦thead⟧⟦tr⟧" . join('', map {'⟦th⟧' . $_ . '⟦/th⟧'} get_fixed_width_row($header, $divider)) . "⟦/tr⟧⟦/thead⟧\n" .
  1021.       "⟦tbody⟧" . $bbc . "⟦/tbody⟧⟦/table⟧\n\n";
  1022.     $bbc;
  1023.     }egmx;
  1024.  
  1025.   #
  1026.   # Simple table without headers
  1027.   #
  1028.  
  1029.   $re_table_row = qr{
  1030.     [^\n]+ \n                  # One non-blank line
  1031.     (?!$re_table_divider)
  1032.   }mx;
  1033.  
  1034.   $re_table_rows = qr{
  1035.     (?:${re_table_row})*
  1036.     [^\n]+\n                   # One non-blank line
  1037.     (?=$re_table_divider)
  1038.   }mx;
  1039.  
  1040.   my $re_simple_table_wo_headers = qr{
  1041.     (                      # $1 = whole table
  1042.       ($re_table_divider)      # $2 = table divider
  1043.       (${re_table_rows})       # $3 = rows in table body
  1044.       $re_table_divider        # table divider
  1045.       (?:\z|\n)                # then a blank line
  1046.     )
  1047.   }mx;
  1048.  
  1049.   $text =~ s{$re_simple_table_wo_headers}{
  1050.     my $divider = $2;
  1051.     my $rows = $3;
  1052.     my $bbc = "";
  1053.     my @rows = split(/\n/, $rows);
  1054.         my @align = get_row_alignments($rows[0], $divider);
  1055.     for my $r (@rows) {
  1056.       my $c = 0;
  1057.       $bbc .= "⟦tr⟧" .
  1058.         join('', map {'⟦td align="' . $align[$c++] . '"⟧' . $_ . '⟦/td⟧'} get_fixed_width_row($r, $divider)) .
  1059.         "⟦/tr⟧\n";
  1060.     }
  1061.     $bbc = "⟦table⟧⟦tbody⟧" . $bbc . "⟦/tbody⟧⟦/table⟧\n\n";
  1062.     $bbc;
  1063.     }egmx;
  1064.  
  1065.   #
  1066.   # Grid tables
  1067.   #
  1068.  
  1069.   my $re_row_demarcation = qr{ ^ [ \t]* \+ (?:-{3,}\+)+ \n }mx;
  1070.  
  1071.   my $re_table_divider_row = qr{ ^ [ \t]* \+ (?:\={3,}\+)+ \n }mx;
  1072.  
  1073.   $re_table_row = qr{
  1074.     (?: \| (?:[^\|]+ \|)+ \n)+    # One or more non-blank lines
  1075.     (?=$re_row_demarcation)
  1076.   }mx;
  1077.  
  1078.   $re_table_rows = qr{
  1079.     $re_table_row
  1080.     $re_row_demarcation
  1081.   }mx;
  1082.  
  1083.   $re_header_row = qr{
  1084.     (?: \| (?:[^\|]+ \|)+ \n)+    # One or more non-blank lines
  1085.     (?=$re_table_divider_row)
  1086.   }mx;
  1087.  
  1088.   $re_table = qr{
  1089.     (                      # $1 = whole table
  1090.       $re_row_demarcation    # row of dashes, before the header text
  1091.       ($re_header_row)         # $2 = header row
  1092.       $re_table_divider_row    # table divider
  1093.       (${re_table_rows}+)      # $3 = rows in table body
  1094.       (?:\z|\n)                # then a blank line
  1095.     )
  1096.   }mx;
  1097.  
  1098.   $text =~ s{$re_table}{
  1099.     my $header = $2;
  1100.     my $rows = $3;
  1101.     my $bbc = "";
  1102.     my @rows = split($re_row_demarcation, $rows);
  1103.     for my $r (@rows) {
  1104.       $bbc .= "⟦tr⟧" .
  1105.         join('', map {'⟦td⟧' . _RunBlockGamut(_Outdent($_)) . '⟦/td⟧'} get_grid_row($r)) .
  1106.         "⟦/tr⟧\n";
  1107.     }
  1108.     $bbc = "⟦table⟧⟦thead⟧⟦tr⟧" . join('', map {'⟦th⟧' . $_ . '⟦/th⟧'} get_grid_row($header)) . "⟦/tr⟧⟦/thead⟧\n" .
  1109.       "⟦tbody⟧" . $bbc . "⟦/tbody⟧⟦/table⟧\n\n";
  1110.     $bbc;
  1111.   }egmx;
  1112.  
  1113.   #
  1114.   # Pipe tables
  1115.   #
  1116.  
  1117.   $re_table_divider_column = qr{
  1118.     [ \t]*
  1119.     :?
  1120.     -{3,}
  1121.     :?
  1122.     [ \t]*
  1123.   }x;
  1124.  
  1125.   $re_table_divider = qr{
  1126.     \|?                                          # Optional first pipe
  1127.     (?:${re_table_divider_column})               # Divider Head
  1128.     (?:(?:\+|\|)${re_table_divider_column})*     # Divider Tail
  1129.     \|?                                          # Optional last pipe
  1130.     \n
  1131.   }mx;
  1132.  
  1133.   $re_table_row = qr{
  1134.     \|?                # Optional first pipe
  1135.     [^\|\n]+           # Row head
  1136.     (?:\|[^\|\n]+)*    # Row tail
  1137.     \|?                # Optional last pipe
  1138.     (?:\n|\z)
  1139.   }mx;
  1140.  
  1141.   $re_table = qr{
  1142.     (                      # $1 = whole table
  1143.       ($re_table_row)          # $2 = header row
  1144.       ($re_table_divider)      # $3 = table divider
  1145.       (${re_table_row}+)       # $4 = rows in table body
  1146.       (\z|\n)                  # $5
  1147.     )
  1148.   }mx;
  1149.  
  1150.   $text =~ s{
  1151.       $re_table
  1152.     }{
  1153.       my $table = $1;
  1154.       my $header = $2;
  1155.       my $divider = $3;
  1156.       my $body = $4;
  1157.       my @headings = map {trim($_)} ($header =~ /[^\|\n]+/g);
  1158.       my @cols = $divider =~ /${re_table_divider_column}/g;
  1159.       my @rows = $body =~ /${re_table_row}/g;
  1160.       # BBCode
  1161.       my $bbc = "";
  1162.       my $a = "";
  1163.       my $c = 0;
  1164.       my $n = scalar @cols;
  1165.       for(my $r=0; $r < scalar @rows; $r++) {
  1166.         $bbc .= "⟦tr⟧" .
  1167.           join('', map {'⟦td' . (($a = align($cols[$c++ % $n])) ne "" ? (' align="' . $a . '"') : '') . '⟧' . $_ . '⟦/td⟧'} cells($rows[$r])) .
  1168.           "⟦/tr⟧\n";
  1169.       }
  1170.       $bbc = "⟦table⟧⟦thead⟧⟦tr⟧" . join('', map {'⟦th⟧' . $_ . '⟦/th⟧'} @headings) . "⟦/tr⟧⟦/thead⟧\n" .
  1171.         "⟦tbody⟧" . $bbc . "⟦/tbody⟧⟦/table⟧\n\n";
  1172.       $bbc;
  1173.     }egmx;
  1174.  
  1175.   return $text;
  1176. }
  1177.  
  1178. sub align {
  1179.   my $text = shift;
  1180.   if($text =~ /\:-{3,}\:/){ return "center"; }
  1181.   if($text =~ /\:-{3,}/){ return "left"; }
  1182.   if($text =~ /-{3,}\:/){ return "right"; }
  1183.   return "";
  1184. }
  1185.  
  1186. sub trim {
  1187.   my $text = shift;
  1188.   $text =~ s/^\s+//;
  1189.   $text =~ s/\s+$//;
  1190.   return $text;
  1191. }
  1192.  
  1193. sub cells {
  1194.   my $text = shift;
  1195.   $text = trim($text);
  1196.   $text =~ s/^\|//;
  1197.   $text =~ s/\|$//;
  1198.   my @cells = map {trim($_)} (split /\|/, $text);
  1199.   return @cells;
  1200. }
  1201.  
  1202. sub get_grid_row {
  1203.   my $row = shift;
  1204.     my @cols = ();
  1205.     my @lines = split(/\n/, $row);
  1206.     foreach my $l (@lines) {
  1207.         my $c = 0;
  1208.         $l =~ s/^\s*\|//;
  1209.         $l =~ s/\|\s*$//;
  1210.         map {$cols[$c++].=trim($_)."\n"} split(/\|/, $l);
  1211.     }
  1212. # return map {$_ =~ s/\n+\z//; $_} @cols;
  1213.     return @cols;
  1214. }
  1215.  
  1216. sub get_column_widths {
  1217.   my $divider = shift;
  1218.   my $trim_trailing_spaces = shift;
  1219.   $divider =~ s/\s+$//;
  1220.   my @divs = split(/(?<=\s)(?=-)/, $divider);
  1221.   if($trim_trailing_spaces){ my @temp = map { s/(\s+)$//mgx; $_; } @divs; @divs = @temp; }
  1222.   my @widths = map {length $_} @divs;
  1223.   my $n = scalar @widths;
  1224.   my $i = 0;
  1225. # print "$n columns:\n";
  1226. # foreach $w (@widths) {print "column ", $i, ": ", $w, " width: '", $divs[$i++] ,"'\n";}
  1227. # print "\n\n";
  1228.   return @widths;
  1229. }
  1230.  
  1231. sub get_row_alignments {
  1232.   my @lines = split(/\n/, shift); # A row consists of more than one line
  1233.   my $divider = shift;
  1234.   my @widths = get_column_widths($divider, 0);
  1235.   my @divs = get_column_widths($divider, 1);
  1236.   my $offset;
  1237.   my $n = scalar @widths;
  1238.   my @aligns = ();
  1239.   my $l = $lines[0];
  1240.   $offset = 0;
  1241.   for(my $c = 0; $c < $n; $c++) {
  1242.     my $col .= substr($l, $offset, $divs[$c]);
  1243.     if($col =~ /^\s+[^\s]+\s+$/){ $aligns[$c] = "center"; }
  1244.     elsif($col =~ /^\s+[^\s]+$/){ $aligns[$c] = "right"; }
  1245.     elsif($col =~ /^[^\s]+\s+$/){ $aligns[$c] = "left"; }
  1246.     elsif(length($col) < $divs[$c]){ $aligns[$c] = "left"; }
  1247.     else { $aligns[$c] = "left"; } # default
  1248.     $offset += $widths[$c];
  1249. #   print "column ", $c, " is '", $col, "' with align = '", $aligns[$c], "'\n";
  1250.   }
  1251.     return @aligns;
  1252. }
  1253.  
  1254. sub get_fixed_width_row {
  1255.   my @lines = split(/\n/, shift); # A row consists of more than one line
  1256.   my $divider = shift;
  1257.   my $is_header = shift || 0;
  1258.   my $rv;
  1259.   my @widths = get_column_widths($divider);
  1260.   my $offset;
  1261.   my $n = scalar @widths;
  1262.   my @cols = ();
  1263.   for my $l (@lines) {
  1264.     $offset = 0;
  1265.     for(my $c = 0; $c < $n; $c++) {
  1266.       $cols[$c] .= trim(substr($l, $offset, $widths[$c])) . "\n";
  1267.       $offset += $widths[$c];
  1268.     }
  1269.   };
  1270. # for(my $c = 0; $c < $n; $c++) {
  1271. #   # trim trailing blank lines:
  1272. #   $cols[$c] =~ s/\n+\z//;
  1273. # }
  1274.   return @cols;
  1275. };
  1276.  
  1277.  
  1278. sub _DoLists {
  1279. #
  1280. # Form HTML ordered (numbered) and unordered (bulleted) lists.
  1281. #
  1282.     my $text = shift;
  1283.     my $less_than_tab = $g_tab_width - 1;
  1284.  
  1285.     # Re-usable patterns to match list item bullets and number markers:
  1286.     my $marker_ul  = qr/[*+-]/;
  1287.     my $marker_ol  = qr/\d+[.]/;
  1288.     my $marker_any = qr/(?:$marker_ul|$marker_ol)/;
  1289.  
  1290.     # Re-usable pattern to match any entirel ul or ol list:
  1291.     my $whole_list = qr{
  1292.         (                               # $1 = whole list
  1293.           (                             # $2
  1294.             [ ]{0,$less_than_tab}
  1295.             (${marker_any})             # $3 = first list item marker
  1296.             [ \t]+
  1297.           )
  1298.           (?s:.+?)
  1299.           (                             # $4
  1300.               \z
  1301.             |
  1302.               \n{2,}
  1303.               (?=\S)
  1304.               (?!                       # Negative lookahead for another list item marker
  1305.                 [ \t]*
  1306.                 ${marker_any}[ \t]+
  1307.               )
  1308.           )
  1309.         )
  1310.     }mx;
  1311.  
  1312.     # We use a different prefix before nested lists than top-level lists.
  1313.     # See extended comment in _ProcessListItems().
  1314.     #
  1315.     # Note: There's a bit of duplication here. My original implementation
  1316.     # created a scalar regex pattern as the conditional result of the test on
  1317.     # $g_list_level, and then only ran the $text =~ s{...}{...}egmx
  1318.     # substitution once, using the scalar as the pattern. This worked,
  1319.     # everywhere except when running under MT on my hosting account at Pair
  1320.     # Networks. There, this caused all rebuilds to be killed by the reaper (or
  1321.     # perhaps they crashed, but that seems incredibly unlikely given that the
  1322.     # same script on the same server ran fine *except* under MT. I've spent
  1323.     # more time trying to figure out why this is happening than I'd like to
  1324.     # admit. My only guess, backed up by the fact that this workaround works,
  1325.     # is that Perl optimizes the substition when it can figure out that the
  1326.     # pattern will never change, and when this optimization isn't on, we run
  1327.     # afoul of the reaper. Thus, the slightly redundant code to that uses two
  1328.     # static s/// patterns rather than one conditional pattern.
  1329.  
  1330.     if ($g_list_level) {
  1331.         $text =~ s{
  1332.                 ^
  1333.                 $whole_list
  1334.             }{
  1335.                 my $list = $1;
  1336.                 my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol";
  1337.                 # Turn double returns into triple returns, so that we can make a
  1338.                 # paragraph for the last item in a list, if necessary:
  1339.                 $list =~ s/\n{2,}/\n\n\n/g;
  1340.                 my $result = _ProcessListItems($list, $marker_any);
  1341.                 $result = "⟦$list_type⟧\n" . $result . "⟦/$list_type⟧\n";
  1342.                 $result;
  1343.             }egmx;
  1344.     }
  1345.     else {
  1346.         $text =~ s{
  1347.                 (?:(?<=\n\n)|\A\n?)
  1348.                 $whole_list
  1349.             }{
  1350.                 my $list = $1;
  1351.                 my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol";
  1352.                 # Turn double returns into triple returns, so that we can make a
  1353.                 # paragraph for the last item in a list, if necessary:
  1354.                 $list =~ s/\n{2,}/\n\n\n/g;
  1355.                 my $result = _ProcessListItems($list, $marker_any);
  1356.                 $result = "⟦$list_type⟧\n" . $result . "⟦/$list_type⟧\n";
  1357.                 $result;
  1358.             }egmx;
  1359.     }
  1360.  
  1361.  
  1362.     return $text;
  1363. }
  1364.  
  1365.  
  1366. sub _ProcessListItems {
  1367. #
  1368. #   Process the contents of a single ordered or unordered list, splitting it
  1369. #   into individual list items.
  1370. #
  1371.  
  1372.     my $list_str = shift;
  1373.     my $marker_any = shift;
  1374.  
  1375.  
  1376.     # The $g_list_level global keeps track of when we're inside a list.
  1377.     # Each time we enter a list, we increment it; when we leave a list,
  1378.     # we decrement. If it's zero, we're not in a list anymore.
  1379.     #
  1380.     # We do this because when we're not inside a list, we want to treat
  1381.     # something like this:
  1382.     #
  1383.     #       I recommend upgrading to version
  1384.     #       8. Oops, now this line is treated
  1385.     #       as a sub-list.
  1386.     #
  1387.     # As a single paragraph, despite the fact that the second line starts
  1388.     # with a digit-period-space sequence.
  1389.     #
  1390.     # Whereas when we're inside a list (or sub-list), that line will be
  1391.     # treated as the start of a sub-list. What a kludge, huh? This is
  1392.     # an aspect of Markdown's syntax that's hard to parse perfectly
  1393.     # without resorting to mind-reading. Perhaps the solution is to
  1394.     # change the syntax rules such that sub-lists must start with a
  1395.     # starting cardinal number; e.g. "1." or "a.".
  1396.  
  1397.     $g_list_level++;
  1398.  
  1399.     # trim trailing blank lines:
  1400.     $list_str =~ s/\n{2,}\z/\n/;
  1401.  
  1402.  
  1403.     $list_str =~ s{
  1404.         (\n)?                           # leading line = $1
  1405.         (^[ \t]*)                       # leading whitespace = $2
  1406.         ($marker_any) [ \t]+            # list marker = $3
  1407.         ((?s:.+?)                       # list item text   = $4
  1408.         (\n{1,2}))
  1409.         (?= \n* (\z | \2 ($marker_any) [ \t]+))
  1410.     }{
  1411.         my $item = $4;
  1412.         my $leading_line = $1;
  1413.         my $leading_space = $2;
  1414.  
  1415.         if ($leading_line or ($item =~ m/\n{2,}/)) {
  1416.             $item = _RunBlockGamut(_Outdent($item));
  1417.         }
  1418.         else {
  1419.             # Recursion for sub-lists:
  1420.             $item = _DoLists(_Outdent($item));
  1421.             chomp $item;
  1422.             $item = _RunSpanGamut($item);
  1423.         }
  1424.  
  1425.         "⟦li⟧" . $item . "⟦/li⟧\n";
  1426.     }egmx;
  1427.  
  1428.     $g_list_level--;
  1429.     return $list_str;
  1430. }
  1431.  
  1432.  
  1433.  
  1434. sub _DoCodeBlocks {
  1435. #
  1436. #   Process Markdown `<pre><code>` blocks.
  1437. #  
  1438.  
  1439.     my $text = shift;
  1440.  
  1441.     $text =~ s{
  1442.             (?:\n\n|\A)
  1443.             (               # $1 = the code block -- one or more lines, starting with a space/tab
  1444.               (?:
  1445.                 (?:[ ]{$g_tab_width} | \t)  # Lines must start with a tab or a tab-width of spaces
  1446.                 .*\n+
  1447.               )+
  1448.             )
  1449.             ((?=^[ ]{0,$g_tab_width}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
  1450.         }{
  1451.             my $codeblock = $1;
  1452.             my $result; # return value
  1453.  
  1454.             $codeblock = _EncodeCode(_Outdent($codeblock));
  1455.             $codeblock = _Detab($codeblock);
  1456.             $codeblock =~ s/\A\n+//; # trim leading newlines
  1457.             $codeblock =~ s/\s+\z//; # trim trailing whitespace
  1458.  
  1459.             $result = "\n\n⟦code⟧" . $codeblock . "⟦/code⟧\n\n";
  1460.  
  1461.             $result;
  1462.         }egmx;
  1463.  
  1464.     return $text;
  1465. }
  1466.  
  1467.  
  1468. sub _DoCodeSpans {
  1469. #
  1470. #   *   Backtick quotes are used for <code></code> spans.
  1471. #
  1472. #   *   You can use multiple backticks as the delimiters if you want to
  1473. #       include literal backticks in the code span. So, this input:
  1474. #    
  1475. #         Just type ``foo `bar` baz`` at the prompt.
  1476. #    
  1477. #       Will translate to:
  1478. #    
  1479. #         <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
  1480. #    
  1481. #       There's no arbitrary limit to the number of backticks you
  1482. #       can use as delimters. If you need three consecutive backticks
  1483. #       in your code, use four for delimiters, etc.
  1484. #
  1485. #   *   You can use spaces to get literal backticks at the edges:
  1486. #    
  1487. #         ... type `` `bar` `` ...
  1488. #    
  1489. #       Turns to:
  1490. #    
  1491. #         ... type <code>`bar`</code> ...
  1492. #
  1493.  
  1494.     my $text = shift;
  1495.  
  1496.     $text =~ s@
  1497.             (`+)        # $1 = Opening run of `
  1498.             (.+?)       # $2 = The code block
  1499.             (?<!`)
  1500.             \1          # Matching closer
  1501.             (?!`)
  1502.         @
  1503.             my $c = "$2";
  1504.             $c =~ s/^[ \t]*//g; # leading whitespace
  1505.             $c =~ s/[ \t]*$//g; # trailing whitespace
  1506.             $c = _EncodeCode($c);
  1507.             "⟦code⟧$c⟦/code⟧";
  1508.         @egsx;
  1509.  
  1510.     return $text;
  1511. }
  1512.  
  1513.  
  1514. sub _EncodeCode {
  1515. #
  1516. # Encode/escape certain characters inside Markdown code runs.
  1517. # The point is that in code, these characters are literals,
  1518. # and lose their special Markdown meanings.
  1519. #
  1520.     local $_ = shift;
  1521.  
  1522.     # Encode all ampersands; HTML entities are not
  1523.     # entities within a Markdown code span.
  1524.     s/&/&amp;/g;
  1525.  
  1526.     # Encode $'s, but only if we're running under Blosxom.
  1527.     # (Blosxom interpolates Perl variables in article bodies.)
  1528.     {
  1529.         no warnings 'once';
  1530.         if (defined($blosxom::version)) {
  1531.             s/\$/&#036;/g; 
  1532.         }
  1533.     }
  1534.  
  1535.  
  1536.     # Do the angle bracket song and dance:
  1537.     s! <  !&lt;!gx;
  1538.     s! >  !&gt;!gx;
  1539.  
  1540.     # Now, escape characters that are magic in Markdown:
  1541.     s! \* !$g_escape_table{'*'}!gx;
  1542.     s! _  !$g_escape_table{'_'}!gx;
  1543.     s! ~  !$g_escape_table{'~'}!gx;
  1544.     s! {  !$g_escape_table{'{'}!gx;
  1545.     s! }  !$g_escape_table{'}'}!gx;
  1546.     s! \[ !$g_escape_table{'['}!gx;
  1547.     s! \] !$g_escape_table{']'}!gx;
  1548.     s! \\ !$g_escape_table{'\\'}!gx;
  1549.  
  1550.     return $_;
  1551. }
  1552.  
  1553.  
  1554. sub _DoItalicsAndBold {
  1555.     my $text = shift;
  1556.  
  1557.     $text =~ s{ __ (?=\S) (.+?) (?<=\S) __ }
  1558.         {⟦u⟧$1/u⟧}gsx;
  1559.  
  1560.     $text =~ s{ \*\* (?=\S) (.+?) (?<=\S) \*\* }
  1561.         {⟦b⟧$1/b⟧}gsx;
  1562.  
  1563.     $text =~ s{ ~~ (?=\S) (.+?) (?<=\S) ~~ }
  1564.         {s$1/s}gsx;
  1565.  
  1566.     $text =~ s{ \^\^ (?=\S) (.+?) (?<=\S) \^\^ }
  1567.         {⟦font size="5"$1/font⟧}gsx;
  1568.  
  1569.     $text =~ s{ \^ (?=\S) (.+?) (?<=\S) \^ }
  1570.         {⟦sup⟧$1/sup⟧}gsx;
  1571.  
  1572.     $text =~ s{ ~ (?=\S) (.+?) (?<=\S) ~ }
  1573.         {sub$1/sub}gsx;
  1574.  
  1575.     $text =~ s{ (\*|_) (?=\S) (.+?) (?<=\S) \1 }
  1576.         {⟦i⟧$2/i⟧}gsx;
  1577.  
  1578.     return $text;
  1579. }
  1580.  
  1581.  
  1582. sub _DoBlockQuotes {
  1583.     my $text = shift;
  1584.  
  1585.     $text =~ s{
  1586.           (                             # Wrap whole match in $1
  1587.             (
  1588.               ^[ \t]*>[ \t]?            # '>' at the start of a line
  1589.                 .+\n                    # rest of the first line
  1590.               (.+\n)*                   # subsequent consecutive lines
  1591.               \n*                       # blanks
  1592.             )+
  1593.           )
  1594.         }{
  1595.             my $bq = $1;
  1596.             $bq =~ s/^[ \t]*>[ \t]?//gm;    # trim one level of quoting
  1597.             $bq =~ s/^[ \t]+$//mg;          # trim whitespace-only lines
  1598.             $bq = _RunBlockGamut($bq);      # recurse
  1599.  
  1600.             # $bq =~ s/^/  /g;
  1601.             # These leading spaces screw with <pre> content, so we need to fix that:
  1602.             $bq =~ s{
  1603.                     (\s*\[pre\].+?\[/pre\])
  1604.                 }{
  1605.                     my $pre = $1;
  1606.                     $pre =~ s/^  //mg;
  1607.                     $pre;
  1608.                 }egsx;
  1609.  
  1610.             "⟦quote⟧$bq⟦/quote⟧\n\n";
  1611.         }egmx;
  1612.  
  1613.  
  1614.     return $text;
  1615. }
  1616.  
  1617.  
  1618. sub _FormParagraphs {
  1619. #
  1620. #   Params:
  1621. #       $text - string to process with html <p> tags
  1622. #
  1623.     my $text = shift;
  1624.  
  1625.     # Strip leading and trailing lines:
  1626.     $text =~ s/\A\n+//;
  1627.     $text =~ s/\n+\z//;
  1628.  
  1629.     my @grafs = split(/\n{2,}/, $text);
  1630.  
  1631.  
  1632.     #
  1633.     # _RunSpanGamut for each paragraph
  1634.     #
  1635.     foreach (@grafs) {
  1636.         unless (defined( $g_html_blocks{$_} )) {
  1637.             $_ = _RunSpanGamut($_);
  1638.             s/^([ \t]*)//;
  1639.         }
  1640.     }
  1641.  
  1642.     #
  1643.     # Unhashify BBCode blocks
  1644.     #
  1645.     foreach (@grafs) {
  1646.         if (defined( $g_html_blocks{$_} )) {
  1647.             $_ = $g_html_blocks{$_};
  1648.         }
  1649.     }
  1650.  
  1651.     return join "\n\n", @grafs;
  1652. }
  1653.  
  1654.  
  1655. sub _EncodeAmpsAndAngles {
  1656. # Smart processing for ampersands and angle brackets that need to be encoded.
  1657.  
  1658.     my $text = shift;
  1659.  
  1660.     # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
  1661.     #   http://bumppo.net/projects/amputator/
  1662.     $text =~ s/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/&amp;/g;
  1663.  
  1664.     # Encode naked <'s
  1665.     $text =~ s{<(?![a-z/?\$!])}{&lt;}gi;
  1666.  
  1667.     return $text;
  1668. }
  1669.  
  1670.  
  1671. sub _EncodeBackslashEscapes {
  1672. #
  1673. #   Parameter:  String.
  1674. #   Returns:    The string, with after processing the following backslash
  1675. #               escape sequences.
  1676. #
  1677.     local $_ = shift;
  1678.  
  1679.     s! \\\\  !$g_escape_table{'\\'}!gx;     # Must process escaped backslashes first.
  1680.     s! \\`   !$g_escape_table{'`'}!gx;
  1681.    s! \\\*  !$g_escape_table{'*'}!gx;
  1682.    s! \\_   !$g_escape_table{'_'}!gx;
  1683.    s{ \\~  }{$g_escape_table{'~'}}gx;
  1684.    s! \\\{  !$g_escape_table{'{'}!gx;
  1685.    s! \\\}  !$g_escape_table{'}'}!gx;
  1686.    s! \\\[  !$g_escape_table{'['}!gx;
  1687.    s! \\\]  !$g_escape_table{']'}!gx;
  1688.    s! \\\(  !$g_escape_table{'('}!gx;
  1689.    s! \\\)  !$g_escape_table{')'}!gx;
  1690.    s! \\>   !$g_escape_table{'>'}!gx;
  1691.    s! \\\#  !$g_escape_table{'#'}!gx;
  1692.     s! \\\+  !$g_escape_table{'+'}!gx;
  1693.     s! \\\-  !$g_escape_table{'-'}!gx;
  1694.     s! \\\.  !$g_escape_table{'.'}!gx;
  1695.     s{ \\!  }{$g_escape_table{'!'}}gx;
  1696.  
  1697.     return $_;
  1698. }
  1699.  
  1700.  
  1701. sub _DoAutoLinks {
  1702.     my $text = shift;
  1703.  
  1704.     $text =~ s{<((https?|ftp):[^'">\s]+)>}{⟦a href="$1"⟧$1⟦/a⟧}gi;
  1705.  
  1706.     # Email addresses: <address@domain.foo>
  1707.     $text =~ s{
  1708.         <
  1709.        (?:mailto:)?
  1710.         (
  1711.             [-.\w]+
  1712.             \@
  1713.             [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
  1714.         )
  1715.         >
  1716.     }{
  1717.         _EncodeEmailAddress( _UnescapeSpecialChars($1) );
  1718.     }egix;
  1719.  
  1720.     return $text;
  1721. }
  1722.  
  1723.  
  1724. sub _EncodeEmailAddress {
  1725. #
  1726. #   Input: an email address, e.g. "foo@example.com"
  1727. #
  1728. #   Output: the email address as a mailto link, with each character
  1729. #       of the address encoded as either a decimal or hex entity, in
  1730. #       the hopes of foiling most address harvesting spam bots. E.g.:
  1731. #
  1732. #     <a href="&#x6D;&#97;&#105;&#108;&#x74;&#111;:&#102;&#111;&#111;&#64;&#101;
  1733. #       x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;">&#102;&#111;&#111;
  1734. #       &#64;&#101;x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;</a>
  1735. #
  1736. #   Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
  1737. #   mailing list: <http://tinyurl.com/yu7ue>
  1738. #
  1739.  
  1740.     my $addr = shift;
  1741.  
  1742.     srand;
  1743.     my @encode = (
  1744.         sub { '&#' .                 ord(shift)   . ';' },
  1745.         sub { '&#x' . sprintf( "%X", ord(shift) ) . ';' },
  1746.         sub {                            shift          },
  1747.     );
  1748.  
  1749.     $addr = "mailto:" . $addr;
  1750.  
  1751.     $addr =~ s{(.)}{
  1752.         my $char = $1;
  1753.         if ( $char eq '@' ) {
  1754.             # this *must* be encoded. I insist.
  1755.             $char = $encode[int rand 1]->($char);
  1756.         } elsif ( $char ne ':' ) {
  1757.             # leave ':' alone (to spot mailto: later)
  1758.             my $r = rand;
  1759.             # roughly 10% raw, 45% hex, 45% dec
  1760.             $char = (
  1761.                 $r > .9   ?  $encode[2]->($char)  :
  1762.                 $r < .45  ?  $encode[1]->($char)  :
  1763.                              $encode[0]->($char)
  1764.             );
  1765.         }
  1766.         $char;
  1767.     }gex;
  1768.  
  1769.     $addr = qq{⟦a href="$addr"$addr/a⟧};
  1770.     $addr =~ s{"⟧.+?:}{"}; # strip the mailto: from the visible part
  1771.  
  1772.     return $addr;
  1773. }
  1774.  
  1775.  
  1776. sub _UnescapeSpecialChars {
  1777. #
  1778. # Swap back in all the special characters we've hidden.
  1779. #
  1780.     my $text = shift;
  1781.  
  1782.     while( my($char, $hash) = each(%g_escape_table) ) {
  1783.         $text =~ s/$hash/$char/g;
  1784.     }
  1785.     return $text;
  1786. }
  1787.  
  1788.  
  1789. sub _TokenizeHTML {
  1790. #
  1791. #   Parameter:  String containing HTML markup.
  1792. #   Returns:    Reference to an array of the tokens comprising the input
  1793. #               string. Each token is either a tag (possibly with nested,
  1794. #               tags contained therein, such as <a href="<MTFoo>">, or a
  1795. #               run of text between tags. Each element of the array is a
  1796. #               two-element array; the first is either 'tag' or 'text';
  1797. #               the second is the actual value.
  1798. #
  1799. #
  1800. #   Derived from the _tokenize() subroutine from Brad Choate's MTRegex plugin.
  1801. #       <http://www.bradchoate.com/past/mtregex.php>
  1802. #
  1803.  
  1804.     my $str = shift;
  1805.     my $pos = 0;
  1806.     my $len = length $str;
  1807.     my @tokens;
  1808.  
  1809.     my $depth = 6;
  1810.     my $nested_tags = join('|', ('(?:<[a-z/!$](?:[^<>]') x $depth) . (')*>)' x  $depth);
  1811.     my $match = qr/(?s: <! ( -- .*? -- \s* )+ > ) |  # comment
  1812.                    (?s: <\? .*? \?> ) |              # processing instruction
  1813.                    $nested_tags/ix;                   # nested tags
  1814.  
  1815.     while ($str =~ m/($match)/g) {
  1816.         my $whole_tag = $1;
  1817.         my $sec_start = pos $str;
  1818.         my $tag_start = $sec_start - length $whole_tag;
  1819.         if ($pos < $tag_start) {
  1820.             push @tokens, ['text', substr($str, $pos, $tag_start - $pos)];
  1821.         }
  1822.         push @tokens, ['tag', $whole_tag];
  1823.         $pos = pos $str;
  1824.     }
  1825.     push @tokens, ['text', substr($str, $pos, $len - $pos)] if $pos < $len;
  1826.     \@tokens;
  1827. }
  1828.  
  1829.  
  1830. sub _Outdent {
  1831. #
  1832. # Remove one level of line-leading tabs or spaces
  1833. #
  1834.     my $text = shift;
  1835.  
  1836.     $text =~ s/^(\t|[ ]{1,$g_tab_width})//gm;
  1837.     return $text;
  1838. }
  1839.  
  1840.  
  1841. sub _Detab {
  1842. #
  1843. # Cribbed from a post by Bart Lateur:
  1844. # <http://www.nntp.perl.org/group/perl.macperl.anyperl/154>
  1845. #
  1846.     my $text = shift;
  1847.  
  1848.     $text =~ s{(.*?)\t}{$1.(' ' x ($g_tab_width - length($1) % $g_tab_width))}ge;
  1849.     return $text;
  1850. }
  1851.  
  1852.  
  1853. sub _PreProcess {
  1854.     my $text = shift;
  1855.     # Here we process stuff like @[include](file) that must be done first
  1856.  
  1857.     # `include` inserts a Markdown file without running Markdown2BBCode.pl on it first like `process`
  1858.     # so included files should not contain YAML metadata unless the include statement is the first line!
  1859.    
  1860.     $text =~ s{
  1861.             @ \[ include \]
  1862.             \(          # literal paren
  1863.                 ($g_nested_parens)  # filename = $1
  1864.             \)
  1865.     }{
  1866.         local $/=undef;
  1867.         open FILE, $1 or die "Couldn't open file: $!";
  1868.         my $string = <FILE>;
  1869.         close FILE;
  1870.         $string;
  1871.     }exg;
  1872.    
  1873.     return $text;
  1874. }
  1875.  
  1876.  
  1877. sub _PostProcess {
  1878.    
  1879.     my $text = shift;
  1880.    
  1881.     # Remove HTML comments
  1882.     $text =~ s/<! ( -- .*? -- \s* )+ >//gsx;
  1883.    
  1884.     # Translate double square brackets we use internally back to the regular square brackets BBCode uses
  1885.     $text =~ tr/⟦⟧/\[\]/;
  1886.    
  1887.     # @[process](file)
  1888.     # Unlike `include`, `process` actually runs Markdown2BBCode.pl on a file and includes the results
  1889.     $text =~ s{
  1890.             @ \[ process \]
  1891.             \(          # literal paren
  1892.                 ($g_nested_parens)  # filename = $1
  1893.             \)
  1894.     }{
  1895.         `perl Markdown2BBCode.pl $1`;
  1896.     }egx;
  1897.  
  1898.     # @[noprocess](file)
  1899.     # Unlike `process`, `noprocess` does no processing on a file. It includes it literally
  1900.     $text =~ s{
  1901.             @ \[ noprocess \]
  1902.             \(          # literal paren
  1903.                 ($g_nested_parens)  # filename = $1
  1904.             \)
  1905.     }{
  1906.             $matharr[$mathctr] = `cat $1`;
  1907.             "(※" . $mathctr++ . ")"
  1908.     }egx;
  1909.  
  1910.     # Put math formulae back, unescape dollar signs
  1911.     $text =~ s/\(※(\d+)\)/$matharr[$1]/gsex;
  1912.     $text =~ s/\\\$/\$/gs;
  1913.  
  1914.     return $text;
  1915. }  
  1916.  
  1917.  
  1918. #
  1919. # YAML stuff
  1920. #
  1921.  
  1922. sub _InitYAML {
  1923.     my $text = shift;
  1924.     $text =~ s{
  1925.             ^
  1926.             (
  1927.             (?: \% YAML \s+ [\d\.]+ \n)?
  1928.             --- \n
  1929.             (?: .* \n)+?
  1930.             (--- | \.\.\.) \n
  1931.             )
  1932.         }{
  1933.             my $string = $1;
  1934.             if(length $string > 0) {
  1935.                 # Found some YAML at the beginning of the doc
  1936.                 # Let YAML::Tiny parse it
  1937.                 $yaml = YAML::Tiny->read_string($string);
  1938.             } else {
  1939.                 # No YAML Found! Create an empty YAML object
  1940.                 $yaml = new YAML::Tiny;
  1941.             }
  1942.             ""
  1943.         }mex;
  1944.     return $text;
  1945. }
  1946.  
  1947. sub _DoMetadataVariables {
  1948.   my $text = shift;
  1949.  
  1950.   # Substitute tags like this
  1951.   #     [%key]
  1952.   # with the YAML metadata from the file header corresponding to the key
  1953.  
  1954.   $text =~ s{
  1955.     (               # wrap whole match in $1
  1956.         \[
  1957.             \%
  1958.             (.*?)       # key = $2
  1959.         \]
  1960.     )
  1961.   }{
  1962.     my $key = $2;
  1963.     my $value = $yaml->[0]->{$key};
  1964.     if($value eq ""){
  1965.         $value = $key;
  1966.     }
  1967.     $value;
  1968.   }gsex;
  1969.  
  1970.   return $text;
  1971. }
  1972.  
  1973.  
  1974. #
  1975. # LaTeX Math stuff
  1976. #
  1977.  
  1978. sub _InitMath {
  1979.  
  1980.   @matharr = ();
  1981.   $mathctr = 1;
  1982.  
  1983.   # Commonly used symbols
  1984.   %mathsym = (
  1985.  
  1986.     # Operators
  1987.     pm => '±',
  1988.     cdot => '·',
  1989.     div => '÷',
  1990.     times => '×',
  1991.     prod => '∏',
  1992.     sum => '∑',
  1993.     surd => '√',
  1994.     cuberoot => '∛',
  1995.     wedge => '∧',
  1996.     vee => '∨',
  1997.     oplus => '⊕',
  1998.     otimes => '⊗',
  1999.     ast => '∗',
  2000.     circ => '°',
  2001.     bowtie => '⋈',
  2002.  
  2003.     # Comparison
  2004.     neq => '≠',
  2005.     approx => '≈',
  2006.     napprox => '≉',
  2007.     propto => '∝',
  2008.     equiv => '≡',
  2009.     nequiv => '≢',
  2010.     leq => '≤',
  2011.     geq => '≥',
  2012.     ll => '≪',
  2013.     gg => '≫',
  2014.  
  2015.     # Calculus
  2016.     ldots => '...',
  2017.     infty => '∞',
  2018.     prime => '′',
  2019.     second => '″',
  2020.     partial => '∂',
  2021.     nabla => '∇',
  2022.     int => '∫',
  2023.     iint => '∬',
  2024.     iiint => '∭',
  2025.     oint => '∮',
  2026.     oiint => '∯',
  2027.     oiiint => '∰',
  2028.     sumint => '⨋',
  2029.  
  2030.     # Script
  2031.     ell => 'ℓ',
  2032.     Re => 'ℜ',
  2033.     Im => 'ℑ',
  2034.  
  2035.     # Logic
  2036.     neg => '¬',
  2037.     forall => '∀',
  2038.     exists => '∃',
  2039.     nexists => '∄',
  2040.     therefore => '∴',
  2041.     QED => '∎',
  2042.     vdash => '⊢',
  2043.     vDash => '⊨',
  2044.     bot => '⊥',
  2045.     top => '⊤',
  2046.  
  2047.     # Set theory
  2048.     in => '∈',
  2049.     setminus => '\\',
  2050.     backslash => '\\',
  2051.     varnothing => '∅',
  2052.     emptyset => '∅',
  2053.     cap => '∩',
  2054.     cup => '∪',
  2055.     subset => '⊂',
  2056.     supset => '⊃',
  2057.     subseteq => '⊆',
  2058.     subsetneq => '⊊',
  2059.     aleph => 'ℵ',
  2060.  
  2061.     # Physics
  2062.     Planckconst => 'ℎ',
  2063.     hslash => 'ℏ',   
  2064.     langle => '⟨',
  2065.     rangle => '⟩',
  2066.  
  2067.     # Geometry
  2068.     rightangle => '∟',
  2069.     angle => '∠',
  2070.     parallel => '∥',
  2071.     nparallel => '∦',
  2072.     perp => '⟂',
  2073.     bigtriangleup => '△',
  2074.     square => '◻',
  2075.     blacksquare => '◼',
  2076.     hrectangle => '▭',
  2077.     vrectangle => '▯',
  2078.     parallelogram => '▱',
  2079.     diamond => '⋄',
  2080.     Diamond => '◇',
  2081.     Diamondblack => '◆',
  2082.  
  2083.     # Spaces
  2084.     quad => ' ',
  2085.     qquad => '  ',
  2086.     textvisiblespace => '␣',
  2087.    
  2088.     # Money
  2089.     cent => '¢',
  2090.     pounds => '£',
  2091.     yen => '¥',
  2092.     euro => '€',
  2093.  
  2094.     # Arrows
  2095.     to => '→',
  2096.     rightarrow => '→',
  2097.     mapsto => '↦',
  2098.     Rightarrow => '⇒',
  2099.     Leftrightarrow => '⇔',
  2100.     hookrightarrow => '↪',
  2101.     uparrow => '↑',
  2102.     downarrow => '↓',
  2103.     leftarrow => '←',
  2104.     rightrightarrows => '⇉',
  2105.     leadsto => '⤳',
  2106.  
  2107.     # Typographical marks
  2108.     P => '',    # Pilcrow
  2109.     S => '§',  # Section sign
  2110.     ddag => '‡',
  2111.     dag => '†',
  2112.     textquestiondown => '¿',
  2113.     textexclamdown => '¡',
  2114.  
  2115.     # Dingbats
  2116.     dagger => '†',
  2117.     checkmark => '✓',
  2118.     ballotx => '✗',
  2119.     danger => '☡',
  2120.    
  2121.     # Music
  2122.     sharp => '♯',
  2123.     natural => '♮',
  2124.     flat => '♭',
  2125.    
  2126.     # Card suits
  2127.     spadesuit => '♠',
  2128.     heartsuit => '♡',
  2129.     diamondsuit => '♢',
  2130.     clubsuit => '♣',
  2131.  
  2132.     # Legal
  2133.     copyright => 'Ⓒ',
  2134.     texttrademark => '™',
  2135.     textregistered => '®',
  2136.  
  2137.     # Greek letters
  2138.     Alpha => 'Α',
  2139.     alpha => 'α',
  2140.     Beta => 'Β',
  2141.     beta => 'β',
  2142.     Gamma => 'Γ',
  2143.     gamma => 'γ',
  2144.     Delta => 'Δ',
  2145.     delta => 'δ',
  2146.     Epsilon => 'Ε',
  2147.     epsilon => 'ε',
  2148.     Zeta => 'Ζ',
  2149.     zeta => 'ζ',
  2150.     Eta => 'Η',
  2151.     eta => 'η',
  2152.     Theta => 'Θ',
  2153.     theta => 'θ',
  2154.     Iota => 'Ι',
  2155.     iota => 'ι',
  2156.     Kappa => 'Κ',
  2157.     kappa => 'κ',
  2158.     Lambda => 'Λ',
  2159.     lambda => 'λ',
  2160.     Mu => 'Μ',
  2161.     mu => 'μ',
  2162.     Nu => 'Ν',
  2163.     nu => 'ν',
  2164.     Xi => 'Ξ',
  2165.     xi => 'ξ',
  2166.     Omicron => 'Ο',
  2167.     omicron => 'ο',
  2168.     Pi => 'Π',
  2169.     pi => 'π',
  2170.     Rho => 'Ρ',
  2171.     rho => 'ρ',
  2172.     Sigma => 'Σ',
  2173.     sigma => 'σ',
  2174.     Tau => 'Τ',
  2175.     tau => 'τ',
  2176.     Upsilon => 'Υ',
  2177.     upsilon => 'υ',
  2178.     Phi => 'Φ',
  2179.     phi => 'φ',
  2180.     Chi => 'Χ',
  2181.     chi => 'χ',
  2182.     Psi => 'Ψ',
  2183.     psi => 'ψ',
  2184.     Omega => 'Ω',
  2185.     omega => 'ω',
  2186.    
  2187.     # Catch "text" tag
  2188.     text => '::text'
  2189.   );
  2190.  
  2191.   # Blackboard
  2192.   %mathbb = (
  2193.     N => 'ℕ', # Natural numbers
  2194.     Z => 'ℤ', # Integers
  2195.     Q => 'ℚ', # Rationals
  2196.     R => 'ℝ', # Reals
  2197.     C => 'ℂ', # Complex
  2198.     H => 'ℍ'  # Quaternions
  2199.   );
  2200.  
  2201.   # Calligraphy
  2202.   %mathcal = (
  2203.     e => 'ℯ',
  2204.     E => 'ℰ',
  2205.     F => 'ℱ',
  2206.     H => 'ℋ', # Hamiltonian
  2207.     L => 'ℒ', # Lagrangian
  2208.     M => 'ℳ'
  2209.   );
  2210.  
  2211.   # Six-sided dice faces
  2212.   %dice = (
  2213.     i => '⚀',
  2214.     ii => '⚁',
  2215.     iii => '⚂',
  2216.     iv => '⚃',
  2217.     v => '⚄',
  2218.     vi => '⚅'
  2219.   );
  2220. }
  2221.  
  2222. sub num {
  2223.     my $n = shift;
  2224.     $n = _DoTheMath($n);
  2225.     if($n =~ /^\s*([\d\.]+)\s*$/){ return $1; }
  2226.     if( $n !~ /\s/ ){ return $n; }
  2227.     return "(" . $n . ")" ;
  2228. }
  2229.  
  2230. sub frac {
  2231.   my ($a, $b) = @_;
  2232.   return num($a) . "/" . num($b);
  2233. }
  2234.  
  2235. sub _DoTheMath {
  2236.     my $text = shift;
  2237.  
  2238.     $text =~ s/_\{($g_nested_braces)\}/\[sub\]$1\[\/sub\]/g;
  2239.     $text =~ s/\^\{($g_nested_braces)\}/\[sup\]$1\[\/sup\]/g;
  2240.     $text =~ s/_(\\?\w+)/\[sub\]$1\[\/sub\]/g;
  2241.     $text =~ s/\^(\\?\w+)/\[sup\]$1\[\/sup\]/g;
  2242.     $text =~ s/\\not\s*\\in/∉/g;
  2243.     $text =~ s/\\frac\{($g_nested_braces)\}\{($g_nested_braces)\}/frac($1,$2)/eg;
  2244.     $text =~ s/\\sqrt\{($g_nested_braces)\}/"√".num($1)/eg;
  2245.     $text =~ s/\\mathbb\{(\w+)\}/$mathbb{$1}/eg;
  2246.     $text =~ s/\\mathcal\{(\w+)\}/$mathcal{$1}/eg;
  2247.     $text =~ s/\\dice\{([vi]{1,3})\}/$dice{$1}/eg;
  2248.     $text =~ s/\\pmod\s+(\w+)/"(mod " . $1 . ")"/eg;
  2249.     $text =~ s/\\bmod\s+(\w+)/"mod " . $1/eg;
  2250.     $text =~ s/\\(sinh|cosh|tanh|coth|arcsin|arccos|arctan|arccot|sin|cos|tan|cot|sec|csc|log|exp)[ ]*/$1/g;
  2251.     $text =~ s/\\([a-zA-Z]+)[ ]*/$mathsym{$1}/eg;
  2252.     $text =~ s/::text\{([^\}]+)\}/$1/g; # TODO: This needs to be hashed at the beginning and restored at the end to prevent interpoloation!
  2253.    
  2254.     # Escapes
  2255.     $text =~ s/\\!//g;
  2256.     $text =~ s/\\,//g;
  2257.     $text =~ s/\\:/ /g;
  2258.     $text =~ s/\\;/ /g;
  2259.     #$text =~ s/\\>/ /g;
  2260.     $text =~ s/\\\}/\}/g;
  2261.     $text =~ s/\\\{/\{/g;
  2262.     $text =~ s/\\\\/\\/g;
  2263.  
  2264.     # normalize whitespace
  2265.     #$text =~ s/^\s+//g;
  2266.     #$text =~ s/\s+$//g;
  2267.     #$text =~ s/\s+/ /g;
  2268.  
  2269.     return $text;
  2270. }
  2271.  
  2272.  
  2273. sub _HashMathSpans {
  2274.     my $text = shift;
  2275.         $text =~ s{
  2276.             (?<! \\ )
  2277.             \$
  2278.             (?! \s )
  2279.             ( .+? )
  2280.             (?<! \s )
  2281.             \$
  2282.             }{
  2283.             $matharr[$mathctr] = _DoTheMath($1);
  2284.             "(※" . $mathctr++ . ")"
  2285.             }egmx;
  2286.     return $text;
  2287. }
  2288.  
  2289.  
  2290. 1;
  2291.  
  2292.  
  2293. __END__
  2294.  
  2295.  
  2296. =pod
  2297.  
  2298. =head1 NAME
  2299.  
  2300. B<Markdown2BBCode>
  2301.  
  2302.  
  2303. =head1 SYNOPSIS
  2304.  
  2305. B<Markdown2BBCode.pl> [ B<--version> ] [ B<-shortversion> ]
  2306.     [ I<file> ... ]
  2307.  
  2308.  
  2309. =head1 DESCRIPTION
  2310.  
  2311. Markdown2BBCode.pl is a text-to-BBCode filter; it translates an easy-to-read /
  2312. easy-to-write structured text format into BBCode. Markdown's text format
  2313. is most similar to that of plain text email, and supports features such
  2314. as headers, *emphasis*, code blocks, blockquotes, and links.
  2315.  
  2316. Markdown2BBCode's syntax is designed not as a generic markup language, but
  2317. specifically to serve as a front-end to BBCode. You can  use span-level
  2318. BBCode tags anywhere in a Markdown2BBCode document, and you can use block level
  2319. BBCode tags (like [div] and [table] as well).
  2320.  
  2321. For more information about Markdown's syntax, see:
  2322.  
  2323.     http://daringfireball.net/projects/markdown/
  2324.  
  2325.  
  2326. =head1 OPTIONS
  2327.  
  2328. Use "--" to end switch parsing. For example, to open a file named "-z", use:
  2329.  
  2330.     Markdown2BBCode.pl -- -z
  2331.  
  2332. =over 4
  2333.  
  2334.  
  2335. =item B<-v>, B<--version>
  2336.  
  2337. Display Markdown2BBCode's version number and copyright information.
  2338.  
  2339.  
  2340. =item B<-s>, B<--shortversion>
  2341.  
  2342. Display the short-form version number.
  2343.  
  2344.  
  2345. =back
  2346.  
  2347.  
  2348.  
  2349. =head1 BUGS
  2350.  
  2351. To file bug reports or feature requests (other than topics listed in the
  2352. Caveats section above) please send email to:
  2353.  
  2354.     krusader74 at yahoo dot com
  2355.  
  2356. Please include with your report: (1) the example input; (2) the output
  2357. you expected; (3) the output Markdown2BBCode.pl actually produced.
  2358.  
  2359.  
  2360. =head1 VERSION HISTORY
  2361.  
  2362. See the readme file for detailed release notes for this version.
  2363.  
  2364. 2.0 -- 26 Jan 2016
  2365.  
  2366. 3.0 -- 20 Feb 2016
  2367.  
  2368.   * Fixed: The \LaTex{} expressions \frac{}, \sqrt{}, ^{}, _{}
  2369.     now allow for nested braces.
  2370.  
  2371.   * Added: Preliminary support for Pandoc simple tables, multiline
  2372.     tables, and grid tables. See http://pandoc.org/README.html#tables
  2373.  
  2374.  
  2375. =head1 AUTHOR
  2376.  
  2377.     Original Markdown.pl Perl script by John Gruber
  2378.     http://daringfireball.net
  2379.  
  2380.     PHP port and other contributions by Michel Fortin
  2381.     http://michelf.com
  2382.  
  2383.     BBCode port and other contributions by Krusader74
  2384.     http://odd74.proboards.com/user/2141
  2385.     http://pastebin.com/u/krusader74
  2386.     krusader74 at yahoo dot com
  2387.  
  2388. =head1 COPYRIGHT AND LICENSE
  2389.  
  2390. Copyright (c) 2003-2004 John Gruber  
  2391. <http://daringfireball.net/>  
  2392. All rights reserved.
  2393.  
  2394. Redistribution and use in source and binary forms, with or without
  2395. modification, are permitted provided that the following conditions are
  2396. met:
  2397.  
  2398. * Redistributions of source code must retain the above copyright notice,
  2399.   this list of conditions and the following disclaimer.
  2400.  
  2401. * Redistributions in binary form must reproduce the above copyright
  2402.   notice, this list of conditions and the following disclaimer in the
  2403.   documentation and/or other materials provided with the distribution.
  2404.  
  2405. * Neither the name "Markdown" nor the names of its contributors may
  2406.   be used to endorse or promote products derived from this software
  2407.   without specific prior written permission.
  2408.  
  2409. This software is provided by the copyright holders and contributors "as
  2410. is" and any express or implied warranties, including, but not limited
  2411. to, the implied warranties of merchantability and fitness for a
  2412. particular purpose are disclaimed. In no event shall the copyright owner
  2413. or contributors be liable for any direct, indirect, incidental, special,
  2414. exemplary, or consequential damages (including, but not limited to,
  2415. procurement of substitute goods or services; loss of use, data, or
  2416. profits; or business interruption) however caused and on any theory of
  2417. liability, whether in contract, strict liability, or tort (including
  2418. negligence or otherwise) arising in any way out of the use of this
  2419. software, even if advised of the possibility of such damage.
  2420.  
  2421. =cut
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement